Example #1
def test_multiple_runs():
    "test running multiple models through multiple tournaments"

    d = testing.play_data()
    models = [nx.logistic(), nx.fifty()]

    with testing.HiddenPrints():

        p = nx.production(models, d, 'bernie')
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.backtest(models, d, 2)
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.run(models, nx.ValidationSplitter(d), 'ken')
        ok_(p.shape[1] == 2, 'wrong number of tournaments')

        p = nx.production(models, d)
        ok_(p.shape[1] == 10, 'wrong number of tournaments')
        p = nx.backtest(models, d)
        ok_(p.shape[1] == 10, 'wrong number of tournaments')
        p = nx.run(models, nx.ValidationSplitter(d))
        ok_(p.shape[1] == 10, 'wrong number of tournaments')

        p = nx.production(models, d, [1, 5])
        ok_(p.shape[1] == 4, 'wrong number of tournaments')
        ok_(p.tournaments() == ['bernie', 'charles'], 'wrong tournaments')
        p = nx.backtest(models, d, ['charles', 'bernie'])
        ok_(p.shape[1] == 4, 'wrong number of tournaments')
        ok_(p.tournaments() == ['bernie', 'charles'], 'wrong tournaments')
        p = nx.run(models, nx.ValidationSplitter(d), ['ken'])
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        ok_(p.tournaments() == ['ken'], 'wrong tournaments')
Example #2
def test_prediction_setitem():
    "compare prediction._setitem__ with merge"

    data = nx.play_data()
    p1 = nx.production(nx.logistic(), data, 'model1', verbosity=0)
    p2 = nx.production(nx.logistic(1e-5), data, 'model2', verbosity=0)
    p3 = nx.production(nx.logistic(1e-6), data, 'model3', verbosity=0)
    p4 = nx.backtest(nx.logistic(), data, 'model1', verbosity=0)

    p = nx.Prediction()
    p['model1'] = p1
    p['model2'] = p2
    p['model3'] = p3
    p['model1'] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, 'model1', p1)
    assert_raises(ValueError, p.__setitem__, 'model1', p)
Example #3
def test_multiple_runs():
    """test running multiple models through multiple tournaments"""

    d = testing.play_data()
    models = [nx.linear(), nx.fifty()]

    with testing.HiddenPrints():

        p = nx.production(models, d, 'kazutsugi')
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.backtest(models, d, 8)
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.run(models, nx.ValidationSplitter(d), 'kazutsugi')
        ok_(p.shape[1] == 2, 'wrong number of tournaments')

        p = nx.production(models, d)
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.backtest(models, d)
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        p = nx.run(models, nx.ValidationSplitter(d))
        ok_(p.shape[1] == 2, 'wrong number of tournaments')

        p = nx.production(models, d, [8])
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        ok_(p.tournaments() == ['kazutsugi'], 'wrong tournaments')
        p = nx.backtest(models, d, ['kazutsugi'])
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        ok_(p.tournaments() == ['kazutsugi'], 'wrong tournaments')
        p = nx.run(models, nx.ValidationSplitter(d), ['kazutsugi'])
        ok_(p.shape[1] == 2, 'wrong number of tournaments')
        ok_(p.tournaments() == ['kazutsugi'], 'wrong tournaments')
Example #4
def test_prediction_setitem():
    "compare prediction._setitem__ with merge"

    data = nx.play_data()
    p1 = nx.production(nx.logistic(), data, 'bernie', verbosity=0)
    p2 = nx.production(nx.logistic(1e-5), data, 2, verbosity=0)
    p3 = nx.production(nx.logistic(1e-6), data, 3, verbosity=0)
    p4 = nx.backtest(nx.logistic(), data, 4, verbosity=0)

    p = nx.Prediction()
    p[('logistic', 1)] = p1
    p[('logistic', 2)] = p2
    p[('logistic', 3)] = p3
    p[('logistic', 4)] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, ('logistic', 1), p1)
    assert_raises(ValueError, p.__setitem__, ('logistic', 1), p)
Example #5
def test_prediction_check():
    "make sure prediction.check runs"
    d = nx.play_data()
    p = nx.production(nx.logistic(), d, verbosity=0)
    p += nx.production(nx.logisticPCA(), d, verbosity=0)
    df = p.check(['logistic'], d)
    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
Example #6
def improve_model(data, tournament='bernie'):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then change the data, rerun and compare performance with and without the
    change.
    """

    # we'll look at 5 models
    models = [
        nx.logistic(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.logisticPCA()
    ]

    print('\nStandard dataset:\n')

    # first run the base case
    prediction = nx.production(models, data, tournament, verbosity=1)

    # let's now make a change, could be anything; as an example let's add
    # the square of each feature to the dataset
    x = np.hstack((data.x, data.x * data.x))
    data2 = data.xnew(x)

    print('\nDataset expanded with squared features:\n')

    # rerun all models with the new expanded data
    prediction2 = nx.production(models, data2, tournament, verbosity=1)

    # compare performance
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(prediction.compare(data['validation'], prediction2, tournament))
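
A quick way to exercise this helper is numerox's built-in play dataset (a minimal sketch; any Data object and tournament name would do):

import numerox as nx

# small bundled dataset; swap in the full download for real comparisons
data = nx.play_data()
improve_model(data, tournament='bernie')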
Example #7
def test_prediction_setitem():
    """compare prediction._setitem__ with merge"""

    data = nx.play_data()

    p1 = nx.production(nx.linear(), data, 'kazutsugi', verbosity=0)
    p2 = nx.production(nx.linear(), data, 8, verbosity=0)
    p3 = nx.production(nx.linear(), data, 8, verbosity=0)
    p4 = nx.backtest(nx.linear(), data, 8, verbosity=0)

    p = nx.Prediction()
    p[('linear', 1)] = p1
    p[('linear', 2)] = p2
    p[('linear', 3)] = p3
    p[('linear', 4)] = p4

    pp = nx.Prediction()
    pp = pp.merge(p1)
    pp = pp.merge(p2)
    pp = pp.merge(p3)
    pp = pp.merge(p4)

    pd.testing.assert_frame_equal(p.df, pp.df)

    assert_raises(ValueError, p.__setitem__, ('linear', 1), p1)
    assert_raises(ValueError, p.__setitem__, ('linear', 1), p)
Example #8
def test_backtest_production():
    "Make sure backtest and production run"
    d = testing.micro_data()
    model = fifty()
    with testing.HiddenPrints():
        for verbosity in (0, 1, 2, 3):
            nx.backtest(model, d, kfold=2, verbosity=verbosity)
            nx.production(model, d, verbosity=verbosity)
            if verbosity == 3:
                nx.production(model, d, name='test', verbosity=verbosity)
Example #9
def concordance_example(data):
    """
    Example showing how to calculate concordance.
    Concordance must be less than 0.12 to pass numerai's check.
    For an accurate concordance calculation `data` must be the full dataset.
    """
    prediction = nx.production(nx.logistic(), data)
    prediction += nx.production(nx.extratrees(), data)
    prediction += nx.production(nx.mlpc(), data)
    print("\nA concordance less than 0.12 is passing")
    print(prediction.concordance(data))
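
For instance, the function could be driven like this (a sketch; the zip filename is an assumption, as in the other examples):

import numerox as nx

# concordance is only accurate on the full dataset, so download it first
data = nx.download('numerai_dataset.zip')
concordance_example(data)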
Example #10
def test_prediction_regression():
    "regression test of prediction performance evaluation"
    d = nx.play_data()
    p = nx.production(nx.logistic(), d, tournament=None, verbosity=0)
    for number, name in nx.tournament_iter():
        p2 = nx.production(nx.logistic(), d, tournament=name, verbosity=0)
        df = p.performance_mean(d['validation'], mean_of='tournament')
        logloss1 = df.loc[name]['logloss']
        logloss2 = p2.summary(d['validation']).loc['mean']['logloss']
        diff = np.abs(logloss1 - logloss2)
        msg = 'failed on {}'.format(name)
        ok_(diff < 1e-6, msg)
Example #11
def test_prediction_regression():
    """regression test of prediction performance evaluation"""
    d = nx.play_data()
    p = nx.production(nx.linear(), d, tournament=None, verbosity=0)
    for number, name in nx.tournament_iter():
        p2 = nx.production(nx.linear(), d, tournament=name, verbosity=0)
        df = p.performance_mean(d['validation'], mean_of='tournament')
        logloss1 = float('%.3f' % (df.loc[name]['mse']))
        logloss2 = float('%.3f' %
                         (p2.summary(d['validation']).loc['mean']['mse']))
        diff = np.abs(logloss1 - logloss2)
        msg = f"failed on {name}"
        ok_(diff < 1e-6, msg)
Example #12
def predict_and_submit(tournaments, data, model_class, numerai_public,
                       numerai_secret):
    model_name = model_class.__name__
    for tournament_name in tournaments:
        saved_model_name = f'model_trained_{model_name}_{tournament_name}'
        if os.path.exists(saved_model_name):
            print("using saved model for", tournament_name)
            m = model_class.load(saved_model_name)
        else:
            print("saved model not found for", tournament_name)
            m = model_class(verbose=True)

            print("training model for", tournament_name)
            m.fit(data['train'], tournament_name)

        print(
            f"running predictions for {model_name} on tournament {tournament_name}",
            flush=True)
        # fit model with train data and make predictions for tournament data
        prediction = nx.production(m, data, tournament=tournament_name)

        # save predictions to csv file
        prediction_filename = f'/tmp/prediction_{model_name}_{tournament_name}.csv'
        prediction.to_csv(prediction_filename, verbose=True)

    # submit the prediction
    for tournament_name in tournaments:
        prediction_filename = f'/tmp/prediction_{model_name}_{tournament_name}.csv'

        submission_id = nx.upload(prediction_filename,
                                  tournament_name,
                                  numerai_public,
                                  numerai_secret,
                                  block=False,
                                  n_tries=3)
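
A sketch of how this function might be invoked; model.LinearModel and the environment variable names are borrowed from the other examples here, not defined in this one:

import os
import numerox as nx
import model  # user module providing LinearModel, as in Example #19

data = nx.load_zip('numerai_dataset.zip', single_precision=True)
predict_and_submit(nx.tournament_names(), data, model.LinearModel,
                   os.environ['NUMERAI_PUBLIC_ID'],
                   os.environ['NUMERAI_SECRET_KEY'])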
Example #13
def test_prediction_roundtrip():
    "save/load roundtrip shouldn't change prediction"
    d = micro_data()
    m = nx.logistic()
    p = nx.production(m, d, verbosity=0)
    with tempfile.NamedTemporaryFile() as temp:
        p.save(temp.name)
        p2 = nx.load_prediction(temp.name)
        ade(p, p2, "prediction corrupted during roundtrip")
Example #14
def concordance(data, tournament='bernie'):
    """
    Example showing how to calculate concordance.
    Concordance must be less than 0.12 to pass numerai's check.
    For an accurate concordance calculation `data` must be the full dataset.
    """
    models = [nx.logistic(), nx.extratrees(), nx.mlpc()]
    p = nx.production(models, data, tournament)
    print("\nA concordance less than 0.12 is passing")
    print(p.concordance(data))
Example #15
def test_prediction_check():
    """make sure prediction.check runs"""
    d = nx.play_data()
    p1 = nx.production(nx.linear(), d, 'kazutsugi', verbosity=0)
    p2 = p1.copy()
    p2 = p2.rename('example_predictions')
    p = p1 + p2
    with nx.testing.HiddenPrints():
        df = p.check(d)
    ok_(isinstance(df, dict), 'expecting a dictionary')
Example #16
def predict_and_submit(data, model_class):
    model_name = model_class.__name__
    model_id = model_class.model_id
    logger = logging.getLogger(model_name)

    for tournament in nx.tournament_names():

        logger.info(f"Predict and submit for {tournament} using {model_class}")
        saved_model_name = f'model_trained_{model_name}_{tournament}'
        if os.path.exists(saved_model_name):
            logger.info(f'Using saved model {saved_model_name}')
            m = model_class.load(saved_model_name)
        else:
            logger.info(f'Saved model {saved_model_name} not found')
            m = model_class(verbose=True)

            try:
                logger.info(
                    f'Training model against {tournament} training data')
                m.fit(data['train'], tournament)
            except Exception as e:
                logger.error(f'Failed to train {model_class} - {e}')
                return

        # fit model with train data and make predictions for tournament data
        logger.info(f'Predicting with {model_class} on {tournament} data')
        prediction = nx.production(m, data, tournament=tournament)

        # save predictions to csv file
        prediction_filename = f'/tmp/prediction_{model_name}_{tournament}.csv'
        logger.info(f"Saving predictions to {prediction_filename}")
        prediction.to_csv(prediction_filename, verbose=True)

        try:
            # submit the prediction
            logger.info(
                f"Submitting predictions from {prediction_filename} using {model_id}"
            )
            submission_id, status = nx.upload(prediction_filename,
                                              tournament,
                                              NUMERAI_PUBLIC_ID,
                                              NUMERAI_SECRET_KEY,
                                              block=False,
                                              n_tries=N_TRIES,
                                              sleep_seconds=SLEEP_SECONDS,
                                              model_id=model_id)
            logger.info(status)
            logger.info(
                f'Successfully submitted predictions using model_id {model_id}'
            )
        except Exception as e:
            logger.error(f'Upload failed with - {e}')
Example #17
def compare_models(data):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then compare performance of the models
    """

    # we'll look at 5 models
    prediction = nx.production(nx.logistic(), data, verbosity=1)
    prediction += nx.production(nx.extratrees(), data, verbosity=1)
    prediction += nx.production(nx.randomforest(), data, verbosity=1)
    prediction += nx.production(nx.mlpc(), data, verbosity=1)
    prediction += nx.production(nx.logisticPCA(), data, verbosity=1)

    # correlation of models with logistic regression
    print('\nCorrelation:\n')
    prediction.correlation('logistic')

    # compare performance of models
    print('\nPerformance comparison:\n')
    print(prediction.performance(data['validation'], sort_by='logloss'))

    # dominance of models
    print('\nModel dominance:\n')
    print(prediction.dominance(data['validation'], sort_by='logloss'))

    # dominance between two models
    print('\nModel dominance between two models:\n')
    df = prediction[['logistic', 'logisticPCA']].dominance(data['validation'])
    print(df)

    # originality given that logistic model has already been submitted
    print('\nModel originality (versus logistic):\n')
    print(prediction.originality(['logistic']))

    # concordance
    print('\nConcordance:\n')
    print(prediction.concordance(data))
Example #18
def main():

    # download dataset from numerai
    nx.download_dataset('numerai_dataset.zip', verbose=True)

    # load numerai dataset
    data = nx.load_zip('numerai_dataset.zip', verbose=True)

    # we will use logistic regression; you will want to write your own model
    model = nx.logistic()

    # fit model with train data and make predictions for tournament data
    prediction = nx.production(model, data)

    # save predictions to csv file for later upload to numerai
    prediction.to_csv('logistic.csv', verbose=True)
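
The script then needs only the usual entry-point guard:

if __name__ == '__main__':
    main()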
Example #19
def predict():

    tournaments = nx.tournament_names()
    print(tournaments)

    # download dataset from numerai
    data = nx.download('numerai_dataset.zip', load=False)
    print('data downloaded')
    data = nx.load_zip('numerai_dataset.zip', single_precision=True)
    print('data loaded')

    for tournament_name in tournaments:
        saved_model_name = 'model_trained_' + tournament_name
        if os.path.exists(saved_model_name):
            print("using saved model for", tournament_name)
            m = model.LinearModel.load(saved_model_name)
        else:
            print("saved model not found for", tournament_name)
            m = model.LinearModel(verbose=True)

            print("training model for", tournament_name)
            m.fit(data['train'], tournament_name)

        print("running predictions for", tournament_name, flush=True)
        # fit model with train data and make predictions for tournament data
        prediction = nx.production(m, data, tournament=tournament_name)

        # save predictions to csv file
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'
        prediction.to_csv(prediction_filename, verbose=True)

    # submit the prediction

    # Numerai API key
    # You will need to create an API key by going to https://numer.ai/account and clicking "Add" under the "Your API keys" section.
    # Select the following permissions for the key: "Upload submissions", "Make stakes", "View historical submission info", "View user info"
    public_id = os.environ["NUMERAI_PUBLIC_ID"]
    secret_key = os.environ["NUMERAI_SECRET_KEY"]

    for tournament_name in tournaments:
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'

        api = NumerAPI(public_id=public_id, secret_key=secret_key)
        model_id = api.get_models()
        api.upload_predictions(prediction_filename,
                               model_id=model_id['akrimedes_2'])
def numerox_example():
    """
    Example of how to prepare a submission for the Numerai tournament.
    It uses Numerox which you can install with: pip install numerox
    For more information see: https://github.com/kwgoodman/numerox
    """

    # download dataset from numerai, save it and then load it
    data = nx.download('numerai_dataset.zip')

    # we will use logistic regression; you will want to write your own model
    model = nx.logistic()

    # fit model with train data and make predictions for tournament data
    prediction = nx.production(model, data, tournament='bernie')

    # save predictions to csv file
    prediction.to_csv('logistic.csv', verbose=True)
Example #21
def predict_and_submit():
    # Numerai API key
    # You will need to create an API key by going to https://numer.ai/account and clicking "Add" under the "Your API keys" section.
    # Select the following permissions for the key: "Upload submissions", "Make stakes", "View historical submission info", "View user info"
    public_id = os.environ["NUMERAI_PUBLIC_ID"]
    secret_key = os.environ["NUMERAI_SECRET_KEY"]

    tournaments = nx.tournament_names()
    print(tournaments)

    # download dataset from numerai
    data = nx.download('numerai_dataset.zip')

    for tournament_name in tournaments:
        saved_model_name = 'model_trained_' + tournament_name
        if os.path.exists(saved_model_name):
            print("using saved model for", tournament_name)
            m = model.LogisticModel.load(saved_model_name)
        else:
            print("saved model not found for", tournament_name)
            m = model.LogisticModel(verbose=True)

            print("training model for", tournament_name)
            m.fit(data['train'], tournament_name)

        print("running predictions for", tournament_name)
        # fit model with train data and make predictions for tournament data
        prediction = nx.production(m, data, tournament=tournament_name)

        # save predictions to csv file
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'
        prediction.to_csv(prediction_filename, verbose=True)

    # submit the prediction
    for tournament_name in tournaments:
        prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'

        submission_id = nx.upload(prediction_filename,
                                  tournament_name,
                                  public_id,
                                  secret_key,
                                  block=False)
Example #22
    m.append(log_loss(y, yhat))
    m.append(roc_auc_score(y, yhat))
    yh = np.zeros(yhat.size)
    yh[yhat >= 0.5] = 1
    m.append(accuracy_score(y, yh))
    m.append(yhat.std())
    return m
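
The snippet above begins mid-function; a self-contained version of such a metrics helper might look like this (the function name and signature are assumptions):

import numpy as np
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

def metrics(y, yhat):
    "return [logloss, auc, accuracy, std] for probability predictions yhat"
    m = []
    m.append(log_loss(y, yhat))
    m.append(roc_auc_score(y, yhat))
    yh = np.zeros(yhat.size)
    yh[yhat >= 0.5] = 1  # threshold probabilities at 0.5
    m.append(accuracy_score(y, yh))
    m.append(yhat.std())
    return m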


if __name__ == '__main__':
    # test prediction.performance()
    import numerox as nx
    data = nx.load_data('/data/nx/numerai_dataset_20171024.hdf')
    model = nx.model.logistic()
    prediction1 = nx.backtest(model, data, verbosity=1)
    prediction2 = nx.production(model, data)
    """
    prediction = prediction1 + prediction2
    print(prediction)
    prediction.performance(data)
    prediction.save('/data/nx/pred/logistic_1e-5.pred')
    """

    """
    for c in (1e-1, 1e-2, 1e-3, 1e-4, 1e-5):
        print c
        model = nx.model.logistic(c)
        prediction1 = nx.backtest(model, data, verbosity=1)
        prediction2 = nx.production(model, data)
        prediction = prediction1 + prediction2
        prediction.save('/data/nx/pred/logistic_{:.0e}.pred'.format(c))
Example #23
def test_backtest_production():
    "Make sure backtest and production run"
    d = testing.micro_data()
    model = nx.fifty()
    with testing.HiddenPrints():
        p = nx.production(model, d)
        ok_(p.shape[1] == 5, 'wrong number of tournaments')
        ok_(p.tournaments() == nx.tournament_all(), 'wrong tournaments')
        p = nx.backtest(model, d, kfold=2)
        ok_(p.shape[1] == 5, 'wrong number of tournaments')
        ok_(p.tournaments() == nx.tournament_all(), 'wrong tournaments')
        for verbosity in (0, 1, 2, 3):
            nx.backtest(model, d, tournament=3, kfold=2, verbosity=verbosity)
            nx.production(model, d, tournament='ken', verbosity=verbosity)
            nx.production(model, d, tournament=4, verbosity=verbosity)
            nx.production(model, d, tournament=None, verbosity=verbosity)
            if verbosity == 3:
                nx.production(model, d, tournament=5, verbosity=verbosity)
                nx.production(model,
                              d,
                              tournament='charles',
                              verbosity=verbosity)
Example #24
def test_prediction_concordance():
    """make sure prediction.concordance runs"""
    d = nx.testing.play_data()
    p = nx.production(nx.linear(), d, 8, verbosity=0)
    df = p.concordance(d)
    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
Example #25
import numerox as nx
from model import get_model


data = nx.load_zip('numerai_dataset.zip')
model = get_model()

prediction = nx.production(model, data, 'bernie', verbosity=1)
prediction.to_csv('output.csv', tournament='bernie')
Example #26
def test_prediction_concordance():
    "make sure prediction.concordance runs"
    d = testing.play_data()
    p = nx.production(nx.logistic(), d, 'model1', verbosity=0)
    df = p.concordance(d)
    ok_(isinstance(df, pd.DataFrame), 'expecting a dataframe')
Example #27
def concordance_example():
    data = nx.play_data()
    model = nx.logistic()
    prediction = nx.production(model, data)
    concord = nx.concordance(data, prediction)
    print("concordance {:.4f} (less than 0.12 is passing)".format(concord))
Example #28
    def check(self, data, verbose=True):
        """
        Run Numerai upload checks.

        Parameters
        ----------
        data : nx.Data
            Data object of Numerai dataset.
        verbose : bool
            By default, True, output is printed to stdout.

        Returns
        -------
        check : dict
            A dictionary where the keys are the (name, tournament) pairs and
            the values are Pandas DataFrames that contain the results of the
            checks.
        """

        # calc example predictions
        example_y = {}
        for tournament in self.tournaments(as_str=False):
            ep = nx.production(nx.example_predictions(),
                               data,
                               tournament=tournament,
                               verbosity=0)
            ep = ep.loc[self.ids]
            example_y[tournament] = ep.y[:, 0]

        df_dict = {}
        columns = ['validation', 'test', 'live', 'all', 'pass']
        data = data.loc[self.ids]
        regions = data.region
        pairs = list(self.pairs(as_str=False))

        # check each model, tournament pair
        for pair in pairs:
            print('{}, {}'.format(pair[0], nx.tournament_str(pair[1])))
            df = pd.DataFrame(columns=columns)
            idx = pairs.index(pair)
            y = self.y[:, idx]
            for region in ('validation', 'test', 'live', 'all'):
                yexi = example_y[pair[1]]
                if region == 'all':
                    yi = y
                else:
                    idx = regions == region
                    yi = y[idx]
                    yexi = yexi[idx]
                df.loc['corr', region] = pearsonr(yi, yexi)[0]
                df.loc['rcorr', region] = spearmanr(yi, yexi)[0]
                df.loc['min', region] = yi.min()
                df.loc['max', region] = yi.max()
                maz = np.abs((yi - yi.mean()) / yi.std()).max()
                df.loc['maz', region] = maz

            df.loc['corr', 'pass'] = (df.loc['corr'][:-1] >= 0.2).all()
            df.loc['rcorr', 'pass'] = (df.loc['rcorr'][:-1] >= 0.2).all()
            df.loc['min', 'pass'] = (df.loc['min'][:-1] >= 0.3).all()
            df.loc['max', 'pass'] = (df.loc['max'][:-1] <= 0.7).all()
            df.loc['maz', 'pass'] = (df.loc['maz'][:-1] <= 15).all()

            print(df)

            df_dict[pair] = df

        return df_dict
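
A sketch of consuming the returned dictionary (the (name, tournament) key shown is hypothetical; use one of prediction.pairs()):

# run the upload checks, then inspect the DataFrame for one pair
checks = prediction.check(data)
df = checks[('logistic', 1)]  # hypothetical key
print(df.loc['corr', 'pass'])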
Example #29
        X_train = X[train_index][:, 3:]
        # y_train remains the same
        y_train = y[train_index]

        print(">> running split #", counter)

        print(">> finding best params")
        clf = model_selection.GridSearchCV(linear_model.LogisticRegression(),
                                           parameters,
                                           scoring="neg_log_loss",
                                           cv=kfold_split,
                                           n_jobs=-1)
        clf.fit(X_train, y_train)
        best_params = clf.best_params_
        print(">> best params: ", best_params)

        # create a new logistic regression model for the tournament
        model = logistic(best_params)

        print(">> training info:")
        train = nx.backtest(model, data, tournament, verbosity=1)

        print(">> validation info:")
        validation = nx.production(model, data, tournament, verbosity=1)

        print(">> saving validation info:")
        validation.to_csv(MODEL_NAME + "-" + tournament + "-" +
                          str(counter) + ".csv")
        print(">> done saving validation info")

        print("\n")

        counter += 1

Example #30
for tournament_name in tournaments:
    saved_model_name = 'model_trained_' + tournament_name
    if os.path.exists(saved_model_name):
        print("using saved model for", tournament_name)
        m = model.LinearModel.load(saved_model_name)
    else:
        print("saved model not found for", tournament_name)
        m = model.LinearModel(verbose=True)

        print("training model for", tournament_name)
        m.fit(data['train'], tournament_name)

    print("running predictions for", tournament_name, flush=True)
    # fit model with train data and make predictions for tournament data
    prediction = nx.production(m, data, tournament=tournament_name)

    # save predictions to csv file
    prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'
    prediction.to_csv(prediction_filename, verbose=True)

# submit the prediction

# Numerai API key
# You will need to create an API key by going to https://numer.ai/account and clicking "Add" under the "Your API keys" section.
# Select the following permissions for the key: "Upload submissions", "Make stakes", "View historical submission info", "View user info"
public_id = os.environ["NUMERAI_PUBLIC_ID"]
secret_key = os.environ["NUMERAI_SECRET_KEY"]

for tournament_name in tournaments:
    prediction_filename = '/tmp/prediction_' + tournament_name + '.csv'