Example 1
def compare_change(data):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then change the data, rerun and compare performance with and without the
    change.
    """

    print('\nStandard dataset:\n')

    # we'll look at 5 models
    prediction = nx.production(nx.logistic(), data, verbosity=1)
    prediction += nx.production(nx.extratrees(), data, verbosity=1)
    prediction += nx.production(nx.randomforest(), data, verbosity=1)
    prediction += nx.production(nx.mlpc(), data, verbosity=1)
    prediction += nx.production(nx.logisticPCA(), data, verbosity=1)

    # let's now make a change, could be anything; as an example let's add
    # the square of each feature to the dataset
    x = np.hstack((data.x, data.x * data.x))
    data2 = data.xnew(x)

    print('\nDataset expanded with squared features:\n')

    # rerun all models with the new expanded data
    prediction2 = nx.production(nx.logistic(), data2, verbosity=1)
    prediction2 += nx.production(nx.extratrees(), data2, verbosity=1)
    prediction2 += nx.production(nx.randomforest(), data2, verbosity=1)
    prediction2 += nx.production(nx.mlpc(), data2, verbosity=1)
    prediction2 += nx.production(nx.logisticPCA(), data2, verbosity=1)

    # compare performance
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(prediction.compare(data['validation'], prediction2))
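All of the snippets in this collection assume the usual numerox and numpy imports, implied by the nx. and np. prefixes but not shown in the extracts. A minimal sketch of exercising this example on the small built-in dataset (nx.play_data() also appears in the Runner example further down); on real tournament data the call is identical:

import numpy as np
import numerox as nx

# toy dataset bundled with numerox; substitute the real tournament data
# for an actual comparison
data = nx.play_data()
compare_change(data)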
Example 2
def improve_model(data, tournament='kazutsugi'):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then change the data, rerun and compare performance with and without the
    change.
    """

    # we'll look at 5 models
    models = [nx.linear(), nx.extratrees(), nx.randomforest(), nx.mlpc(),
              nx.linearPCA()]

    print('\nStandard dataset:\n')

    # first run the base case
    prediction = nx.production(models, data, tournament, verbosity=1)

    # let's now make a change, could be anything; as an example let's add
    # the square of each feature to the dataset
    x = np.hstack((data.x, data.x * data.x))
    data2 = data.xnew(x)

    print('\nDataset expanded with squared features:\n')

    # rerun all models with the new expanded data
    prediction2 = nx.production(models, data2, tournament, verbosity=1)

    # compare performance
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(prediction.compare(data['validation'], prediction2, tournament))
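A minimal sketch of calling this variant; the tournament name is just the function's own default:

improve_model(nx.play_data(), tournament='kazutsugi')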
Example 3
def concordance(data, tournament='bernie'):
    """
    Example showing how to calculate concordance.
    Concordance must be less than 0.12 to pass numerai's check.
    For an accurate concordance calculation `data` must be the full dataset.
    """
    models = [nx.logistic(), nx.extratrees(), nx.mlpc()]
    p = nx.production(models, data, tournament)
    print("\nA concordance less than 0.12 is passing")
    print(p.concordance(data))
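As the docstring says, only the full dataset gives an accurate concordance; a sketch using the toy data still shows the mechanics of the call:

# illustrates the call only; use the full dataset for a meaningful number
concordance(nx.play_data(), tournament='bernie')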
Example 4
def concordance_example(data):
    """
    Example showing how to calculate concordance.
    Concordance must be less than 0.12 to pass numerai's check.
    For an accurate concordance calculation `data` must be the full dataset.
    """
    prediction = nx.production(nx.logistic(), data)
    prediction += nx.production(nx.extratrees(), data)
    prediction += nx.production(nx.mlpc(), data)
    print("\nA concordance less than 0.12 is passing")
    print(prediction.concordance(data))
Example 5
def get_models():
    models = [
        nx.logistic(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.logisticPCA(),
        nx.example_predictions(),
        fifty()  # custom model, presumably defined elsewhere in the original example module
    ]
    return models
Example 6
def get_models():

    models = [nx.linear(),
              nx.ridge_mean(),
              nx.extratrees(),
              nx.randomforest(),
              nx.mlpc(),
              nx.linearPCA(),
              nx.example_predictions(),
              nx.fifty()]

    return models
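The returned list can be passed straight to nx.production, as Example 2 does. A sketch under that assumption (note that some entries, such as nx.example_predictions(), may require the full downloaded dataset rather than the toy data):

models = get_models()
prediction = nx.production(models, nx.play_data(), 'kazutsugi', verbosity=1)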
Example 7
def runner_example():

    data = nx.play_data()
    splitter = nx.CVSplitter(data)

    # let's run 3 models
    m1 = {'model': nx.logistic(), 'prediction_file': None, 'csv_file': None}
    m2 = {'model': nx.logistic(1e-4)}
    m3 = {'model': nx.extratrees()}
    run_list = [m1, m2, m3]

    # we won't save anything, just display the results
    runner = nx.Runner(run_list, splitter, verbosity=1)
    runner.run()
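The None values in m1 suggest the Runner can also persist results: presumably, supplying paths for 'prediction_file' and 'csv_file' writes that entry's predictions to disk instead of only displaying them. A hypothetical entry (the file names are placeholders, not from the source):

m1 = {'model': nx.logistic(),
      'prediction_file': 'logistic_prediction.h5',  # hypothetical path
      'csv_file': 'logistic_prediction.csv'}        # hypothetical path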
Example 8
def compare_models(data):
    """
    Run multiple models: fit on training data, predict for tournament data.
    Then compare performance of the models
    """

    # we'll look at 5 models
    prediction = nx.production(nx.logistic(), data, verbosity=1)
    prediction += nx.production(nx.extratrees(), data, verbosity=1)
    prediction += nx.production(nx.randomforest(), data, verbosity=1)
    prediction += nx.production(nx.mlpc(), data, verbosity=1)
    prediction += nx.production(nx.logisticPCA(), data, verbosity=1)

    # correlation of models with logistic regression
    print('\nCorrelation:\n')
    prediction.correlation('logistic')

    # compare performance of models
    print('\nPerformance comparison:\n')
    print(prediction.performance(data['validation'], sort_by='logloss'))

    # dominance of models
    print('\nModel dominance:\n')
    print(prediction.dominance(data['validation'], sort_by='logloss'))

    # dominance between two models
    print('\nModel dominance between two models:\n')
    df = prediction[['logistic', 'logisticPCA']].dominance(data['validation'])
    print(df)

    # originality given that logistic model has already been submitted
    print('\nModel originality (versus logistic):\n')
    print(prediction.originality(['logistic']))

    # concordance
    print('\nConcordance:\n')
    print(prediction.concordance(data))
Example 9
def get_models():
    models = [nx.logistic(), nx.extratrees(), nx.randomforest()]
    if HAS_XGBOOST:
        models.append(nx.xgboost())
    return models
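HAS_XGBOOST is defined outside this extract; a common way to set such a flag, and an assumption about how the original module does it, is:

# assumed definition of the HAS_XGBOOST flag used above
try:
    import xgboost  # noqa: F401
    HAS_XGBOOST = True
except ImportError:
    HAS_XGBOOST = False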