Ejemplo n.º 1
0
def compare_change(data):
    """
    Measure how a change to the dataset affects model performance.

    Five models are run on `data` through `nx.production` (fit on training
    data, predict for tournament data). The same five models are then run on
    a dataset built with `data.xnew` whose feature matrix has the
    element-wise square of every feature appended. Finally the comparison of
    the two sets of predictions on `data['validation']` is printed.
    """

    # one factory per model so that every run constructs its own models
    model_factories = (nx.logistic, nx.extratrees, nx.randomforest, nx.mlpc,
                       nx.logisticPCA)

    def run_models(dataset):
        # run each model on `dataset`, accumulating the predictions with +=
        combined = None
        for make_model in model_factories:
            result = nx.production(make_model(), dataset, verbosity=1)
            if combined is None:
                combined = result
            else:
                combined += result
        return combined

    print('\nStandard dataset:\n')
    baseline = run_models(data)

    # the change could be anything; as an example append the square of each
    # feature to the original features
    features = data.x
    data2 = data.xnew(np.hstack((features, features * features)))

    print('\nDataset expanded with squared features:\n')
    expanded = run_models(data2)

    # report performance with (2) and without (1) the change
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    print(baseline.compare(data['validation'], expanded))
Ejemplo n.º 2
0
def improve_model(data, tournament='kazutsugi'):
    """
    Check whether a change to the dataset improves model performance.

    A list of five models is run on `data` for the given `tournament` through
    `nx.production` (fit on training data, predict for tournament data). The
    same list is then run on a dataset built with `data.xnew` whose feature
    matrix has the element-wise square of every feature appended. Finally
    the comparison of the two sets of predictions on `data['validation']` is
    printed.
    """

    # the five models under test; the same list is used for both runs
    candidates = [
        nx.linear(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.linearPCA(),
    ]

    print('\nStandard dataset:\n')

    # base case: unmodified data
    baseline = nx.production(candidates, data, tournament, verbosity=1)

    # the change could be anything; as an example append the square of each
    # feature to the original features
    features = data.x
    expanded_x = np.hstack((features, features * features))
    data2 = data.xnew(expanded_x)

    print('\nDataset expanded with squared features:\n')

    # same models, expanded data
    expanded = nx.production(candidates, data2, tournament, verbosity=1)

    # report performance with (2) and without (1) the change
    print('\nCompare (1 is regular dataset; 2 expanded dataset):\n')
    report = baseline.compare(data['validation'], expanded, tournament)
    print(report)
Ejemplo n.º 3
0
def concordance(data, tournament='bernie'):
    """
    Print the concordance of the predictions of three models.

    The models are run on `data` for the given `tournament` through
    `nx.production`. A concordance below 0.12 passes numerai's check. For
    the calculation to be accurate `data` must be the full dataset.
    """
    model_list = [nx.logistic(), nx.extratrees(), nx.mlpc()]
    predictions = nx.production(model_list, data, tournament)
    print("\nA concordance less than 0.12 is passing")
    result = predictions.concordance(data)
    print(result)
Ejemplo n.º 4
0
def concordance_example(data):
    """
    Print the concordance of the predictions of three models.

    Each model is run on `data` through `nx.production` and the results are
    accumulated into a single prediction object. A concordance below 0.12
    passes numerai's check. For the calculation to be accurate `data` must
    be the full dataset.
    """
    combined = None
    for make_model in (nx.logistic, nx.extratrees, nx.mlpc):
        result = nx.production(make_model(), data)
        if combined is None:
            combined = result
        else:
            combined += result
    print("\nA concordance less than 0.12 is passing")
    print(combined.concordance(data))
Ejemplo n.º 5
0
def get_models():
    """Return a list of newly constructed model instances."""
    return [
        nx.logistic(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.logisticPCA(),
        nx.example_predictions(),
        fifty(),
    ]
Ejemplo n.º 6
0
def get_models():
    """Return a list of newly constructed model instances."""
    return [
        nx.linear(),
        nx.ridge_mean(),
        nx.extratrees(),
        nx.randomforest(),
        nx.mlpc(),
        nx.linearPCA(),
        nx.example_predictions(),
        nx.fifty(),
    ]
Ejemplo n.º 7
0
def compare_models(data):
    """
    Run several models and compare them with each other.

    Five models are run on `data` through `nx.production` (fit on training
    data, predict for tournament data) and accumulated into one prediction
    object. Correlation, performance, dominance, originality and concordance
    reports are then produced from it.
    """

    # run the five models, accumulating the predictions with +=
    combined = None
    for make_model in (nx.logistic, nx.extratrees, nx.randomforest, nx.mlpc,
                       nx.logisticPCA):
        result = nx.production(make_model(), data, verbosity=1)
        if combined is None:
            combined = result
        else:
            combined += result

    # correlation of every model with logistic regression; the return value
    # is not printed here
    print('\nCorrelation:\n')
    combined.correlation('logistic')

    # performance of the models on the validation data
    print('\nPerformance comparison:\n')
    performance = combined.performance(data['validation'], sort_by='logloss')
    print(performance)

    # dominance of the models on the validation data
    print('\nModel dominance:\n')
    dominance = combined.dominance(data['validation'], sort_by='logloss')
    print(dominance)

    # dominance restricted to two of the models
    print('\nModel dominance between two models:\n')
    pair = combined[['logistic', 'logisticPCA']]
    print(pair.dominance(data['validation']))

    # originality given that the logistic model has already been submitted
    print('\nModel originality (versus logistic):\n')
    print(combined.originality(['logistic']))

    # concordance, computed against the dataset passed in
    print('\nConcordance:\n')
    print(combined.concordance(data))