Example #1
0
def score_solution():
    '''
    Fit the solution pipeline on the training split and score it on the
    test split.

    The pipeline is obtained from `solution.get_pipeline()`. Both splits are
    loaded with `get_data` and passed through `missing_values` and then
    `encoding` before being handed to the pipeline.

    Returns:
        The value of `sklearn.metrics.roc_auc_score` computed from the test
        labels and the predicted probabilities.

    Raises:
        AssertionError: if `solution.get_pipeline()` does not return an
            `sklearn.pipeline.Pipeline`, or if `predict_proba` returns an
            array that is neither 1-D nor 2-D with exactly two columns.
    '''

    # Imported lazily so the module under evaluation is only loaded when
    # scoring is actually requested.
    import solution
    pipeline = solution.get_pipeline()

    error_message = (
        'Your `solution.get_pipeline` implementation should '
        'return an `sklearn.pipeline.Pipeline`.'
    )
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message

    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    # `encoding` returns a pair; only the first element is used here.
    X_train, _ = encoding(missing_values(X_train))
    print('\n')
    print('Training........')
    print('\n')
    pipeline.fit(X_train, y_train)

    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    # NOTE(review): the test split is encoded independently of the training
    # split. If `encoding` learns anything from its input, the two splits
    # may end up with different encodings -- confirm against `encoding`.
    X_test, _ = encoding(missing_values(X_test))
    print('\n')
    print('Test Results')
    print('\n')
    y_pred = pipeline.predict_proba(X_test)

    # The message is written as one double-quoted literal so the apostrophe
    # in "sklearn's" survives (adjacent quoted literals silently dropped it).
    assert (y_pred.ndim == 1) or (
        y_pred.ndim == 2 and y_pred.shape[1] == 2
    ), (
        "The predicted probabilities should match sklearn's "
        '`predict_proba` output shape.'
    )
    # For a two-column result keep only the second column as the score.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    return sklearn.metrics.roc_auc_score(y_test, y_pred)
Example #2
0
def score_solution(model, save=0):
    '''
    Fit the solution pipeline built around `model` and score it on the test
    split.

    Parameters:
        model ~ classification model, passed to `solution.get_pipeline`
        save ~ flag; when it equals 1 the fitted pipeline is written to
               'Best_Estimator.sav' with `joblib.dump`

    Returns:
        The value of `sklearn.metrics.roc_auc_score` computed from the test
        labels and the predicted probabilities.

    Raises:
        AssertionError: if `solution.get_pipeline(model)` does not return an
            `sklearn.pipeline.Pipeline`, or if `predict_proba` returns an
            array that is neither 1-D nor 2-D with exactly two columns.
    '''
    # Ask the solution for the model pipeline.
    import solution
    pipeline = solution.get_pipeline(model)
    error_message = 'Your `solution.get_pipeline` implementation should ' \
        'return an `sklearn.pipeline.Pipeline`.'
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message
    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    pipeline.fit(X_train, y_train)
    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    y_pred = pipeline.predict_proba(X_test)
    # Check that the predicted probabilities have an sklearn-compatible shape.
    # The message uses a double-quoted literal so the apostrophe in
    # "sklearn's" is kept (adjacent quoted literals silently dropped it).
    assert (y_pred.ndim == 1) or \
        (y_pred.ndim == 2 and y_pred.shape[1] == 2), \
        "The predicted probabilities should match sklearn's " \
        '`predict_proba` output shape.'
    # For a two-column result keep only the second column as the score.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]
    # Persist the fitted pipeline (relative to the current working directory)
    # before scoring, when requested.
    if save == 1:
        joblib.dump(pipeline, 'Best_Estimator.sav')
    # Evaluate the predictions with the AUC of the ROC curve.
    return sklearn.metrics.roc_auc_score(y_test, y_pred)
Example #3
0
def grid_search():
    '''
    Run a grid search over the solution pipeline on the training split and
    print the outcome.

    The parameter grid is read from the 'GRID_PARAMS' entry of `settings()`.
    After fitting, the best parameter combination, a separator line and the
    full cross-validation results are printed. Nothing is returned.
    '''
    from solution import get_pipeline

    param_grid = settings()['GRID_PARAMS']
    searcher = GridSearchCV(get_pipeline(), param_grid, n_jobs=8)

    features, labels = get_data(subset='train')
    searcher.fit(features, labels)

    # Report the winning combination first, then the raw per-candidate data.
    print(searcher.best_params_)
    print("====='")
    print(searcher.cv_results_)
Example #4
0
def score_solution():
    '''
    Fit the solution pipeline on the training split and score it on the
    test split.

    Returns:
        The value of `sklearn.metrics.roc_auc_score` computed from the test
        labels and the predicted probabilities.

    Raises:
        AssertionError: if `solution.get_pipeline()` does not return an
            `sklearn.pipeline.Pipeline`, or if `predict_proba` returns an
            array that is neither 1-D nor 2-D with exactly two columns.
    '''
    # Ask the solution for the model pipeline.
    import solution
    pipeline = solution.get_pipeline()
    error_message = 'Your `solution.get_pipeline` implementation should ' \
        'return an `sklearn.pipeline.Pipeline`.'
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message
    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    pipeline.fit(X_train, y_train)
    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    y_pred = pipeline.predict_proba(X_test)
    # Check that the predicted probabilities have an sklearn-compatible shape.
    # The message uses a double-quoted literal so the apostrophe in
    # "sklearn's" is kept (adjacent quoted literals silently dropped it).
    assert (y_pred.ndim == 1) or \
        (y_pred.ndim == 2 and y_pred.shape[1] == 2), \
        "The predicted probabilities should match sklearn's " \
        '`predict_proba` output shape.'
    # For a two-column result keep only the second column as the score.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]
    # Evaluate the predictions with the AUC of the ROC curve.
    return sklearn.metrics.roc_auc_score(y_test, y_pred)
Example #5
0
# Grid-search script: tunes the `tensorflowestimator` step of the solution
# pipeline and prints the best parameter combination found.

# Superseded grid, kept for reference:
# param_grid = dict(
#     #pipeline__missingcategoricalstransformer__strategy=["none"],
#     tensorflowestimator__dropout=[0.15, 0.20, 0.25, 0.3, 0.35, 0.40],
#     tensorflowestimator__hidden_units=[[64,32], [24], [48], [30, 12], [128, 48], [128,48,12]],
#     tensorflowestimator__training_steps=[900]
# )
# Presumably the configuration selected from the grid above -- confirm:
# TensorFlowEstimator(dropout=0.3, hidden_units=[128, 48, 12], training_steps=900)
param_grid = dict(
    # pipeline__missingcategoricalstransformer__strategy=["none"],
    tensorflowestimator__dropout=[0.35, 0.40, 0.45, 0.5],
    # NOTE(review): 514 in the second layout may be meant as 512 -- confirm.
    tensorflowestimator__hidden_units=[
        [128, 48, 12],
        [1024, 514, 256, 128, 64],
        [200, 100, 40],
        [56, 28, 12],
    ],
    tensorflowestimator__training_steps=[900],
)

# Presumably the best parameters reported by a previous run of this grid -- confirm:
# {'tensorflowestimator__dropout': 0.4, 'tensorflowestimator__hidden_units': [200, 100, 40], 'tensorflowestimator__training_steps': 900}

gs = GridSearchCV(solution.get_pipeline(), param_grid, cv=6, n_jobs=4)

# The name imported here must be the one called on the next line; the
# original imported `get_dataw`, which is never used.
from challenge import get_data
X, y = get_data()


# Disabled index-rebuilding experiment, kept for reference:
# print(X.index.values)
# index = np.arange(X.index.values[0], X.index.values[-1]+1)
# print(index)
#
# X.set_index(index, inplace=True)
# print(X.index.values)

gs.fit(X, y)
print(gs.best_params_)