def score_solution():
    """Fit the solution pipeline on the training data and score it on the test data.

    The pipeline comes from ``solution.get_pipeline()``. Each subset returned
    by ``get_data`` is passed through ``missing_values`` and then ``encoding``
    before being handed to the pipeline; the second value returned by
    ``encoding`` is discarded.

    Returns:
        The value of ``sklearn.metrics.roc_auc_score`` for the test labels
        and the predicted probabilities.

    Raises:
        AssertionError: If ``solution.get_pipeline()`` does not return an
            ``sklearn.pipeline.Pipeline``, or if the ``predict_proba`` output
            is neither 1-D nor 2-D with exactly two columns.
    """
    # Ask the solution for the model pipeline.
    import solution

    pipeline = solution.get_pipeline()
    error_message = (
        'Your `solution.get_pipeline` implementation should '
        'return an `sklearn.pipeline.Pipeline`.'
    )
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message

    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    X_train, _ = encoding(missing_values(X_train))
    print('\n')
    print('Training........')
    print('\n')
    pipeline.fit(X_train, y_train)

    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    X_test, _ = encoding(missing_values(X_test))
    print('\n')
    print('Test Results')
    print('\n')
    y_pred = pipeline.predict_proba(X_test)

    # Check that the predicted probabilities have an sklearn-compatible shape.
    # The message uses double quotes so the apostrophe in "sklearn's" survives;
    # adjacent single-quoted literals silently concatenated to "sklearns".
    assert (y_pred.ndim == 1) or (
        y_pred.ndim == 2 and y_pred.shape[1] == 2
    ), (
        "The predicted probabilities should match sklearn's "
        '`predict_proba` output shape.'
    )

    # For a two-column output, score on column 1 only.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    # Evaluate the predictions with the AUC of the ROC curve.
    return sklearn.metrics.roc_auc_score(y_test, y_pred)
def score_solution(model, save=0):
    """Fit the solution pipeline built around ``model`` and score it on the test data.

    Args:
        model: Classification model forwarded to ``solution.get_pipeline``.
        save: Flag; when it equals ``1`` the fitted pipeline is written to
            ``'Best_Estimator.sav'`` with ``joblib.dump``.

    Returns:
        The value of ``sklearn.metrics.roc_auc_score`` for the test labels
        and the predicted probabilities.

    Raises:
        AssertionError: If ``solution.get_pipeline(model)`` does not return an
            ``sklearn.pipeline.Pipeline``, or if the ``predict_proba`` output
            is neither 1-D nor 2-D with exactly two columns.
    """
    # Ask the solution for the model pipeline.
    import solution

    pipeline = solution.get_pipeline(model)
    error_message = (
        'Your `solution.get_pipeline` implementation should '
        'return an `sklearn.pipeline.Pipeline`.'
    )
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message

    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    pipeline.fit(X_train, y_train)

    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    y_pred = pipeline.predict_proba(X_test)

    # Check that the predicted probabilities have an sklearn-compatible shape.
    # The message uses double quotes so the apostrophe in "sklearn's" survives
    # (adjacent single-quoted literals concatenated to "sklearns"), and the
    # stray trailing backtick after "shape" has been dropped.
    assert (y_pred.ndim == 1) or (
        y_pred.ndim == 2 and y_pred.shape[1] == 2
    ), (
        "The predicted probabilities should match sklearn's "
        '`predict_proba` output shape.'
    )

    # For a two-column output, score on column 1 only.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    # Persist the fitted pipeline when requested.
    if save == 1:
        joblib.dump(pipeline, 'Best_Estimator.sav')

    # Evaluate the predictions with the AUC of the ROC curve.
    return sklearn.metrics.roc_auc_score(y_test, y_pred)
def grid_search():
    """Run a grid search over the solution pipeline and print the outcome.

    The parameter grid is read from the ``'GRID_PARAMS'`` entry of
    ``settings()``. The search is fitted on the ``'train'`` subset returned by
    ``get_data`` using 8 parallel jobs, after which the best parameters and
    the full cross-validation results are printed. Nothing is returned.
    """
    from solution import get_pipeline

    grid_params = settings()['GRID_PARAMS']
    search = GridSearchCV(get_pipeline(), grid_params, n_jobs=8)

    X_train, y_train = get_data(subset='train')
    # fit() returns the search object itself, so there is nothing to keep.
    search.fit(X_train, y_train)

    print(search.best_params_)
    print("====='")
    print(search.cv_results_)
def score_solution():
    """Fit the solution pipeline on the training data and score it on the test data.

    Returns:
        The value of ``sklearn.metrics.roc_auc_score`` for the test labels
        and the predicted probabilities.

    Raises:
        AssertionError: If ``solution.get_pipeline()`` does not return an
            ``sklearn.pipeline.Pipeline``, or if the ``predict_proba`` output
            is neither 1-D nor 2-D with exactly two columns.
    """
    # Ask the solution for the model pipeline.
    import solution

    pipeline = solution.get_pipeline()
    error_message = (
        'Your `solution.get_pipeline` implementation should '
        'return an `sklearn.pipeline.Pipeline`.'
    )
    assert isinstance(pipeline, sklearn.pipeline.Pipeline), error_message

    # Train the model on the training DataFrame.
    X_train, y_train = get_data(subset='train')
    pipeline.fit(X_train, y_train)

    # Apply the model to the test DataFrame.
    X_test, y_test = get_data(subset='test')
    y_pred = pipeline.predict_proba(X_test)

    # Check that the predicted probabilities have an sklearn-compatible shape.
    # The message uses double quotes so the apostrophe in "sklearn's" survives;
    # adjacent single-quoted literals silently concatenated to "sklearns".
    assert (y_pred.ndim == 1) or (
        y_pred.ndim == 2 and y_pred.shape[1] == 2
    ), (
        "The predicted probabilities should match sklearn's "
        '`predict_proba` output shape.'
    )

    # For a two-column output, score on column 1 only.
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    # Evaluate the predictions with the AUC of the ROC curve.
    return sklearn.metrics.roc_auc_score(y_test, y_pred)
# Grid search over the `tensorflowestimator__*` parameters of the solution
# pipeline.
#
# Earlier grid, kept for reference:
# param_grid = dict(
#     #pipeline__missingcategoricalstransformer__strategy=["none"],
#     tensorflowestimator__dropout=[0.15, 0.20, 0.25, 0.3, 0.35, 0.40],
#     tensorflowestimator__hidden_units=[[64,32], [24], [48], [30, 12], [128, 48], [128,48,12]],
#     tensorflowestimator__training_steps=[900]
# )
# Presumably the result of that earlier search:
# TensorFlowEstimator(dropout=0.3, hidden_units=[128, 48, 12], training_steps=900)
param_grid = dict(
    # pipeline__missingcategoricalstransformer__strategy=["none"],
    tensorflowestimator__dropout=[0.35, 0.40, 0.45, 0.5],
    tensorflowestimator__hidden_units=[
        [128, 48, 12],
        [1024, 514, 256, 128, 64],
        [200, 100, 40],
        [56, 28, 12],
    ],
    tensorflowestimator__training_steps=[900],
)
# Presumably the best parameters reported for the grid above:
# {'tensorflowestimator__dropout': 0.4, 'tensorflowestimator__hidden_units': [200, 100, 40], 'tensorflowestimator__training_steps': 900}
gs = GridSearchCV(solution.get_pipeline(), param_grid, cv=6, n_jobs=4)

# The original imported `get_dataw`, a name that is never used, while
# `get_data` is the function actually called on the next line.
from challenge import get_data

X, y = get_data()

# Disabled index-reset debugging, kept for reference:
# print(X.index.values)
# index = np.arange(X.index.values[0], X.index.values[-1]+1)
# print(index)
#
# X.set_index(index, inplace=True)
# print(X.index.values)

gs.fit(X, y)
print(gs.best_params_)