def optimize_sgdc():
    """Run the baseline and then a hyper-parameter search for the 'sgdc' classifier.

    The data frame and column names come from ``load_df_from_pkl()``.
    ``baseline_sgdc`` is called first; afterwards a ``ClassifierOptimizer('sgdc')``
    is configured with the parameter grid below plus a ``StandardScaler`` pipeline
    step, and ``optimize`` is invoked with ``cv=5`` and ``scoring='roc_auc'``.

    Prints whatever ``get_score_gridsearchcv()`` returns, followed by the elapsed
    wall-clock time in minutes. Returns nothing.
    """
    df, xnames, yname = load_df_from_pkl()
    baseline_sgdc(df, xnames, yname)

    # The clock starts after the baseline run, so only the search is timed.
    started_at = time.time()

    # NOTE(review): "clf__n_iter" and the 'log' loss name look tied to an older
    # scikit-learn API -- confirm against the installed version.
    search_space = {
        "clf__n_iter": [5, 8, 10, 15],
        "clf__alpha": [1e-7, 1e-6, 1e-5, 0.0001, 0.001, 0.01, 0.1, 1],
        "clf__penalty": ["none", "l1", "l2"],
        'clf__loss': ['log'],
    }

    # presumably 'sgdc' maps to linear_model.SGDClassifier(loss='log') -- verify
    # against ClassifierOptimizer.
    tuner = ClassifierOptimizer('sgdc')
    tuner.set_params([search_space])
    tuner.add_pipleline([("scalar", StandardScaler())])
    tuner.optimize(df[xnames], df[yname], cv=5, scoring='roc_auc')
    print(tuner.get_score_gridsearchcv())

    elapsed_minutes = (time.time() - started_at) / 60
    print("finish in %4.4fmin for %s " % (elapsed_minutes, 'optimizer'))
def optimize_rf(df, xnames, yname):
    """Run a hyper-parameter search for the 'rf' classifier and print its score.

    Args:
        df: Object indexed as ``df[xnames]`` and ``df[yname]`` to obtain the
            inputs and the target (presumably a pandas DataFrame -- confirm
            against callers).
        xnames: Key(s) selecting the feature columns of ``df``.
        yname: Key selecting the target column of ``df``.

    A ``ClassifierOptimizer('rf')`` is given the parameter grid below and
    ``optimize`` is invoked with ``cv=5`` and ``scoring='roc_auc'``. The value
    returned by ``get_score_gridsearchcv()`` is printed. Returns nothing.
    """
    tuner = ClassifierOptimizer('rf')

    # NOTE(review): the 'auto' class weight appears to be accepted only by older
    # scikit-learn releases -- confirm against the installed version.
    class_weight_candidates = [
        None,
        {0: 1, 1: 1},
        {0: 1, 1: 5},
        {0: 1, 1: 10},
        'auto',
    ]
    forest_grid = {
        "clf__n_estimators": [100, 300],
        'clf__max_depth': [5, 10, 20, 30],
        'clf__min_samples_leaf': [5],
        'clf__class_weight': class_weight_candidates,
    }
    tuner.set_params([forest_grid])

    features = df[xnames]
    target = df[yname]
    tuner.optimize(features, target, cv=5, scoring='roc_auc')
    print(tuner.get_score_gridsearchcv())