Example #1
0
def optimize_sgdc():
	"""Run a hyper-parameter search for the 'sgdc' classifier and print results.

	Loads the data with ``load_df_from_pkl()``, runs ``baseline_sgdc`` on it,
	then configures a ``ClassifierOptimizer('sgdc')`` with a parameter grid
	and a ``StandardScaler`` pipeline step, and calls ``optimize`` with
	``cv=5`` and ``scoring='roc_auc'``.

	Prints whatever ``get_score_gridsearchcv()`` returns, followed by the
	elapsed time in minutes. The timer starts after the baseline run, so the
	baseline is not included in the reported duration.

	Takes no arguments and returns None.
	"""
	df, xnames, yname = load_df_from_pkl()
	baseline_sgdc(df, xnames, yname)

	# Start timing only once the baseline has finished.
	t0 = time.time()

	# NOTE(review): the original author's note suggests 'sgdc' maps to
	# linear_model.SGDClassifier(loss='log') -- confirm in ClassifierOptimizer.
	# NOTE(review): 'n_iter', penalty "none" and loss 'log' are spellings from
	# older scikit-learn releases -- verify against the installed version.
	params_grid = [{
		"clf__n_iter": [5, 8, 10, 15],
		"clf__alpha": [1e-7, 1e-6, 1e-5, 0.0001, 0.001, 0.01, 0.1, 1],
		"clf__penalty": ["none", "l1", "l2"],
		'clf__loss': ['log'],
	}]

	optimizer = ClassifierOptimizer('sgdc')
	optimizer.set_params(params_grid)
	# 'add_pipleline' (sic) is the method name exposed by ClassifierOptimizer
	# as used here; it is kept as-is because it is defined outside this block.
	optimizer.add_pipleline([("scalar", StandardScaler())])

	optimizer.optimize(df[xnames], df[yname], cv=5, scoring='roc_auc')

	# Single-argument print(...) behaves the same on Python 2 and Python 3,
	# unlike the Python-2-only print statement.
	print(optimizer.get_score_gridsearchcv())
	elapsed_min = (time.time() - t0) / 60
	print("finish in  %4.4fmin for %s " % (elapsed_min, 'optimizer'))
Example #2
0
def optimize_rf(df, xnames, yname):
	"""Run a hyper-parameter search for the 'rf' classifier and print results.

	Args:
		df: Table indexed as ``df[xnames]`` and ``df[yname]`` (presumably a
			pandas DataFrame -- confirm against the caller).
		xnames: Column selector for the features.
		yname: Column selector for the target.

	Builds a ``ClassifierOptimizer('rf')``, sets a parameter grid over
	``n_estimators``, ``max_depth``, ``min_samples_leaf`` and
	``class_weight``, calls ``optimize`` with ``cv=5`` and
	``scoring='roc_auc'``, and prints whatever ``get_score_gridsearchcv()``
	returns. Returns None.
	"""
	# NOTE(review): class_weight 'auto' is a spelling from older scikit-learn
	# releases (later ones use 'balanced') -- verify against the installed
	# version before changing; the two are not necessarily equivalent.
	param_grid = [{
		"clf__n_estimators": [100, 300],
		'clf__max_depth': [5, 10, 20, 30],
		'clf__min_samples_leaf': [5],
		'clf__class_weight': [None, {0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 10}, 'auto'],
	}]

	optimizer = ClassifierOptimizer('rf')
	optimizer.set_params(param_grid)
	optimizer.optimize(df[xnames], df[yname], cv=5, scoring='roc_auc')

	# Single-argument print(...) behaves the same on Python 2 and Python 3,
	# unlike the Python-2-only print statement.
	print(optimizer.get_score_gridsearchcv())