# Cross-validate an AdaBoost classifier on the rows selected by
# `random_select`, then log the model parameters and the CV score summary.
# `train`, `labels`, `random_select`, `n_estimators`, `learning_rate` and
# `algorithm` are expected to be defined before this point.

# Index samples and labels with the same selector so they stay aligned.
train = train[random_select, :]
labels = labels[random_select]

# set up the model
params = {'n_estimators': n_estimators,
          'learning_rate': learning_rate,
          'algorithm': algorithm}
clf = ensemble.AdaBoostClassifier(n_estimators=params['n_estimators'],
                                  learning_rate=params['learning_rate'],
                                  algorithm=params['algorithm'])

# write parameters
# The hyper-parameters are encoded in the log file stem, so every
# configuration gets its own pair of files (<stem>.txt and <stem>.csv).
logfile = 'log_adb/adb_cv_%d_%f_%s' % (params['n_estimators'],
                                       params['learning_rate'],
                                       params['algorithm'])
pname = logfile + '.txt'
xu.write_params(pname, clf.get_params())

# write results
# The file is opened (and truncated) before CV starts, as before, but the
# `with` block guarantees the handle is closed even if cross-validation
# raises.
with open(logfile + '.csv', 'w') as fscores:
    # run CV
    scores = cross_validation.cross_val_score(clf, train, labels, cv=2,
                                              scoring=logloss_mc, verbose=0)
    # Single-argument print: same output under Python 2 and Python 3
    # (the two spaces reproduce the original `print "scores: ", scores`).
    print("scores:  %s" % (scores,))
    # One row: mean and standard deviation of the per-fold scores.
    fscores.write('%f %f\n' % (scores.mean(), scores.std()))
'bootstrap': True, 'n_jobs': -1, 'verbose': 0} clf = ensemble.RandomForestClassifier(n_jobs=params['n_jobs'], n_estimators=params['n_estimators'], max_depth=params['max_depth'], max_features=params['max_features'], verbose=params['verbose'], warm_start=True) # oob_score=True) print clf.get_params() fscores = open('log_rf/scores_%d.csv' % arg1, 'a+', 1) # line buffering for i in range(1, params['n_estimators'] + 1): clf.set_params(n_estimators=i) scores = cross_validation.cross_val_score(clf, train, labels, cv=2, scoring=logloss_mc, verbose=0) print "CV #: %d" % i, "scores: ", scores fscores.write('%d %f %f\n' % (i, scores.mean(), scores.std())) fscores.close() # scores = cross_validation.cross_val_score(clf, train, labels, cv=5, # scoring=logloss_mc, verbose=1) # np.savetxt('log_rf/scores.csv', [scores.mean(), scores.std()]) fname = 'log_rf/params.txt' xu.write_params(fname + '.txt', clf.get_params())