コード例 #1
0
ファイル: adb_cv.py プロジェクト: yskmt/kaggle-otto
train = train[random_select, :]
labels = labels[random_select]

# set up the model
params = {'n_estimators': n_estimators,
		  'learning_rate': learning_rate,
		  'algorithm': algorithm}
clf = ensemble.AdaBoostClassifier(n_estimators=params['n_estimators'],
								  learning_rate=params['learning_rate'],
								  algorithm=params['algorithm'])

# write parameters
logfile = 'log_adb/adb_cv_%d_%f_%s' %(params['n_estimators'],
									 params['learning_rate'],
									 params['algorithm'])
pname = logfile+'.txt'
xu.write_params(pname, clf.get_params())

# write results
fscores = open(logfile+'.csv', 'w')

# run CV
scores = cross_validation.cross_val_score(clf, train, labels,
										  cv=2, scoring=logloss_mc,
										  verbose=0)
print "scores: ", scores
fscores.write('%f %f\n' %(scores.mean(), scores.std()))
fscores.close()


コード例 #2
0
ファイル: rf_cv.py プロジェクト: yskmt/kaggle-otto
          'bootstrap': True, 'n_jobs': -1, 'verbose': 0}

clf = ensemble.RandomForestClassifier(n_jobs=params['n_jobs'],
                                      n_estimators=params['n_estimators'],
                                      max_depth=params['max_depth'],
                                      max_features=params['max_features'],
                                      verbose=params['verbose'],
                                      warm_start=True)
# oob_score=True)

print clf.get_params()

fscores = open('log_rf/scores_%d.csv' % arg1, 'a+', 1)  # line buffering
for i in range(1, params['n_estimators'] + 1):
    clf.set_params(n_estimators=i)
    scores = cross_validation.cross_val_score(clf, train, labels,
                                              cv=2, scoring=logloss_mc,
                                              verbose=0)
    print "CV #: %d" % i, "scores: ", scores
    fscores.write('%d %f %f\n' % (i, scores.mean(), scores.std()))

fscores.close()

# scores = cross_validation.cross_val_score(clf, train, labels, cv=5,
# scoring=logloss_mc, verbose=1)

# np.savetxt('log_rf/scores.csv', [scores.mean(), scores.std()])

fname = 'log_rf/params.txt'
xu.write_params(fname + '.txt', clf.get_params())