#!/usr/bin/env python
# Prediction 1: baseline run of sklearn's RandomForestClassifier on the
# raw training features, writing class predictions for the test set.
import lib.loader as ld
import sklearn.ensemble as ens

if __name__ == '__main__':
    # Load the labelled training data and the unlabelled test data.
    trainx, trainy = ld.loadtrain('data/trainingdata.txt')
    testx = ld.loadtest('data/testingdata.txt')

    # Small entropy-criterion forest; fit and predict in one pass.
    model = ens.RandomForestClassifier(criterion='entropy', n_estimators=20)
    model.fit(trainx, trainy)
    predictions = model.predict(testx)
    ld.write('predictions/Prediction1.txt', predictions)
    # Score: 0.52515
n_jobs=n_jobs)) # 0.0420 + 0.16 - 0.04 elif rand == 5: numChange = comp.append( lm.SGDClassifier(loss='hinge', alpha=0.000631 + 0.01 * r.random())) # 0.000631 + 0.01 elif rand == 6: numChange = comp.append( svm.LinearSVC(dual=False, C=0.01 + 0.1 * r.random())) # 0.01 + 0.1 elif rand == 7: numChange = comp.append( ens.RandomForestClassifier(n_estimators=100, max_features=0.120 + 0.4 * r.random(), min_samples_split=7, n_jobs=n_jobs)) # 0.32 +- 0.2 elif rand == 8: numChange = comp.append( svm.LinearSVC(dual=False, C=100 + 900 * r.random())) # 1000 - 900 if numChange == 0: num_consec += 1 else: num_consec = 0 print('Number CV Changes: ' + str(numChange)) if len(comp.clfs) % save_interval == 0: # don't save every time ld.write('predictions/Prediction7.txt', comp.predict())
# 0.7 +- 0.3 elif rand == 4: numChange = comp.append(ens.RandomForestClassifier(n_estimators=450, max_features=0.0020 + 0.2 * r.random(), min_samples_split=7, n_jobs=n_jobs)) # 0.0420 + 0.16 - 0.04 elif rand == 5: numChange = comp.append(lm.SGDClassifier(loss='hinge', alpha=0.000631 + 0.01 * r.random())) # 0.000631 + 0.01 elif rand == 6: numChange = comp.append(svm.LinearSVC(dual=False, C=0.01 + 0.1 * r.random())) # 0.01 + 0.1 elif rand == 7: numChange = comp.append(ens.RandomForestClassifier(n_estimators=100, max_features=0.120 + 0.4 * r.random(), min_samples_split=7, n_jobs=n_jobs)) # 0.32 +- 0.2 elif rand == 8: numChange = comp.append(svm.LinearSVC(dual=False, C=100 + 900 * r.random())) # 1000 - 900 if numChange == 0: num_consec += 1 else: num_consec = 0 print('Number CV Changes: ' + str(numChange)) if len(comp.clfs) % save_interval == 0: # don't save every time ld.write('predictions/Prediction7.txt', comp.predict())
#!/usr/bin/env python
# Prediction 5: random forest on TF-IDF-transformed features.
# (The original header said "prediction 3", but this script writes
# Prediction5.txt with different hyperparameters — header corrected.)
import numpy as np
import lib.loader as ld
import sklearn.ensemble as ens
import sklearn.feature_extraction.text as tfidf

if __name__ == '__main__':
    # Removed a stray `ens.RandomForestClassifier(n_jobs=-1)` call here:
    # it constructed a classifier and immediately discarded it (dead code).
    trainx, trainy = ld.loadtrain('data/trainingdata.txt')
    testx = ld.loadtest('data/testingdata.txt')

    # NOTE(review): each TfidfTransformer is fit on its own matrix, so the
    # IDF weights used for the test set differ from the training set's.
    # Fitting one transformer on trainx and reusing it to transform testx
    # is the conventional approach — confirm before changing, since it
    # would alter the submitted predictions.
    trainx2 = tfidf.TfidfTransformer().fit_transform(trainx)
    testx2 = tfidf.TfidfTransformer().fit_transform(testx)

    clf = ens.RandomForestClassifier(max_features=0.38,
                                     criterion='entropy',
                                     n_estimators=5000,
                                     min_samples_split=7)
    clf.fit(trainx2, trainy)
    ld.write('predictions/Prediction5.txt', clf.predict(testx2))
    # score=0.51
#!/usr/bin/env python
# Prediction 3: random forest with grid-searched parameters on
# TF-IDF-transformed features.
import lib.loader as ld
import sklearn.ensemble as ens
import sklearn.feature_extraction.text as tfidf

if __name__ == '__main__':
    # Removed a stray `ens.RandomForestClassifier(n_jobs=-1)` call here:
    # it constructed a classifier and immediately discarded it (dead code).
    trainx, trainy = ld.loadtrain('data/trainingdata.txt')
    testx = ld.loadtest('data/testingdata.txt')

    # NOTE(review): each TfidfTransformer is fit on its own matrix, so the
    # IDF weights used for the test set differ from the training set's.
    # Fitting one transformer on trainx and reusing it to transform testx
    # is the conventional approach — confirm before changing, since it
    # would alter the submitted predictions.
    trainx2 = tfidf.TfidfTransformer().fit_transform(trainx)
    testx2 = tfidf.TfidfTransformer().fit_transform(testx)

    clf = ens.RandomForestClassifier(max_features=0.0420,
                                     criterion='entropy',
                                     n_estimators=5000,
                                     min_samples_split=7)
    clf.fit(trainx2, trainy)
    ld.write('predictions/Prediction3.txt', clf.predict(testx2))
    # score 0.51