def predict_and_submit(X_train, y_train, testFpath, clfKlass, **clfArgs):
    """
    Train a classifier and write its test-set class probabilities to a
    submission file.

    :param X_train: training features, handed to the classifier's fit()
    :param y_train: training labels; their distinct sorted values are used
        directly as column indices of the prediction matrix, so they are
        presumably integer positions into CLASS_NAMES (TODO confirm)
    :param testFpath: path passed to read_test_data_given_path
    :param clfKlass: classifier class, instantiated as clfKlass(**clfArgs);
        the instance must offer fit() and predict_proba()
    :param clfArgs: keyword arguments forwarded to the clfKlass constructor
    :return: None; the result is emitted through make_submission_file
    """

    testX, fnames = read_test_data_given_path(testFpath)

    model = clfKlass(**clfArgs)
    model.fit(X_train, y_train)

    # one row per test sample, one column per entry of CLASS_NAMES;
    # columns for labels that never occur in y_train stay at zero
    numSamples = testX.shape[0]
    probs = np.zeros((numSamples, len(CLASS_NAMES)))

    testProbs = model.predict_proba(testX)
    seenLabels = np.sort(list(set(y_train)))
    probs[:, seenLabels] = testProbs

    make_submission_file(probs, fnames, fNameSuffix='nonDN')
# ---------------------------------------------------------------------------
# Earlier experiments, kept commented out for reference.
# ---------------------------------------------------------------------------
# X_train, y = read_train_data(width, height)
# x_fieldnames = np.array(['p_%i' % i for i in range(width*height)] + FEATURE_NAMES)
#
#
# plot_feature_importances(fullX, ydata,
#                          np.array(['p'+str(i) for i in range(lastlayer.shape[1])] + FEATURE_NAMES),
#                          0.7, numEstimators=100, min_samples_split=15)
# plot_pixel_importances(10, 20, lastlayer, ydata)

# for minSampleSplit in [17]:
#     print minSampleSplit
#     print evaluate(fullX, ydata, RandomForestClassifier,
#                    n_estimators=100, n_jobs=cpu_count()-1, min_samples_split=minSampleSplit)

# ---------------------------------------------------------------------------
# Test-set submission: random forest on last-layer outputs + feature values.
# NOTE(review): fullX and ydata are not defined in this section; presumably
# they are built earlier in the file -- confirm before running.
# ---------------------------------------------------------------------------

# Per-sample feature values for the test set (headerless, comma-separated).
featureVals_test = np.array(
    pandas.read_csv(
        '/Users/jennyyuejin/K/NDSB/Data/X_test_48_48_featureVals.csv',
        header=None,
    )
)

# Last-layer outputs for the test set (headerless, space-separated).
lastlayer_test = np.array(
    pandas.read_csv(
        '/Users/jennyyuejin/K/NDSB/Data/lastlayerout_test.csv',
        header=None,
        sep=' ',
    )
)

# Column-wise join: last-layer columns first, then the feature values.
fullX_test = np.hstack((lastlayer_test, featureVals_test))

# Test file names, flattened into a plain list.
testFnames = list(
    np.array(
        pandas.read_table(
            '/Users/jennyyuejin/K/NDSB/Data/testFnames.txt',
            header=None,
        )
    ).ravel()
)

clf = RandomForestClassifier(min_samples_split=20, n_estimators=100)
clf.fit(fullX, ydata)

# One column per entry of CLASS_NAMES; only the columns whose label occurs in
# ydata receive probabilities, the rest stay at zero.
pred = np.zeros((len(fullX_test), len(CLASS_NAMES)))
pred[:, np.sort(list(set(ydata)))] = clf.predict_proba(fullX_test)

make_submission_file(pred, testFnames, fNameSuffix='withLastLayer')