예제 #1
0
파일: main.py 프로젝트: jennyyuejin/Kaggle
def predict_and_submit(X_train, y_train, testFpath, clfKlass, **clfArgs):
    """Fit a classifier, score the test set, and write a submission file.

    The test data is loaded from ``testFpath`` via
    ``read_test_data_given_path``, a fresh ``clfKlass(**clfArgs)`` is fitted
    on the training data, and its ``predict_proba`` output is scattered into
    a zero matrix that has one column per entry of ``CLASS_NAMES``. Columns
    for labels that never occur in ``y_train`` therefore stay at zero.

    NOTE(review): this assumes the labels in ``y_train`` are integer column
    indices into ``CLASS_NAMES`` and that ``predict_proba`` orders its
    columns by sorted label value -- confirm against the caller.

    :param X_train: training feature matrix passed to ``fit``.
    :param y_train: training labels passed to ``fit``.
    :param testFpath: location handed to ``read_test_data_given_path``.
    :param clfKlass: classifier class exposing ``fit`` and ``predict_proba``.
    :param clfArgs: keyword arguments forwarded to the ``clfKlass`` constructor.
    """
    test_features, test_names = read_test_data_given_path(testFpath)

    model = clfKlass(**clfArgs)
    model.fit(X_train, y_train)

    n_test_rows = test_features.shape[0]
    submission = np.zeros((n_test_rows, len(CLASS_NAMES)))

    # Only the classes seen during training get a probability column from the
    # model; place each of those columns at its label's index.
    class_probs = model.predict_proba(test_features)
    seen_labels = np.array(sorted(set(y_train)))
    submission[:, seen_labels] = class_probs

    make_submission_file(submission, test_names, fNameSuffix='nonDN')
예제 #2
0
파일: main.py 프로젝트: jennyyuejin/Kaggle

    # NOTE: the lines below down to the evaluate() loop are disabled
    # experiments (feature/pixel importance plots and a min_samples_split
    # sweep); they are not executed.
    # X_train, y = read_train_data(width, height)

    # x_fieldnames = np.array(['p_%i' % i for i in range(width*height)] + FEATURE_NAMES)
    # #
    # plot_feature_importances(fullX, ydata,
    #                          np.array(['p'+str(i) for i in range(lastlayer.shape[1])] + FEATURE_NAMES),
    #                          0.7, numEstimators=100, min_samples_split=15)

    # plot_pixel_importances(10, 20, lastlayer, ydata)

    # for minSampleSplit in [17]:
    #     print minSampleSplit
    #     print evaluate(fullX, ydata, RandomForestClassifier,
    #                    n_estimators=100, n_jobs=cpu_count()-1, min_samples_split=minSampleSplit)


    # Load the test-set inputs from header-less files at hard-coded absolute
    # paths: a comma-separated feature-value table and a space-separated
    # "last layer" table (presumably network activations -- TODO confirm).
    featureVals_test = np.array(pandas.read_csv('/Users/jennyyuejin/K/NDSB/Data/X_test_48_48_featureVals.csv', header=None))
    lastlayer_test = np.array(pandas.read_csv('/Users/jennyyuejin/K/NDSB/Data/lastlayerout_test.csv', header=None, sep=' '))
    # Join the two tables column-wise, last-layer columns first; both must
    # therefore have the same number of rows.
    fullX_test = np.concatenate([lastlayer_test, featureVals_test], axis=1)
    # Test file names, read from a header-less text file and flattened into a
    # plain Python list.
    testFnames = list(np.array(pandas.read_table('/Users/jennyyuejin/K/NDSB/Data/testFnames.txt', header=None)).ravel())

    # Fit a 100-tree random forest on the training data. fullX and ydata are
    # bound earlier in the enclosing function (outside this excerpt).
    clf = RandomForestClassifier(n_estimators=100, min_samples_split=20)
    clf.fit(fullX, ydata)

    # One output column per entry of CLASS_NAMES; the predicted probabilities
    # are written into the columns indexed by the sorted unique labels of
    # ydata, so classes absent from the training labels keep probability 0.
    # NOTE(review): assumes labels are integer indices into CLASS_NAMES and
    # that predict_proba orders its columns by sorted label -- confirm.
    pred = np.zeros((fullX_test.shape[0], len(CLASS_NAMES)))
    pred[:, np.sort(list(set(ydata)))] = clf.predict_proba(fullX_test)

    # Hand the probability matrix and file names to the project's submission
    # writer, tagging the output with the 'withLastLayer' suffix.
    make_submission_file(pred, testFnames, fNameSuffix='withLastLayer')