Ejemplo n.º 1
0
    # Visible tail of a larger routine (its header is outside this excerpt):
    # score the regressor sample-by-sample on the validation split, refit it,
    # then predict the test set and save the predictions.
    print("Validating regression model...")
    p = progressbar.ProgressBar(maxval=len(y_valid)).start()
    mse = []
    for i in range(len(y_valid)):
        X_sample = X_valid[i]
        y_sample = y_valid[i]
        # The regressor is fitted on np.log(y) below, so predictions are
        # exponentiated to bring them back to the original target scale.
        # NOTE(review): presumably `reg` was fitted the same way earlier,
        # outside this excerpt -- confirm.
        y_pred = np.exp(reg.predict([X_sample]))
        # Root of the mean squared difference for this one sample; assuming
        # y_sample is a scalar, this reduces to the absolute error.
        mse.append(np.sqrt(np.mean((y_pred - y_sample)**2)))
        p.update(i)
    # Push the progress bar to its maximum value once the loop is done.
    p.update(len(y_valid))
    print('')
    # NOTE(review): the label says "mse", but the values collected above are
    # per-sample square roots of squared errors, not mean squared errors.
    print("Regression mse:")
    # Mean of the per-sample errors and the standard error of that mean
    # (standard deviation divided by sqrt of the number of samples).
    print(np.mean(mse), np.std(mse)/np.sqrt(len(y_valid)))

    print('Fit with all data')
    # Refit on X against the log-transformed target.
    reg.fit(X,np.log(y))
    
    print('Transform test set...')
    # Work on a copy so test_df_orig is not modified by the new column.
    test_df = test_df_orig.copy()
    # Class labels predicted by `cls` from the transformed original test frame.
    predstd = cls.predict(tr.transform(test_df_orig).values)
#    test_df['prob0'] = predstd == 0
#    test_df['prob1'] = predstd == 1
#    test_df['prob2'] = predstd == 2
#    test_df['prob3'] = predstd == 3
    # Only the class-4 indicator is kept as an extra boolean column.
    test_df['prob4'] = predstd == 4
    # X is rebound here: from this point on it holds the test feature matrix.
    X = tr.transform(test_df).values

    print('Predict test set...')
    # Undo the log transform of the target before saving.
    yp = np.exp(reg.predict(X))
    # NOTE(review): '20150431' is not a valid calendar date if it is meant as
    # YYYYMMDD -- confirm the intended run label.
    RevenueCompetition.save_data(yp, 'data/revenue_20150431_03.csv')
Ejemplo n.º 2
0
Archivo: rf.py Proyecto: PKostya/kaggle
    # Visible tail of a larger routine (its header, and the setup of `mse`
    # and `p`, are outside this excerpt): score the regressor on each
    # validation sample, refit it, report feature importances, then predict
    # the test set and save the predictions.
    for i in range(len(y_valid)):
        X_sample = X_valid[i]
        y_sample = y_valid[i]
        y_pred = reg.predict([X_sample])
        # Root of the mean squared difference for this one sample; assuming
        # y_sample is a scalar, this reduces to the absolute error.
        mse.append(np.sqrt(np.mean((y_pred - y_sample)**2)))
        p.update(i)
    # Final progress update with the total number of validation samples.
    p.update(len(y_valid))
    print('')
    # NOTE(review): the label says "mse", but the values collected above are
    # per-sample square roots of squared errors, not mean squared errors.
    print("Regression mse:")
    # Mean of the per-sample errors and the standard error of that mean
    # (standard deviation divided by sqrt of the number of samples).
    print(np.mean(mse), np.std(mse)/np.sqrt(len(y_valid)))

    print('Fit with all data')
    reg.fit(X,y)
    # Feature indices ordered from highest to lowest importance.
    imporder = reg.feature_importances_.argsort()[::-1]
    # Pair each transformed-column name with its importance, in that order.
    # NOTE(review): assumes the columns of tr.transform(train_df) line up
    # with the columns of X used in the fit above -- confirm.
    for c,v in zip(tr.transform(train_df).columns.values[imporder], reg.feature_importances_[imporder]):
        print('{} : {}'.format(c,v))
    
    print('Transform test set...')
    # Work on a copy so test_df_orig is not modified by the new column.
    test_df = test_df_orig.copy()
    # Class labels predicted by `cls` from the transformed original test frame.
    predstd = cls.predict(tr.transform(test_df_orig).values)
#    test_df['prob0'] = predstd == 0
#    test_df['prob1'] = predstd == 1
#    test_df['prob2'] = predstd == 2
#    test_df['prob3'] = predstd == 3
    # Only the class-4 indicator is kept as an extra boolean column.
    test_df['prob4'] = predstd == 4
    # X is rebound here: from this point on it holds the test feature matrix.
    X = tr.transform(test_df).values

    print('Predict test set...')
    yp = reg.predict(X)
    RevenueCompetition.save_data(yp, 'data/revenue_20150426_01.csv')