# Validate the regressor one held-out sample at a time, refit it on the full
# data set with log-transformed targets, then predict the test set and save
# the predictions.
print("Validating regression model...")
n_valid = len(y_valid)
p = progressbar.ProgressBar(maxval=n_valid).start()

# One error value per validation sample. Each entry is
# sqrt(mean((prediction - target) ** 2)) over a single-sample prediction; the
# list keeps the name `mse` to match the label printed below.
mse = []
for idx in range(n_valid):
    features = X_valid[idx]
    target = y_valid[idx]
    # Predictions are mapped back through exp before being compared with the
    # target -- presumably reg was trained on log targets, as in the refit
    # below (TODO confirm against the earlier fit).
    y_pred = np.exp(reg.predict([features]))
    residual = y_pred - target
    mse.append(np.sqrt(np.mean(residual ** 2)))
    p.update(idx)
# Push the bar to its maximum once every sample has been processed.
p.update(n_valid)
print('')

# Report the mean per-sample error and the standard error of that mean.
print("Regression mse:")
mean_error = np.mean(mse)
standard_error = np.std(mse) / np.sqrt(n_valid)
print(mean_error, standard_error)

print('Fit with all data')
log_targets = np.log(y)
reg.fit(X, log_targets)

print('Transform test set...')
test_df = test_df_orig.copy()
predstd = cls.predict(tr.transform(test_df_orig).values)
# Only the indicator for a predicted label of 4 is added as a feature; the
# equivalent 'prob0'..'prob3' indicator columns are intentionally disabled.
test_df['prob4'] = predstd == 4
X = tr.transform(test_df).values

print('Predict test set...')
# Undo the log transform applied to the targets in the fit above.
yp = np.exp(reg.predict(X))
RevenueCompetition.save_data(yp, 'data/revenue_20150431_03.csv')
# Validate the regressor one held-out sample at a time (the progress bar `p`
# and the `mse` list are set up before this point), refit it on the full data
# set, print the feature importances, then predict the test set and save the
# predictions.
n_valid = len(y_valid)
for idx in range(n_valid):
    features = X_valid[idx]
    target = y_valid[idx]
    y_pred = reg.predict([features])
    # sqrt(mean(residual ** 2)) over a single-sample prediction.
    residual = y_pred - target
    mse.append(np.sqrt(np.mean(residual ** 2)))
    p.update(idx)
# Push the bar to its maximum once every sample has been processed.
p.update(n_valid)
print('')

# Report the mean per-sample error and the standard error of that mean.
print("Regression mse:")
mean_error = np.mean(mse)
standard_error = np.std(mse) / np.sqrt(n_valid)
print(mean_error, standard_error)

print('Fit with all data')
reg.fit(X, y)

# List every transformed training column with its importance, ordered from the
# largest importance to the smallest.
importances = reg.feature_importances_
imporder = importances.argsort()[::-1]
column_names = tr.transform(train_df).columns.values
for column, importance in zip(column_names[imporder], importances[imporder]):
    print('{} : {}'.format(column, importance))

print('Transform test set...')
test_df = test_df_orig.copy()
predstd = cls.predict(tr.transform(test_df_orig).values)
# Only the indicator for a predicted label of 4 is added as a feature; the
# equivalent 'prob0'..'prob3' indicator columns are intentionally disabled.
test_df['prob4'] = predstd == 4
X = tr.transform(test_df).values

print('Predict test set...')
yp = reg.predict(X)
RevenueCompetition.save_data(yp, 'data/revenue_20150426_01.csv')