def ridge_submit():
    """Fit a default ``Ridge`` model on recent rows and write a submission CSV.

    Steps, in order:

    1. Read ``../result_tmp/scaled_train_DateBlockNum.csv``.
    2. Keep rows with ``30 < date_block_num <= 33`` (the original note
       describes this as the most recent three months).
    3. Fit ``Ridge()`` on every column except ``item_cnt_month`` (the target)
       and ``date_block_num``.
    4. Predict on the test data (minus its ``ID`` column), cast to float16,
       clip to ``[0, 20]`` and store the result in the submission frame.
    5. Write the submission to
       ``../result_tmp/submit_180902_31-33_ridge.csv``.

    Returns:
        None. The function works purely through logging and file output.
    """
    logger = mylogger()
    logger.info('RidgeRegression start')

    logger.debug('make_train_data start')
    frame = pd.read_csv('../result_tmp/scaled_train_DateBlockNum.csv')
    # Restrict training to the latest window; an earlier variant of this
    # code used only date_block_num == 33 (a single month).
    block = frame['date_block_num']
    recent = frame.loc[(30 < block) & (block <= 33)]
    target = recent['item_cnt_month']
    # date_block_num is only used for row selection, not as a feature.
    features = recent.drop(
        ['item_cnt_month', 'date_block_num'], axis=1).values
    logger.debug('make_train_data end')

    logger.info('Fitting start')
    model = Ridge()
    model.fit(features, target)
    logger.debug('Fitting end')

    logger.info('Scoring start')
    test_features = load_test_data().drop(['ID'], axis=1).values
    submission = load_submission()
    predicted = model.predict(test_features)
    submission['item_cnt_month'] = predicted.astype(np.float16).clip(0., 20.)
    submission.to_csv(
        '../result_tmp/submit_180902_31-33_ridge.csv',
        encoding='utf-8-sig',
        index=False,
    )
    logger.info('submission:\n{}'.format(submission.head()))

    logger.debug('RidgeRegression end')
    logger.debug('====================')
def RandomForest():
    """Fit a random forest on the full scaled training set and write a submission.

    Reads ``./result_tmp/scaled_train.csv``, fits
    ``RandomForestRegressor(n_estimators=50, random_state=1)`` on every column
    except ``item_cnt_month`` (the target), predicts on the test data (minus
    its ``ID`` column), casts the predictions to float16, clips them to
    ``[0, 20]`` and writes ``./result_tmp/submit_180826_1st.csv``.

    Returns:
        None. The function works purely through logging and file output.
    """
    # NOTE(review): `logger` is not created in this function (forest_submit
    # and ridge_submit call mylogger() themselves) -- presumably a
    # module-level logger exists; confirm.
    logger.info('RandomForestRegressor start')

    logger.debug('make_train_data start')
    train = pd.read_csv('./result_tmp/scaled_train.csv')
    y = train['item_cnt_month']
    X = train.drop(['item_cnt_month'], axis=1).values
    # Fixed log message typo ('make_train_dat end') so that the start/end
    # markers pair up when searching the logs.
    logger.debug('make_train_data end')

    logger.info('Fitting start')
    forest = RandomForestRegressor(n_estimators=50, random_state=1)
    forest.fit(X, y)
    logger.debug('Fitting end')

    logger.info('Scoring start')
    test_data = load_test_data()
    test = test_data.drop(['ID'], axis=1).values
    submission = load_submission()
    submission['item_cnt_month'] = forest.predict(test).astype(
        np.float16).clip(0., 20.)
    submission.to_csv('./result_tmp/submit_180826_1st.csv',
                      encoding='utf-8-sig', index=False)
    logger.info('submission:\n{}'.format(submission.head()))

    logger.debug('RandomForestRegressor end')
    logger.debug('====================')
def make_train_in_test():
    """Restrict the training data to shops and items that occur in the test set.

    Loads the train and test frames, keeps only the training rows whose
    ``shop_id`` AND ``item_id`` both appear in the test frame, parses the
    ``date`` column (format ``%d.%m.%Y``) into datetimes and adds a ``month``
    column holding the calendar month of each date.

    Returns:
        tuple: ``(train, test)`` -- the filtered and augmented training frame,
        and the test frame exactly as returned by ``load_test_data()``.
    """
    logger.info('train in test starts')
    train = load_train_data()
    test = load_test_data()
    logger.info('train.org.shape:{}'.format(train.shape))

    in_test_shops = train.shop_id.isin(test.shop_id.unique())
    in_test_items = train.item_id.isin(test.item_id.unique())
    # Take an explicit copy of the filtered rows: assigning new columns onto
    # the result of a boolean selection otherwise raises pandas'
    # SettingWithCopyWarning. A single combined mask selects the same rows,
    # in the same order, as filtering by shop and then by item.
    train = train[in_test_shops & in_test_items].copy()

    train['date'] = pd.to_datetime(train['date'], format='%d.%m.%Y')
    # Time-series feature: keep only the month component of the date.
    train['month'] = train['date'].dt.month

    logger.info('train in test.shape:{}'.format(train.shape))
    logger.debug('train in test ends')
    return train, test
def forest_submit():
    """Fit a random forest on recent rows and write a submission CSV.

    Steps, in order:

    1. Read ``./result_tmp/scaled_train_DateBlockNum.csv``.
    2. Keep rows with ``30 < date_block_num <= 33`` (the original note
       describes this as the most recent three months).
    3. Fit ``RandomForestRegressor(n_estimators=50, random_state=1)`` on every
       column except ``item_cnt_month`` (the target) and ``date_block_num``.
    4. Predict on the test data (minus its ``ID`` column), cast to float16,
       clip to ``[0, 20]`` and store the result in the submission frame.
    5. Write the submission to ``./result_tmp/submit_180827_31-33.csv``.

    Returns:
        None. The function works purely through logging and file output.
    """
    logger = mylogger()
    logger.info('RandomForestRegressor start')

    logger.debug('make_train_data start')
    frame = pd.read_csv('./result_tmp/scaled_train_DateBlockNum.csv')
    # Restrict training to the latest window; an earlier variant of this
    # code used only date_block_num == 33 (a single month).
    block = frame['date_block_num']
    recent = frame.loc[(30 < block) & (block <= 33)]
    target = recent['item_cnt_month']
    # date_block_num is only used for row selection, not as a feature.
    features = recent.drop(
        ['item_cnt_month', 'date_block_num'], axis=1).values
    logger.debug('make_train_data end')

    logger.info('Fitting start')
    model = RandomForestRegressor(n_estimators=50, random_state=1)
    model.fit(features, target)
    logger.debug('Fitting end')
    # For EDA, model.feature_importances_ can be inspected at this point
    # (one value per feature column used for fitting).

    logger.info('Scoring start')
    test_features = load_test_data().drop(['ID'], axis=1).values
    submission = load_submission()
    predicted = model.predict(test_features)
    submission['item_cnt_month'] = predicted.astype(np.float16).clip(0., 20.)
    submission.to_csv(
        './result_tmp/submit_180827_31-33.csv',
        encoding='utf-8-sig',
        index=False,
    )
    logger.info('submission:\n{}'.format(submission.head()))

    logger.debug('RandomForestRegressor end')
    logger.debug('====================')
logger.debug('make_train_data end') logger.info('Fitting start') forest = RandomForestRegressor(n_estimators=50, random_state=1) forest.fit(X, y) logger.debug('Fitting end') #EDAしたいとき #fti = forest.feature_importances_ #print('Feature Importances:') #for i, feature in enumerate(train.colunms): # print('\t{0:10s}:{1:>.6f}'.format(feature, fti[i])) logger.info('Scoring start') #logger.info('Accuracy on test set: {:.3f}'.format(.score(X_test, y_test))) test_data = load_test_data() test = test_data.drop(['ID'], axis=1).values submission = load_submission() submission['item_cnt_month'] = forest.predict(test).astype(np.float16).clip(0., 20.) #submission.to_csv('./result_tmp/submit_180826_1st.csv', encoding='utf-8-sig', index=False) submission.to_csv('./result_tmp/submit_180827_31-33.csv', encoding='utf-8-sig', index=False) logger.info('submission:\n{}'.format(submission.head())) logger.debug('RandomForestRegressor end') logger.debug('====================') # CV: KFold, StratifiedKFold, TimeSeriesSplit, GroupKFold # [http://scikit-learn.org/stable/modules/cross_validation.html] # TimeSeriesSplit()によるCV # [http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html]
accuracy = sess.run(self.accuracy_op, feed_dict=feed_dict) eval_accuracy += accuracy eval_iter += 1 return eval_accuracy / eval_iter num_training = 49000 num_validation = 50000 - num_training num_test = 10000 # Load cifar-10 data X_train, Y_train, X_val, Y_val = load_train_data() X_test, Y_test = load_test_data() print X_train.shape # Clear old computation graphs tf.reset_default_graph() sess = tf.Session() model = imageModel() model.train(sess, X_train, Y_train, X_val, Y_val) accuracy = model.evaluate(sess, X_test, Y_test) print('***** test accuracy: %.3f' % accuracy) # Save your model