Example #1
def ridge_submit():
	logger = mylogger()
	logger.info('RidgeRegression start')
	logger.debug('make_train_data start')
	#train = pd.read_csv('../result_tmp/scaled_train.csv')
	train = pd.read_csv('../result_tmp/scaled_train_DateBlockNum.csv')
	#train = train[train['date_block_num']==33]  # most recent month only
	train = train.loc[(30<train['date_block_num'])&(train['date_block_num']<=33)]  # most recent 3 months
	
	y = train['item_cnt_month']
	X = train.drop(['item_cnt_month', 'date_block_num'], axis=1).values
	#X = train.drop(['item_cnt_month'], axis=1).values
	#X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
	logger.debug('make_train_data end')

	logger.info('Fitting start')
	ridge = Ridge()
	ridge.fit(X, y)
	logger.debug('Fitting end')

	logger.info('Scoring start')
	#logger.info('Accuracy on test set: {:.3f}'.format(ridge.score(X_test, y_test)))
	test_data = load_test_data()
	test = test_data.drop(['ID'], axis=1).values
	
	submission = load_submission()
	submission['item_cnt_month'] = ridge.predict(test).astype(np.float16).clip(0., 20.)
	submission.to_csv('../result_tmp/submit_180902_31-33_ridge.csv', encoding='utf-8-sig', index=False)
	logger.info('submission:\n{}'.format(submission.head()))
	logger.debug('RidgeRegression end')
	logger.debug('====================')
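A minimal sketch of the hold-out evaluation hinted at by the commented-out train_test_split and score lines above, assuming X and y are built as in ridge_submit() (note that Ridge.score() reports R^2, not accuracy):

from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

# Hypothetical hold-out check; X and y assumed to come from the scaled training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
ridge = Ridge()
ridge.fit(X_train, y_train)
print('R^2 on held-out set: {:.3f}'.format(ridge.score(X_test, y_test)))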
Example #2
def RandomForest():
    logger.info('RandomForestRegressor start')
    logger.debug('make_train_data start')
    train = pd.read_csv('./result_tmp/scaled_train.csv')

    y = train['item_cnt_month']
    X = train.drop(['item_cnt_month'], axis=1).values
    #X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    logger.debug('make_train_data end')

    logger.info('Fitting start')
    forest = RandomForestRegressor(n_estimators=50, random_state=1)
    forest.fit(X, y)
    logger.debug('Fitting end')

    logger.info('Scoring start')
    #logger.info('Accuracy on test set: {:.3f}'.format(forest.score(X_test, y_test)))
    test_data = load_test_data()
    test = test_data.drop(['ID'], axis=1).values

    submission = load_submission()
    submission['item_cnt_month'] = forest.predict(test).astype(
        np.float16).clip(0., 20.)
    submission.to_csv('./result_tmp/submit_180826_1st.csv',
                      encoding='utf-8-sig',
                      index=False)
    logger.info('submission:\n{}'.format(submission.head()))
    logger.debug('RandomForestRegressor end')
    logger.debug('====================')
Example #3
def make_train_in_test():
	logger.info('train in test starts')
	train = load_train_data()
	test = load_test_data()
	logger.info('train.org.shape:{}'.format(train.shape))
	test_shops = test.shop_id.unique()
	test_items = test.item_id.unique()
	train = train[train.shop_id.isin(test_shops)]
	train = train[train.item_id.isin(test_items)]
	train['date'] = pd.to_datetime(train['date'], format='%d.%m.%Y')
	train['month'] = train['date'].dt.month  # time-series feature: extract only the month
	logger.info('train in test.shape:{}'.format(train.shape))
	logger.debug('train in test ends')
	return train, test
Example #4
def forest_submit():
    logger = mylogger()
    logger.info('RandomForestRegressor start')
    logger.debug('make_train_data start')
    #train = pd.read_csv('./result_tmp/scaled_train.csv')
    train = pd.read_csv('./result_tmp/scaled_train_DateBlockNum.csv')
    #train = train[train['date_block_num']==33]  # most recent month only
    train = train.loc[(30 < train['date_block_num']) &
                      (train['date_block_num'] <= 33)]  # most recent 3 months

    y = train['item_cnt_month']
    X = train.drop(['item_cnt_month', 'date_block_num'], axis=1).values
    #X = train.drop(['item_cnt_month'], axis=1).values
    #X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    logger.debug('make_train_data end')

    logger.info('Fitting start')
    forest = RandomForestRegressor(n_estimators=50, random_state=1)
    forest.fit(X, y)
    logger.debug('Fitting end')

    # For EDA: inspect feature importances
    #fti = forest.feature_importances_
    #print('Feature Importances:')
    #for i, feature in enumerate(train.columns):
    #	print('\t{0:10s}:{1:>.6f}'.format(feature, fti[i]))

    logger.info('Scoring start')
    #logger.info('Accuracy on test set: {:.3f}'.format(forest.score(X_test, y_test)))
    test_data = load_test_data()
    test = test_data.drop(['ID'], axis=1).values

    submission = load_submission()
    submission['item_cnt_month'] = forest.predict(test).astype(
        np.float16).clip(0., 20.)
    #submission.to_csv('./result_tmp/submit_180826_1st.csv', encoding='utf-8-sig', index=False)
    submission.to_csv('./result_tmp/submit_180827_31-33.csv',
                      encoding='utf-8-sig',
                      index=False)
    logger.info('submission:\n{}'.format(submission.head()))
    logger.debug('RandomForestRegressor end')
    logger.debug('====================')
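The commented-out EDA block in forest_submit() iterates over train.columns; a minimal sketch of that feature-importance inspection, assuming train, X and y are built exactly as in the function above, could look like this:

from sklearn.ensemble import RandomForestRegressor

forest = RandomForestRegressor(n_estimators=50, random_state=1)
forest.fit(X, y)
# Column order must match the columns dropped when X was built.
feature_names = train.drop(['item_cnt_month', 'date_block_num'], axis=1).columns
for name, importance in sorted(zip(feature_names, forest.feature_importances_),
                               key=lambda pair: pair[1], reverse=True):
    print('{0:20s}: {1:.6f}'.format(name, importance))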
Example #5
	logger.debug('make_train_data end')

	logger.info('Fitting start')
	forest = RandomForestRegressor(n_estimators=50, random_state=1)
	forest.fit(X, y)
	logger.debug('Fitting end')
	
	# For EDA: inspect feature importances
	#fti = forest.feature_importances_
	#print('Feature Importances:')
	#for i, feature in enumerate(train.columns):
	#	print('\t{0:10s}:{1:>.6f}'.format(feature, fti[i]))

	logger.info('Scoring start')
	#logger.info('Accuracy on test set: {:.3f}'.format(forest.score(X_test, y_test)))
	test_data = load_test_data()
	test = test_data.drop(['ID'], axis=1).values
	
	submission = load_submission()
	submission['item_cnt_month'] = forest.predict(test).astype(np.float16).clip(0., 20.)
	#submission.to_csv('./result_tmp/submit_180826_1st.csv', encoding='utf-8-sig', index=False)
	submission.to_csv('./result_tmp/submit_180827_31-33.csv', encoding='utf-8-sig', index=False)
	logger.info('submission:\n{}'.format(submission.head()))
	logger.debug('RandomForestRegressor end')
	logger.debug('====================')


# CV: KFold, StratifiedKFold, TimeSeriesSplit, GroupKFold
# [http://scikit-learn.org/stable/modules/cross_validation.html]
# CV with TimeSeriesSplit()
# [http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.TimeSeriesSplit.html]
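A minimal sketch of the TimeSeriesSplit cross-validation referenced above, assuming X and y are ordered chronologically (e.g. sorted by date_block_num); the Ridge estimator and three splits are illustrative choices, not the original setup:

from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit, cross_val_score

tscv = TimeSeriesSplit(n_splits=3)
# Each fold trains on earlier time blocks and validates on the following one.
scores = cross_val_score(Ridge(), X, y, cv=tscv, scoring='neg_mean_squared_error')
print('MSE per fold: {}'.format(-scores))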
            accuracy = sess.run(self.accuracy_op, feed_dict=feed_dict)
            eval_accuracy += accuracy
            eval_iter += 1
        return eval_accuracy / eval_iter


num_training = 49000
num_validation = 50000 - num_training
num_test = 10000

# Load cifar-10 data
X_train, Y_train, X_val, Y_val = load_train_data()
X_test, Y_test = load_test_data()


print(X_train.shape)

# Clear old computation graphs
tf.reset_default_graph()

sess = tf.Session()

model = imageModel()
model.train(sess, X_train, Y_train, X_val, Y_val)
accuracy = model.evaluate(sess, X_test, Y_test)
print('***** test accuracy: %.3f' % accuracy)

# Save your model
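# A minimal TF1-style sketch of the saving step hinted at above; the checkpoint
# path is an assumption, not part of the original script.
saver = tf.train.Saver()
save_path = saver.save(sess, './result_tmp/cifar10_model.ckpt')
print('***** model saved to %s' % save_path)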