def main(operation='train', dataset='GBP_USD_D.csv'):
    """Train the Robotrade model on *dataset*, or predict and dump a CSV.

    Args:
        operation: 'train' trains a fresh model; any other value runs
            prediction and writes ./pred_csv_file_GU_D.csv.
        dataset: CSV file name passed to rawdata.read_sample_data.
    """
    step = 30
    input_size = 47
    train_step = 3700
    batch_size = 32
    learning_rate = 0.02
    hidden_size = 8
    nclasses = 1
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "UO",
                "BOLL", "MA", "STOCH", "AO"]
    input_shape = [30, 47]  # [length of time series, length of feature]

    raw_data_io = rawdata.read_sample_data(dataset)
    moving_features, moving_labels = extract_feature(raw_data=raw_data_io,
                                                     selector=selector,
                                                     window=input_shape[0],
                                                     with_label=True,
                                                     flatten=False)
    # Swap the last two axes of the feature tensor and turn the labels
    # into a single-column 2-D array.
    moving_features = numpy.transpose(numpy.asarray(moving_features), [0, 2, 1])
    moving_labels = numpy.reshape(numpy.asarray(moving_labels), [-1, 1])

    if operation == 'train':
        trader = Robotrade(step, input_size, learning_rate, hidden_size, nclasses)
        trader.build_graph()
        train(trader, moving_features, moving_labels, train_step, batch_size)
    else:
        df = predict(moving_features, moving_labels)
        df.to_csv("./pred_csv_file_GU_D.csv", sep=',', index=False)
def extractfeatureonly_from_file(filepath, output_prefix):
    """Extract label-free features from *filepath* and write the last
    ``days_for_test`` rows to ``<output_prefix>_feature_only.<window>``.

    Each row is written as tab-separated feature values followed by a newline.
    """
    days_for_test = 100
    input_shape = [30, 83]  # [length of time series, length of feature]
    window = input_shape[0]
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "ADX", "MA", "VMA", "PRICE_VOLUME"]
    raw_data = read_sample_data(filepath)
    moving_features = extract_feature(raw_data=raw_data, selector=selector,
                                      window=window, with_label=False,
                                      flatten=True)
    print("feature extraction done, start writing to file...")
    train_end_test_begin = moving_features.shape[0] - days_for_test
    # Clamp like the sibling extractors do, so a short series does not
    # silently emit extra rows via a negative range start.
    if train_end_test_begin < 0:
        train_end_test_begin = 0
    # Open only once extraction succeeded; "with" guarantees the file is
    # closed even if a write fails (the original leaked the handle on error).
    with open("%s_feature_only.%s" % (output_prefix, window), "w") as fp:
        for i in range(train_end_test_begin, moving_features.shape[0]):
            for item in moving_features[i]:
                fp.write("%s\t" % item)
            fp.write("\n")
def main(operation='train'):
    """Train SmartTrader on toy_stock.csv, or run prediction.

    Args:
        operation: 'train' builds and trains a new model; any other value
            runs predict() over the extracted features.
    """
    step = 30
    input_size = 61
    train_steps = 5000
    batch_size = 32
    learning_rate = 0.02
    hidden_size = 8
    nclasses = 1
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "MA", "VMA", "PRICE_VOLUME"]
    input_shape = [30, 61]  # [length of time series, length of feature]

    raw_data = read_sample_data("toy_stock.csv")
    moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                     selector=selector,
                                                     window=input_shape[0],
                                                     with_label=True,
                                                     flatten=False)
    # Swap the last two feature axes; reshape labels into one column.
    moving_features = numpy.transpose(numpy.asarray(moving_features), [0, 2, 1])
    moving_labels = numpy.reshape(numpy.asarray(moving_labels), [-1, 1])

    if operation == 'train':
        trader = SmartTrader(step, input_size, learning_rate, hidden_size, nclasses)
        trader.build_graph()
        train(trader, moving_features, moving_labels, train_steps, batch_size)
    else:
        predict(moving_features, moving_labels)
def extract_feature_and_write_to_file(fp, lp, fpt, lpt, filepath, selector, window):
    """Extract features/labels from *filepath* and stream them to open files.

    Args:
        fp, lp: open writable files for training features / labels.
        fpt, lpt: open writable files for test features / labels.
        filepath: CSV path handed to read_sample_data.
        selector: feature selector list for extract_feature.
        window: time-series window length.

    Rows are tab-separated feature values, one sample per line; labels are
    one value per line. The last ``points_for_test`` samples go to the
    test files, the rest to the training files.
    """
    raw_data = read_sample_data(filepath)
    moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                     selector=selector,
                                                     window=window,
                                                     with_label=True,
                                                     flatten=True)
    print("feature extraction done, start writing to file...")
    # NOTE(review): points_for_test is a free name — presumably a
    # module-level constant; confirm it is defined before this is called.
    split = max(moving_features.shape[0] - points_for_test, 0)
    # Training split.
    for row in moving_features[:split]:
        for value in row:
            fp.write("%s\t" % value)
        fp.write("\n")
    for label in moving_labels[:split]:
        lp.write("%s\n" % label)
    # Test split.
    for row in moving_features[split:]:
        for value in row:
            fpt.write("%s\t" % value)
        fpt.write("\n")
    for label in moving_labels[split:]:
        lpt.write("%s\n" % label)
def extract_from_file(filepath, output_prefix, window=30):
    """Extract features/labels from *filepath* and pickle train/test splits.

    Two dicts with keys ``code``/``feature``/``label`` are dumped
    back-to-back into ``<output_prefix>_feature`` with pickle protocol 2:
    first the training split, then the last ``days_for_test`` samples as
    the test split.

    Args:
        filepath: CSV path handed to read_sample_data.
        output_prefix: stock code; also used as the output-file prefix.
        window: time-series window length.
    """
    days_for_test = 700
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "MA", "VMA", "PRICE_VOLUME", "CROSS_PRICE"]
    raw_data = read_sample_data(filepath)
    moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                     selector=selector,
                                                     window=window,
                                                     with_label=True,
                                                     flatten=False)
    print("feature extraction done, start writing to file...")
    train_end_test_begin = max(moving_features.shape[0] - days_for_test, 0)
    # BUG FIX: pickle protocol 2 emits bytes, so the file must be opened in
    # binary mode ("wb"); the original's text-mode "w" raises TypeError on
    # Python 3. "with" also guarantees the handle is closed on error.
    with open("%s_feature" % output_prefix, "wb") as fp:
        train_set = {
            "code": output_prefix,
            "feature": moving_features[:train_end_test_begin],
            "label": moving_labels[:train_end_test_begin],
        }
        pickle.dump(train_set, fp, 2)
        test_set = {
            "code": output_prefix,
            "feature": moving_features[train_end_test_begin:],
            "label": moving_labels[train_end_test_begin:],
        }
        pickle.dump(test_set, fp, 2)
def train_with_shuffled_dataset(trader, batch_size, dataset_dir, dir_list, hidden_size,
                                input_shape, input_size, keep_rate, learning_rate,
                                nclasses, selector, step, train_steps, validation_size):
    """Build train/validation DataSets from every file in *dir_list*, then train.

    For each file the last *validation_size* samples are held out for
    validation and the rest are pooled into the training set.
    """
    tr_feats, tr_labels = [], []
    va_feats, va_labels = [], []
    for filename in dir_list:
        print("processing file: " + filename)
        raw_data = read_sample_data(os.path.join(dataset_dir, filename))
        feats, labels = extract_feature(raw_data=raw_data, selector=selector,
                                        window=input_shape[0], with_label=True,
                                        flatten=False)
        tr_feats.extend(feats[:-validation_size])
        tr_labels.extend(labels[:-validation_size])
        va_feats.extend(feats[-validation_size:])
        va_labels.extend(labels[-validation_size:])

    def _pack(features, labels):
        # Swap the last two feature axes and reshape labels into one column.
        f = numpy.transpose(numpy.asarray(features), [0, 2, 1])
        y = numpy.reshape(numpy.asarray(labels), [-1, 1])
        return DataSet(f, y)

    train_set = _pack(tr_feats, tr_labels)
    val_set = _pack(va_feats, va_labels)
    train(trader, train_set, val_set, train_steps,
          batch_size=batch_size, keep_rate=keep_rate)
def extract_from_file(filepath, output_prefix):
    """Extract features/labels and write tab-separated train/test text files.

    Produces four files named
    ``<output_prefix>_feature.<window>``, ``<output_prefix>_label.<window>``,
    ``<output_prefix>_feature.test.<window>`` and
    ``<output_prefix>_label.test.<window>``; the last ``days_for_test``
    samples form the test split.

    Args:
        filepath: CSV path handed to read_sample_data.
        output_prefix: prefix for the four output files.
    """
    days_for_test = 700
    input_shape = [30, 61]  # [length of time series, length of feature]
    window = input_shape[0]
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "MA", "VMA", "PRICE_VOLUME"]
    raw_data = read_sample_data(filepath)
    moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                     selector=selector,
                                                     window=window,
                                                     with_label=True,
                                                     flatten=True)
    print("feature extraction done, start writing to file...")
    train_end_test_begin = max(moving_features.shape[0] - days_for_test, 0)
    # "with" guarantees all four handles are closed even if a write raises
    # (the original leaked them on any error between open() and close()).
    with open("%s_feature.%s" % (output_prefix, window), "w") as fp, \
         open("%s_label.%s" % (output_prefix, window), "w") as lp, \
         open("%s_feature.test.%s" % (output_prefix, window), "w") as fpt, \
         open("%s_label.test.%s" % (output_prefix, window), "w") as lpt:
        # Training split.
        for i in range(0, train_end_test_begin):
            for item in moving_features[i]:
                fp.write("%s\t" % item)
            fp.write("\n")
        for i in range(0, train_end_test_begin):
            lp.write("%s\n" % moving_labels[i])
        # Test split.
        for i in range(train_end_test_begin, moving_features.shape[0]):
            for item in moving_features[i]:
                fpt.write("%s\t" % item)
            fpt.write("\n")
        for i in range(train_end_test_begin, moving_features.shape[0]):
            lpt.write("%s\n" % moving_labels[i])
def extract_with_filename(filepath, selector, test_percentage=0.2, prospective=1,
                          window=30, N_predict=3):
    """Read *filepath*, extract features, and return train/test/predict splits.

    Args:
        filepath: CSV path handed to read_sample_data.
        selector: feature selector list for extract_feature.
        test_percentage: fraction of the training pool held out for testing.
        prospective: forwarded to extract_feature.
        window: time-series window length.
        N_predict: forwarded to extract_feature.

    Returns:
        (train_data, test_data, predict_data) tuple.
    """
    raw_data = read_sample_data(filepath)
    all_train, predict_data = extract_feature(raw_data=raw_data,
                                              selector=selector,
                                              prospective=prospective,
                                              window=window,
                                              N_predict=N_predict,
                                              flatten=False)
    print("feature extraction done, start writing to file...")
    train_data, test_data = Train_test(all_train, test_percentage)
    return train_data, test_data, predict_data
# NOTE(review): window, input_shape and days_for_test are free names in this
# top-level span — presumably defined earlier in the file; confirm. The test
# files (fpt/lpt) are opened but not written within this span.
fp = open("ultimate_feature.%s" % window, "w")
lp = open("ultimate_label.%s" % window, "w")
fpt = open("ultimate_feature.test.%s" % window, "w")
lpt = open("ultimate_label.test.%s" % window, "w")
selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
            "BOLL", "ADX", "MA", "VMA", "PRICE_VOLUME"]
dataset_dir = "./dataset"
for filename in os.listdir(dataset_dir):
    print("processing file: " + filename)
    filepath = dataset_dir + "/" + filename
    raw_data = read_sample_data(filepath)
    moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                     selector=selector,
                                                     window=input_shape[0],
                                                     with_label=True,
                                                     flatten=True)
    print("feature extraction done, start writing to file...")
    split = moving_features.shape[0] - days_for_test
    if split < 0:
        split = 0
    # Training split for this file.
    for row in moving_features[:split]:
        for item in row:
            fp.write("%s\t" % item)
        fp.write("\n")
    for label in moving_labels[:split]:
        lp.write("%s\n" % label)
def main(operation='train', code=None):
    """Train SmartTrader over every file in ./dataset, or predict for one code.

    Args:
        operation: 'train' or 'predict'; anything else prints an error.
        code: optional stock code selecting ./dataset/<code>.csv for predict.
    """
    step = 30
    input_size = 61
    # train_steps = 1000000
    train_steps = 2
    batch_size = 512
    learning_rate = 0.001
    hidden_size = 14
    nclasses = 1
    validation_size = 700
    keep_rate = 0.7
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "MA", "VMA", "PRICE_VOLUME"]
    input_shape = [30, 61]  # [length of time series, length of feature]

    if operation == 'train':
        dataset_dir = "./dataset"
        tr_feats, tr_labels = [], []
        va_feats, va_labels = [], []
        for filename in os.listdir(dataset_dir):
            print("processing file: " + filename)
            filepath = dataset_dir + "/" + filename
            raw_data = read_sample_data(filepath)
            feats, labels = extract_feature(raw_data=raw_data, selector=selector,
                                            window=input_shape[0], with_label=True,
                                            flatten=False)
            # Hold out the last validation_size samples of each file.
            tr_feats.extend(feats[:-validation_size])
            tr_labels.extend(labels[:-validation_size])
            va_feats.extend(feats[-validation_size:])
            va_labels.extend(labels[-validation_size:])
        train_set = DataSet(numpy.transpose(numpy.asarray(tr_feats), [0, 2, 1]),
                            numpy.reshape(numpy.asarray(tr_labels), [-1, 1]))
        val_set = DataSet(numpy.transpose(numpy.asarray(va_feats), [0, 2, 1]),
                          numpy.reshape(numpy.asarray(va_labels), [-1, 1]))
        trader = SmartTrader(step, input_size, learning_rate, hidden_size, nclasses)
        trader.build_graph()
        train(trader, train_set, val_set, train_steps,
              batch_size=batch_size, keep_rate=keep_rate)
    elif operation == "predict":
        predict_file_path = "./dataset/000001.csv"
        if code is not None:
            predict_file_path = "./dataset/%s.csv" % code
        print("processing file %s" % predict_file_path)
        raw_data = read_sample_data(predict_file_path)
        feats, labels = extract_feature(raw_data=raw_data, selector=selector,
                                        window=input_shape[0], with_label=True,
                                        flatten=False)
        feats = numpy.transpose(numpy.asarray(feats), [0, 2, 1])
        labels = numpy.reshape(numpy.asarray(labels), [-1, 1])
        val_set = DataSet(feats[-validation_size:], labels[-validation_size:])
        predict(val_set, step=step, input_size=input_size,
                learning_rate=learning_rate, hidden_size=hidden_size,
                nclasses=nclasses)
    else:
        print("Operation not supported. ")
def main(operation='train', code=None):
    """Train SmartTrader epoch-by-epoch on random 10-file subsets, or predict.

    Args:
        operation: 'train' or 'predict'; anything else prints an error.
        code: optional stock code selecting ./dataset/<code>.csv for predict.
    """
    step = 30
    input_size = 61
    train_epoch = 100000
    train_steps = 1000
    batch_size = 512
    learning_rate = 0.001
    hidden_size = 14
    nclasses = 1
    validation_size = 100
    keep_rate = 0.7
    selector = ["ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP",
                "BOLL", "MA", "VMA", "PRICE_VOLUME"]
    input_shape = [step, input_size]  # [length of time series, length of feature]

    if operation == 'train':
        trader = SmartTrader(step, input_size, learning_rate, hidden_size, nclasses)
        trader.build_graph()
        dataset_dir = "./dataset/debug/"
        for i in range(train_epoch):
            print('shuffle training data')
            dir_list = os.listdir(dataset_dir)
            # A single random.shuffle already produces a uniformly random
            # permutation; the original's five consecutive shuffles were
            # redundant and have been collapsed into one.
            random.shuffle(dir_list)
            train_with_shuffled_dataset(trader, batch_size, dataset_dir,
                                        dir_list[0:10], hidden_size, input_shape,
                                        input_size, keep_rate, learning_rate,
                                        nclasses, selector, step, train_steps,
                                        validation_size)
    elif operation == "predict":
        predict_file_path = "./dataset/000001.csv"
        if code is not None:
            predict_file_path = "./dataset/%s.csv" % code
        print("processing file %s" % predict_file_path)
        raw_data = read_sample_data(predict_file_path)
        moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                         selector=selector,
                                                         window=input_shape[0],
                                                         with_label=True,
                                                         flatten=False)
        moving_features = numpy.transpose(numpy.asarray(moving_features), [0, 2, 1])
        moving_labels = numpy.reshape(numpy.asarray(moving_labels), [-1, 1])
        val_set = DataSet(moving_features[-validation_size:],
                          moving_labels[-validation_size:])
        predict(val_set, step=step, input_size=input_size,
                learning_rate=learning_rate, hidden_size=hidden_size,
                nclasses=nclasses)
    else:
        print("Operation not supported. ")