def get_roll_params():
    """Creates the evaluation environment and sets up the rollout parameters."""
    env = MarketEnv("BAC", 3, is_eval=True, max_positions=10,
                    train_test_split=0.8, max_episode_len=1000000,
                    shares_to_buy=2000)
    max_path_length, ep_length_stop = env.l, env.l
    print('\nMAX PATH LENGTH, EP LENGTH STOP: {}, {}\n'.format(max_path_length, ep_length_stop))
    return env, max_path_length, ep_length_stop
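# A minimal usage sketch for get_roll_params(); it assumes env.step(choice, t)
# returns (next_state, reward, done) as in the training script further below,
# and the random policy is purely illustrative, not part of the project.
import random

def _example_rollout():
    env, max_path_length, ep_length_stop = get_roll_params()
    state = env.reset()
    for t in range(min(max_path_length, ep_length_stop)):
        choice = random.randint(0, 2)  # hypothetical discrete choice in {0, 1, 2}
        next_state, reward, done = env.step(choice, t)
        state = next_state
        if done:
            break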
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
from env import MarketEnv
# import keras
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Conv2D
from keras.layers import LSTM, Dropout, Activation, Convolution2D, Convolution1D, MaxPooling2D, Flatten, GlobalMaxPooling1D
from keras.optimizers import Adam, RMSprop
from keras.utils import np_utils
from expMetrix import ExperienceReplay
from model import NerualModel

env = MarketEnv("data")
epoch = 1000000
epsilon = 0.5
batch_size = 30

Neural = NerualModel()
model = Neural.getModel()
rms = RMSprop()
model.compile(loss='mse', optimizer=rms)

exp_replay = ExperienceReplay()

for e in range(epoch):
    # loss = 0.
    game_over = False
    input_t = env.reset()
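# A minimal sketch of one way the episode loop above could continue, assuming
# ExperienceReplay exposes remember()/get_batch() style helpers and that
# MarketEnv.step(action) returns (next_state, reward, game_over); these names
# and signatures are assumptions, not confirmed by this repository.
def _train_one_episode(env, model, exp_replay, epsilon, batch_size, n_actions=3):
    input_t = env.reset()
    game_over = False
    loss = 0.
    while not game_over:
        input_tm1 = input_t
        # epsilon-greedy action selection
        if np.random.rand() <= epsilon:
            action = np.random.randint(0, n_actions)
        else:
            q = model.predict(input_tm1)
            action = int(np.argmax(q[0]))
        input_t, reward, game_over = env.step(action)  # assumed signature
        exp_replay.remember([input_tm1, action, reward, input_t], game_over)  # assumed API
        inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)  # assumed API
        loss += model.train_on_batch(inputs, targets)
    return loss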
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

import numpy as np
from env import MarketEnv
from dataformator import DataFormator

env = MarketEnv("data")

import tensorflow as tf
import keras
from keras.utils import np_utils
from keras.models import Model, Sequential
from keras.layers import LSTM, Dense, concatenate
from keras.layers import Input, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.optimizers import RMSprop
from keras import optimizers
from numpy import array
from os import path
import sys

print("preparing model")

ls1Ip = Input(shape=(30, 5, 1))
ls11 = Conv2D(64, (1, 1), padding='same', activation='relu')(ls1Ip)
x_drop4 = Dropout(0.5)(ls11)
ls12 = Conv2D(64, (2, 2), padding='same', activation='relu')(x_drop4)
ls13 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(ls12)
out = Flatten()(ls13)
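# A minimal sketch of how the convolutional branch above could be closed into a
# trainable model; the Dense head, the 3-way linear output and the RMSprop
# settings are illustrative assumptions, not the repository's actual architecture.
dense = Dense(128, activation='relu')(out)
q_values = Dense(3, activation='linear')(dense)  # assumed: one value per discrete action
example_model = Model(inputs=ls1Ip, outputs=q_values)
example_model.compile(loss='mse', optimizer=RMSprop())
example_model.summary()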
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
from env import MarketEnv
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D
from keras.layers import LSTM, Dropout, Activation, Convolution2D, Convolution1D, MaxPooling2D, Flatten, GlobalMaxPooling1D
from keras.optimizers import Adam
from keras.utils import np_utils

env = MarketEnv("data/20150917.txt")
epoch = 1000000
epsilon = .5

# Toy regression data: 200 scalar inputs mapped to 5-dimensional targets of ones
X = np.linspace(-1, 1, 200)
np.random.shuffle(X)
s = (200, 5)
Y = np.ones(s)
X_train, Y_train = X[:160], Y[:160]

model = Sequential()
model.add(Dense(units=5, input_dim=1))
model.add(Activation('relu'))

adam = Adam(lr=1e-4)
model.compile(loss='mse', optimizer='sgd')
# model.compile(optimizer=adam,
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])
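# A minimal fit/evaluate sketch for the toy regression model above; the epoch
# count and the test split below are illustrative assumptions, not values taken
# from the project.
X_test, Y_test = X[160:], Y[160:]
model.fit(X_train.reshape(-1, 1), Y_train, epochs=10, batch_size=32, verbose=0)
test_loss = model.evaluate(X_test.reshape(-1, 1), Y_test, verbose=0)
print("toy-model test MSE:", test_loss)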
        for f in filenames:
            files.append(f.replace(".csv", ""))
        break
    return files


if __name__ == "__main__":
    s_and_p = [
        'ADI', 'AJG', 'APD', 'CVX', 'DLR', 'DVA', 'ETN', 'HES', 'INTU', 'IT',
        'L', 'MAR', 'MET', 'MMM', 'NOC', 'NSC', 'PLD', 'SPGI', 'TJX', 'TMO'
    ]

    # Train one DeepQ agent per stock on the training split
    for stock in s_and_p:
        env = MarketEnv(dir_path="./split_data/train/", target_codes=stock,
                        sudden_death_rate=0.3, finalIndex=997)  # 1259
        pg = DeepQ(env, gamma=0.80,
                   model_file_name="./model/model_" + stock + ".h5")
        pg.train()

    reward_stock = []
    reward_stock_random = []

    # Evaluate each trained agent on the test split
    for stock in s_and_p:
        env = MarketEnv(dir_path="./split_data/test/", target_codes=stock,
                        sudden_death_rate=0.3, finalIndex=256)
        test_obj = DeepQ(env,
def main():
    nb_actions = 3
    obs_size = 9
    window_size = 10
    batch_size = 2048
    stock = "BAC"
    episode = 35
    total_spent = 0
    total_sold = 0

    agent = Agent(window_size=window_size, action_size=nb_actions,
                  batch_size=batch_size, gamma=0.95, epsilon=1.0,
                  epsilon_decay=0.99, epsilon_min=0.001, learning_rate=0.001,
                  is_eval=True, stock_name=stock, episode=episode)
    env = MarketEnv(stock, window_size=window_size, state_size=obs_size,
                    account_balance=1000000, is_eval=True, shares_to_buy=100,
                    max_positions=1000, train_test_split=.8)

    state = env.reset()
    for time in range(env.l):
        action = agent.act(state)[0]
        # Map the continuous actor output to one of the three discrete choices
        # expected by env.step()
        if action < 0:
            choice = 2
        elif action > 0 and action < 1:
            choice = 0
        elif action > 1:
            choice = 1
        next_state, action, reward, done = env.step(choice, time)
        agent.remember(state, action, reward, next_state, done)
        state = next_state

    prices = [line[3] for line in env.prices]
    dates = [i for i in range(len(env.prices))]
    plt.plot(dates, prices)

    for line in env.buy:
        plt.plot(line[0], line[1], 'ro', color="g", markersize=2)
        total_spent += line[1]
    for line in env.sell:
        plt.plot(line[0], line[1], "ro", color="r", markersize=2)
        total_sold += line[1]

    percentage_gain = ((env.account_balance - env.starting_balance) / env.starting_balance) * 100
    print("Profitable Trades: " + str(env.profitable_trades))
    print("Unprofitable Trades: " + str(env.unprofitable_trades))
    print("Percentage Gain: " + str(percentage_gain))
    print("Amount Spent: " + str(total_spent))
    print("Amount Sold: " + str(total_sold))

    # Save before showing so the written figure is not blank
    plt.savefig("models/{}/{}-{}/{}".format(stock, stock, str(episode), stock))
    plt.show()
def main():
    window_size = 10
    batch_size = 2048
    episodes = 10000
    max_episode_len = 39000 * 3  # One year of trading in minutes
    stock = "BAC"
    args = {
        'tau': .001,
        'gamma': .99,
        'lr_actor': .0001,
        'lr_critic': .001,
        'batch_size': max_episode_len
    }

    env = MarketEnv(stock, buy_position=3, window_size=window_size,
                    account_balance=1000000, shares_to_buy=100,
                    train_test_split=.8, max_episode_len=max_episode_len)
    agent = Agent(args, state_size=env.state_size, window_size=env.window_size,
                  action_size=env.action_size, action_bound=env.action_bound[1],
                  is_eval=False, stock_name=stock)

    episode_ave_max_q = 0
    ep_reward = 0

    for i in range(episodes):
        state = env.reset()
        for time in range(env.l):
            action = agent.act(state)[0]
            # Map the continuous actor output to one of the three discrete
            # choices expected by env.step()
            if action < 0:
                choice = 2
            elif action > 0 and action < 1:
                choice = 0
            elif action > 1:
                choice = 1
            next_state, reward, done = env.step(choice, time)
            agent.remember(state, action, reward, next_state, done)
            state = next_state

            # if agent.replay_buffer.size() == batch_size:
            #     print("Replaying")
            #     episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)

            ep_reward += reward
            if done or time == env.l - 1:
                episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)
                break

        model_name = "{}-{}".format(stock, str(i))
        path = "models/{}/{}/".format(stock, model_name)
        if i % 5 == 0:
            if not os.path.exists(path):
                os.makedirs(path)
            with open(os.path.join(path, 'LTYP.mif'), 'w'):
                pass
            agent.saver.save(agent.sess, path + model_name, global_step=i)

            summary_str = agent.sess.run(agent.summary_ops, feed_dict={
                agent.summary_vars[0]: ep_reward,
                agent.summary_vars[1]: episode_ave_max_q
            })
            agent.writer.add_summary(summary_str, i)
            agent.writer.flush()

            episode_ave_max_q = 0
            ep_reward = 0

        print('| Reward: {:d} | Episode: {:d} | Qmax: {:.4f}'.format(
            int(ep_reward), i, episode_ave_max_q))
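# The same continuous-to-discrete mapping appears in both the training and the
# evaluation scripts; a small helper like this sketch could centralise it. The
# thresholds mirror the in-line logic above, the function name is hypothetical,
# and boundary values (exactly 0 or 1), left undefined in the original, fall
# into the nearest branch here.
def action_to_choice(action):
    """Map a scalar actor output to one of the three discrete env choices."""
    if action < 0:
        return 2
    if action < 1:
        return 0
    return 1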