def get_test_performance(epoch, modelFilename='model.h5', model=None): import codecs codeListFilename = 'input_code.csv' codeMap = {} f = codecs.open(codeListFilename, "r", "utf-8") for line in f: if line.strip() != "": tokens = line.strip().split( ",") if not "\t" in line else line.strip().split("\t") codeMap[tokens[0]] = tokens[1] f.close() env = MarketEnv(dir_path = "./If_index/", target_codes = \ codeMap.keys(), start_date = "2015-05-29", \ end_date = "2016-08-25", sudden_death = -1.0) target_close = env.get_close() from keras.optimizers import SGD if (model == None and modelFilename == 'model_dqn.h5'): model = MarketModelBuilder(modelFilename).getModel() elif (model == None and modelFilename == 'model_pg.h5'): model = MarketPolicyGradientModelBuilder(modelFilename).getModel() loss = 0. game_over = False # get initial input input_t = env.reset() cumReward = 0 cum_profit = {} pre_action = {} while not game_over: input_tm1 = input_t q = model.predict(input_tm1) action = np.argmax(q[0]) input_t, reward, game_over, info = env.step(action) cumReward += reward cum_profit[info["dt"]] = cumReward if env.actions[action] == "LONG" or env.actions[action] == "SHORT": pre_action[info['dt']] = env.actions[action] color = bcolors.FAIL if env.actions[ action] == "LONG" else bcolors.OKBLUE print "%s:\t%s\t%d\t%.2f\t%.2f\t" % (info["dt"], color + env.actions[action] + \ bcolors.ENDC, info['correct_action'], cumReward, info["cum"]) + ("\t".join(["%s:%.2f" % (l, i)\ for l, i in zip(env.actions, q[0].tolist())]) ) print len(cum_profit.keys()), len(target_close) plot_profit(cum_profit, target_close, pre_action, "test_" + str(epoch)) return cum_profit, target_close
                target_codes=list(codeMap.keys()), input_codes=[],
                start_date="2013-08-26", end_date="2015-08-25",
                sudden_death=-1.0)
# NOTE(review): this chunk begins mid-call — the "env = MarketEnv(" opener is
# above this fragment; confirm against the full file.

# parameters
epsilon = .5  # exploration: probability of taking a random action
min_epsilon = 0.1  # floor for the annealed exploration rate
epoch = 100000  # number of training episodes
max_memory = 5000  # experience-replay buffer capacity
batch_size = 128
discount = 0.8  # reward discount factor (gamma)

from keras.optimizers import SGD
model = MarketModelBuilder(modelFilename).getModel()
# NOTE(review): sgd is constructed but 'rmsprop' is what compile() uses —
# the SGD instance appears unused; confirm intent.
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer='rmsprop')

# Initialize experience replay object
exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

# Train
win_cnt = 0  # episodes that ended with positive cumulative reward
for e in range(epoch):
    loss = 0.
    env.reset()
    game_over = False
    # get initial input (reset is called twice here, matching the original)
    input_t = env.reset()

    cumReward = 0
    # NOTE(review): fragment is truncated here — the rest of the episode loop
    # continues beyond this chunk.
from market_model_builder import MarketModelBuilder
from keras.optimizers import SGD
from environment import MarketEnv
import numpy as np

# Build the model, then load previously-trained weights for inference.
model_filename = "model.h5"
model = MarketModelBuilder(model_filename).getModel()
# NOTE(review): sgd is constructed but compile() uses 'rmsprop'; the SGD
# instance appears unused — confirm intent.
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mse', optimizer='rmsprop')
model.load_weights("model.h5")

# NOTE(review): trailing slash in start_date "2015/04/27/" looks like a typo
# compared with end_date "2018/04/27" — confirm against MarketEnv's parser.
env = MarketEnv(target_symbols=["1"], input_symbols=[],
                start_date="2015/04/27/", end_date="2018/04/27",
                sudden_death=-1.0)

data = env.get_data(symbol="1")
data_list = list(data.values())
prediction = []
for j in range(len(data_list)):
    # Need a 60-step lookback window, so skip the first 59 rows.
    if j < 59:
        continue
    state = []
    subject = []
    subject_vol = []
    for i in range(60):
        # Column index 2 presumably holds the price used as the model input
        # — TODO confirm against env.get_data's row layout.
        subject.append([data_list[j - i][2]])
# NOTE(review): fragment is truncated here — the rest of the window-building
# loop continues beyond this chunk.
f.close()

env = MarketEnv(target_symbols=list(instruments.keys()), input_symbols=[],
                start_date="2010-08-26", end_date="2015-08-25",
                sudden_death=-1.0)

# parameters
epsilon = 0.5  # exploration: probability of taking a random action
min_epsilon = 0.1  # floor for the annealed exploration rate
epoch = 100000  # number of training episodes
max_memory = 5000  # experience-replay buffer capacity
batch_size = 128
discount = 0.8  # reward discount factor (gamma)

model = MarketModelBuilder(model_filename).getModel()
# NOTE(review): sgd is constructed but compile() uses 'rmsprop'; the SGD
# instance appears unused — confirm intent.
sgd = SGD(lr = 0.001, decay = 1e-6, momentum = 0.9, nesterov = True)
model.compile(loss='mse', optimizer='rmsprop')

# Initialize experience replay object
exp_replay = ExperienceReplay(max_memory = max_memory, discount = discount)

# Train
win_cnt = 0  # episodes that ended with positive cumulative reward
for e in range(epoch):
    loss = 0.
    # NOTE(review): this variant calls the underscore-prefixed _reset();
    # other chunks use reset() — confirm which MarketEnv API this targets.
    env._reset()
    game_over = False
    # get initial input (reset is called twice here, matching the original)
    input_t = env._reset()

    cumReward = 0
    # NOTE(review): fragment is truncated here — the rest of the episode loop
    # continues beyond this chunk.
def train(): import sys import codecs codeListFilename = sys.argv[1] modelFilename = sys.argv[2] if len(sys.argv) > 2 else None codeMap = {} f = codecs.open(codeListFilename, "r", "utf-8") for line in f: if line.strip() != "": tokens = line.strip().split( ",") if not "\t" in line else line.strip().split("\t") codeMap[tokens[0]] = tokens[1] f.close() env = MarketEnv(dir_path = "./If_index/", target_codes = \ codeMap.keys(), start_date = "2013-05-26", \ end_date = "2015-08-25", sudden_death = -1.0) target_close = env.get_close() # parameters epsilon = .45 # exploration min_epsilon = 0.1 epoch = 1000 max_memory = 5000 batch_size = 256 discount = 0.9 update_target_weight_step = 10 from keras.optimizers import SGD model = MarketModelBuilder(modelFilename).getModel() target_model = MarketModelBuilder(modelFilename).getModel() sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='mse', optimizer='rmsprop') target_model.compile(loss='mse', optimizer='rmsprop') # Initialize experience replay object exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount) # Train win_cnt = 0 for e in range(epoch): loss = 0. env.reset() game_over = False # get initial input input_t = env.reset() cumReward = 0 cum_profit = {} pre_action = {} iter_cnt = 0 while not game_over: input_tm1 = input_t isRandom = False # get next action if np.random.rand() <= epsilon: action = np.random.randint(0, env.action_space.n, size=1)[0] isRandom = True else: q = model.predict(input_tm1) action = np.argmax(q[0]) if np.nan in q: print "OCCUR NaN!!!" 
exit() # apply action, get rewards and new state input_t, reward, game_over, info = env.step(action) cumReward += reward cum_profit[info["dt"]] = cumReward if env.actions[action] == "LONG" or env.actions[action] == "SHORT": color = bcolors.FAIL if env.actions[ action] == "LONG" else bcolors.OKBLUE pre_action[info['dt']] = env.actions[action] if isRandom: color = bcolors.WARNING if env.actions[ action] == "LONG" else bcolors.OKGREEN print "%s:\t%s\t%d\t%.5f\t%.2f\t" % (info["dt"], color + env.actions[action] + \ bcolors.ENDC, info['correct_action'], cumReward, info["cum"]) + ("\t".join(["%s:%.2f" % (l, i)\ for l, i in zip(env.actions, q[0].tolist())]) if isRandom == False else "") # store experience exp_replay.remember([input_tm1, action, reward, input_t], game_over) # adapt model if (len(exp_replay.memory) >= batch_size): inputs, targets = exp_replay.get_batch(target_model, batch_size=batch_size) loss += model.train_on_batch(inputs, targets) if (iter_cnt % update_target_weight_step == 0 ): # update target estimator every 5 step print "update target model weights" target_model = copy_model_weight(model, target_model) iter_cnt += 1 if cumReward > 0 and game_over: win_cnt += 1 test_util.plot_profit(cum_profit, target_close, pre_action, "dqn_train_" + str(e)) test_util.get_test_performance(e, 'model_dqn.h5', model) print("Epoch {:03d}/{} | Loss {:.4f} | Win count {} | Epsilon {:.4f}". format(e, epoch, loss, win_cnt, epsilon)) # Save trained model weights and architecture, this will be used by the visualization code if (e / 5 == 0 and e != 0): model.save_weights( "model.h5" if modelFilename == None else modelFilename, overwrite=True) epsilon = max(min_epsilon, epsilon * 0.99)
env = MarketEnv(target_symbols=list(instruments.keys()), input_symbols=[], start_date="2015/04/27", end_date="2018/04/27", sudden_death=-1.0) # parameters epsilon = 0.5 # exploration min_epsilon = 0.1 epoch = 100000 max_memory = 5000 batch_size = 128 discount = 0.8 model = MarketModelBuilder(model_filename).getModel() model.summary() sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) model.compile(loss='mse', optimizer='rmsprop') model.load_weights("model.h5") # Initialize experience replay object exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount) # Train win_cnt = 0 for e in range(epoch): loss = 0. env.reset() game_over = False # get initial input