def restore(self):
    """Rebuild the DDPG agent and load its saved weights into ``self.model``.

    Starts a session, builds the actor and critic networks under the
    variable scope derived from the window length, predictor type and
    batch-norm flag, wraps them in a ``DDPG`` model that has no environment
    attached, and initializes that model with ``load_weights=True``.
    """
    self.start_session()

    # One slot per testing stock plus one extra
    # (presumably the cash position -- TODO confirm).
    n_outputs = len(self.testing_stocks) + 1
    action_dim = [n_outputs]
    state_dim = [n_outputs, self.window_length]

    scope_name = get_variable_scope(self.window_length,
                                    self.predictor_type,
                                    self.use_batch_norm)
    with tf.variable_scope(scope_name):
        train_cfg = self.config["training"]

        # Settings that the actor and the critic have in common.
        shared_kwargs = dict(
            sess=self.sess,
            feature_number=self.feature_number,
            state_dim=state_dim,
            action_dim=action_dim,
            tau=self.tau,
            predictor_type=self.predictor_type,
            use_batch_norm=self.use_batch_norm,
            activation_function=self.activation_function,
        )

        actor = StockActor(
            action_bound=self.action_bound,
            learning_rate=train_cfg["actor learning rate"],
            decay_rate=train_cfg["actor decay rate"],
            decay_steps=train_cfg["actor decay steps"],
            weight_decay=train_cfg["actor weight decay"],
            batch_size=self.batch_size,
            **shared_kwargs
        )
        critic = StockCritic(
            learning_rate=train_cfg["critic learning rate"],
            decay_rate=train_cfg["critic decay rate"],
            decay_steps=train_cfg["critic decay steps"],
            weight_decay=train_cfg["critic weight decay"],
            num_actor_vars=actor.get_num_trainable_vars(),
            **shared_kwargs
        )
        noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        print(self.model_save_path)
        restored = DDPG(env=None,
                        sess=self.sess,
                        actor=actor,
                        critic=critic,
                        actor_noise=noise,
                        obs_normalizer=obs_normalizer,
                        config=self.config,
                        model_save_path=self.model_save_path,
                        summary_path=self.summary_path)
        restored.initialize(load_weights=True, verbose=False)

    self.model = restored
def predict_next_day(env, sess, actor, critic, actor_noise, norm_func=None):
    """Load the saved DDPG weights and print the action for the latest observation.

    Args:
        env: environment handed to the ``DDPG`` constructor.
        sess: session shared by the actor and the critic.
        actor: actor network.
        critic: critic network.
        actor_noise: exploration-noise process passed through to ``DDPG``.
        norm_func: optional observation normalizer (``obs_normalizer``).

    The observation itself comes from a fresh ``PortfolioEnv`` built on the
    module-level ``last_history`` / ``target_stocks`` / ``window_length``,
    not from ``env``. Nothing is returned; the data and the action are printed.
    """
    model = DDPG(env, sess, actor, critic, actor_noise,
                 obs_normalizer=norm_func,
                 config_file='config/stock.json',
                 model_save_path=model_save_path,
                 summary_path=summary_path)
    model.initialize(load_weights=True)

    # Zero-step, zero-cost environment used only to read the last observation.
    prediction_env = PortfolioEnv(last_history,
                                  target_stocks,
                                  steps=0,
                                  window_length=window_length,
                                  start_idx=0,
                                  trading_cost=0.0,
                                  sample_start_date='2019-12-26')
    print("data=", last_history)

    # The raw observation is passed straight to the model; normalization is
    # left to the ``obs_normalizer`` given to DDPG above.
    last_observation = prediction_env.get_last_observation()
    predicted_action = model.predict_single(last_observation)
    print("action=", predicted_action)
use_batch_norm) summary_path = get_result_path(window_length, predictor_type, use_batch_norm) variable_scope = get_variable_scope(window_length, predictor_type, use_batch_norm) with tf.variable_scope(variable_scope): sess = tf.Session() actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size, predictor_type, use_batch_norm) critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=1e-3, learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(), predictor_type=predictor_type, use_batch_norm=use_batch_norm) ddpg_model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer, config_file='config/stock.json', model_save_path=model_save_path, summary_path=summary_path) ddpg_model.initialize(load_weights=False) ddpg_model.train()
if __name__ == '__main__':
    # Pendulum-v0 demo: restore a DDPG agent from its checkpoint and test it.
    env = gym.make('Pendulum-v0')
    sess = tf.Session()

    # Dimensions and hyper-parameters.
    action_dim = [1]
    state_dim = [3]
    batch_size = 64
    action_bound = 2.
    tau = 1e-3

    actor = PendulumActor(
        sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size
    )
    critic = PendulumCritic(
        sess=sess,
        state_dim=state_dim,
        action_dim=action_dim,
        tau=tau,
        learning_rate=1e-3,
        num_actor_vars=actor.get_num_trainable_vars(),
    )
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

    ddpg_model = DDPG(
        env, sess, actor, critic, actor_noise,
        action_processor=None,
        config_file='config/pendulum.json',
        model_save_path='weights/pendulum/checkpoint.ckpt',
        summary_path='results/pendulum/',
    )

    # Weights are loaded rather than trained here; call ddpg_model.train()
    # (with load_weights=False) to retrain from scratch.
    ddpg_model.initialize(load_weights=True)
    test_model(env, ddpg_model, 10)
if __name__ == '__main__':
    # CartPole-v0 demo: train a DDPG agent from scratch, then test it.
    sess = tf.Session()
    env = gym.make('CartPole-v0')

    # Dimensions and hyper-parameters.
    action_dim = [2]
    state_dim = [4]
    batch_size = 64
    tau = 1e-3

    actor = CartPoleActor(
        sess, state_dim, action_dim, 1., 1e-4, tau, batch_size
    )
    critic = CartPoleCritic(
        sess=sess,
        state_dim=state_dim,
        action_dim=action_dim,
        tau=tau,
        learning_rate=1e-3,
        num_actor_vars=actor.get_num_trainable_vars(),
    )
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

    # np.argmax is handed to DDPG as the action processor.
    ddpg_model = DDPG(
        env, sess, actor, critic, actor_noise,
        action_processor=np.argmax,
        model_save_path='weights/cartpole/checkpoint.ckpt',
        summary_path='results/cartpole/',
    )
    ddpg_model.initialize(load_weights=False)
    ddpg_model.train()
    test_model(env, ddpg_model, 10)
def visualise_Data():
    """Plot the stock history and back-test the pre-trained DDPG models.

    Steps, in order:

    1. Load the pickled price history and stock names, and build (and print)
       a small three-stock sample.
    2. Reload the full dataset and split it along the time axis at index 1095
       into a training and a testing history.
    3. Plot column 1 of every training series.
    4. Load the imitation models (LSTM and CNN) and the DDPG models (CNN and
       LSTM predictors) for every window length in ``[3, 7, 14, 21]``.
    5. Run only the DDPG models through ``test_model_multiple`` on a
       ``MultiActionPortfolioEnv`` built from the training history.

    Returns nothing; the results are printed and plotted.
    """
    with open('utils/datasets/all_eqw', 'rb') as fr:
        history = pickle.load(fr, encoding='latin1')
    with open('utils/datasets/stock_names', 'rb') as fr:
        abbreviation = pickle.load(fr, encoding='latin1')
    history = history[:, :, :4]  # keep the first four feature columns

    num_training_time = history.shape[1]
    num_testing_time = history.shape[1]

    # get target history
    target_stocks = ['BLK UN EQUITY', 'GS UN EQUITY', 'USB UN EQUITY']
    target_history = np.empty(
        shape=(len(target_stocks), num_training_time, history.shape[2]))
    for i, stock in enumerate(target_stocks):
        target_history[i] = history[
            abbreviation.index(stock), :num_training_time, :]
        print(target_history[i])

    # collect testing data
    testing_stocks = [
        'AMG UN EQUITY',
        'BRK/B UN EQUITY',
        'MTB UN EQUITY',
    ]
    testing_history = np.empty(
        shape=(len(testing_stocks), num_testing_time, history.shape[2]))
    # Iterate the *testing* stocks; the previous code looped over
    # target_stocks and so copied the target series a second time.
    for i, stock in enumerate(testing_stocks):
        testing_history[i] = history[
            abbreviation.index(stock), :num_testing_time, :]

    # dataset for 16 stocks by splitting timestamp
    # NOTE(review): the values returned by read_stock_history are overwritten
    # by the pickle loads right below; the call is kept as-is -- confirm
    # whether it is still needed.
    history, abbreviation = read_stock_history(
        filepath='utils/datasets/stocks_history_target.h5')
    with open('utils/datasets/all_eqw', 'rb') as fr:
        history = pickle.load(fr, encoding='latin1')
    with open('utils/datasets/stock_names', 'rb') as fr:
        abbreviation = pickle.load(fr, encoding='latin1')
    history = history[:, :, :4]

    # 16 stocks are all involved. We choose first 3 years as training data
    num_training_time = 1095
    target_stocks = abbreviation
    target_history = np.empty(
        shape=(len(target_stocks), num_training_time, history.shape[2]))
    for i, stock in enumerate(target_stocks):
        target_history[i] = history[
            abbreviation.index(stock), :num_training_time, :]
    print(target_history.shape)

    # and last 2 years as testing data.
    testing_stocks = abbreviation
    testing_history = np.empty(
        shape=(len(testing_stocks),
               history.shape[1] - num_training_time,
               history.shape[2]))
    for i, stock in enumerate(testing_stocks):
        testing_history[i] = history[
            abbreviation.index(stock), num_training_time:, :]
    print(testing_history.shape)

    nb_classes = len(target_stocks) + 1
    print(target_history.shape)
    print(testing_history.shape)

    # One figure per stock, x axis labelled with dates every 200 steps.
    date_list = [index_to_date(i) for i in range(target_history.shape[1])]
    x = range(target_history.shape[1])
    for i, stock_name in enumerate(target_stocks):
        plt.figure(i)
        # open, high, low, close = [0, 1, 2, 3]
        plt.plot(x, target_history[i, :, 1])
        plt.xticks(x[::200], date_list[::200], rotation=30)
        plt.title(stock_name)
        plt.show()

    # common settings
    batch_size = 64
    action_bound = 1.
    tau = 1e-3
    models = []
    model_names = []
    window_length_lst = [3, 7, 14, 21]
    predictor_type_lst = ['cnn', 'lstm']
    use_batch_norm = True

    # Imitation-learning baselines: one LSTM and one CNN per window length.
    for window_length in window_length_lst:
        name = 'imit_LSTM%3A window = {}'.format(window_length)
        model_name = 'imitation_lstm_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate LSTM model
        lstm_model = StockLSTM(nb_classes, window_length,
                               weights_file='weights/' + name + '.h5')
        lstm_model.build_model(load_weights=True)
        models.append(lstm_model)

        name = 'imit_CNN%3A window = {}'.format(window_length)
        model_name = 'imitation_cnn_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate CNN model
        cnn_model = StockCNN(nb_classes, window_length,
                             weights_file='weights/' + name + '.h5')
        cnn_model.build_model(load_weights=True)
        models.append(cnn_model)

    # Everything appended from here on is a DDPG model. Remember where they
    # start instead of hard-coding the index (8 with the current settings).
    first_ddpg_index = len(models)

    # DDPG agents: one per (window length, predictor type), each built in a
    # fresh graph and session and restored from its checkpoint.
    for window_length in window_length_lst:
        for predictor_type in predictor_type_lst:
            name = 'DDPG_window_{}_predictor_{}'.format(
                window_length, predictor_type)
            model_names.append(name)

            tf.reset_default_graph()
            sess = tf.Session()
            tflearn.config.init_training_mode()

            action_dim = [nb_classes]
            state_dim = [nb_classes, window_length]
            variable_scope = get_variable_scope(window_length,
                                                predictor_type,
                                                use_batch_norm)
            with tf.variable_scope(variable_scope):
                actor = StockActor(sess, state_dim, action_dim, action_bound,
                                   1e-4, tau, batch_size, predictor_type,
                                   use_batch_norm)
                critic = StockCritic(
                    sess=sess,
                    state_dim=state_dim,
                    action_dim=action_dim,
                    tau=1e-3,
                    learning_rate=1e-3,
                    num_actor_vars=actor.get_num_trainable_vars(),
                    predictor_type=predictor_type,
                    use_batch_norm=use_batch_norm)
                actor_noise = OrnsteinUhlenbeckActionNoise(
                    mu=np.zeros(action_dim))

                model_save_path = get_model_path(window_length,
                                                 predictor_type,
                                                 use_batch_norm)
                summary_path = get_result_path(window_length,
                                               predictor_type,
                                               use_batch_norm)

                ddpg_model = DDPG(None, sess, actor, critic, actor_noise,
                                  obs_normalizer=obs_normalizer,
                                  config_file='config/stock.json',
                                  model_save_path=model_save_path,
                                  summary_path=summary_path)
                ddpg_model.initialize(load_weights=True, verbose=False)
                models.append(ddpg_model)

    # Evaluate only the DDPG models, on the training-period history.
    env = MultiActionPortfolioEnv(target_history,
                                  target_stocks,
                                  model_names[first_ddpg_index:],
                                  steps=500,
                                  sample_start_date='2012-10-30')
    test_model_multiple(env, models[first_ddpg_index:])
def train_model(self):
    """Fetch the training data, train a fresh DDPG agent and return the train id.

    The method:

    * replaces ``self.target_history``, ``self.training_start_time`` and
      ``self.training_end_time`` with the values returned by ``fetch_data``;
    * if the configured ``max_step`` is not positive, overwrites it in
      ``self.config`` with ``history length - window_length - 1``;
    * builds the environment, actor, critic and ``DDPG`` model on the
      configured device and trains from scratch (``load_weights=False``);
    * always closes the session afterwards, even if building or training
      the model raises.

    Returns:
        ``self.train_id``.
    """
    print("training period is from %s to %s"%(self.training_start_time,self.training_end_time))
    self.target_history, _, self.training_start_time, self.training_end_time \
        = fetch_data(start_time=self.training_start_time,
                     end_time=self.training_end_time,
                     window_length=self.window_length,
                     stocks=self.target_stocks)
    # end - start, so that the reported count is positive (it used to be
    # computed as start - end).
    print("total training example is %d" %(self.training_end_time-self.training_start_time))
    print("self.target_history shape is", self.target_history.shape)

    training_cfg = self.config["training"]
    if training_cfg["max_step"] <= 0:
        # A non-positive max_step means "use as many steps as the data allows".
        training_cfg["max_step"] = self.target_history.shape[1] - self.window_length - 1

    env = PortfolioEnv(history=self.target_history,
                       abbreviation=self.target_stocks,
                       steps=training_cfg["max_step"],
                       trading_cost=self.trading_cost,
                       time_cost=self.time_cost,
                       window_length=self.window_length,
                       reward_function=self.config["input"]["reward_function"])
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_dim))
    variable_scope = get_variable_scope(self.window_length,
                                        self.predictor_type,
                                        self.use_batch_norm)

    # Anything other than "cpu" selects the first GPU.
    device_res = "/cpu:0" if self.config["device"] == "cpu" else "/gpu:0"
    print("device is ", device_res)

    with tf.device(device_res):
        self.sess = self.start_session()
        try:
            with tf.variable_scope(variable_scope):
                actor = StockActor(
                    sess=self.sess,
                    feature_number=self.feature_number,
                    state_dim=self.state_dim,
                    action_dim=self.action_dim,
                    action_bound=self.action_bound,
                    learning_rate=training_cfg["actor learning rate"],
                    decay_rate=training_cfg["actor decay rate"],
                    decay_steps=training_cfg["actor decay steps"],
                    weight_decay=training_cfg["actor weight decay"],
                    tau=self.tau,
                    batch_size=self.batch_size,
                    predictor_type=self.predictor_type,
                    use_batch_norm=self.use_batch_norm,
                    activation_function=self.activation_function)
                critic = StockCritic(
                    sess=self.sess,
                    feature_number=self.feature_number,
                    state_dim=self.state_dim,
                    action_dim=self.action_dim,
                    tau=self.tau,
                    learning_rate=training_cfg["critic learning rate"],
                    decay_rate=training_cfg["critic decay rate"],
                    decay_steps=training_cfg["critic decay steps"],
                    weight_decay=training_cfg["critic weight decay"],
                    num_actor_vars=actor.get_num_trainable_vars(),
                    predictor_type=self.predictor_type,
                    use_batch_norm=self.use_batch_norm,
                    activation_function=self.activation_function)
                ddpg_model = DDPG(env=env,
                                  sess=self.sess,
                                  actor=actor,
                                  critic=critic,
                                  actor_noise=actor_noise,
                                  obs_normalizer=obs_normalizer,
                                  config=self.config,
                                  model_save_path=self.model_save_path,
                                  summary_path=self.summary_path)
                ddpg_model.initialize(load_weights=False)
                ddpg_model.train()
        finally:
            # Release the session even when building or training fails.
            self.close_session()

    return self.train_id
# Actor network; its learning rate comes from the "training" section of config.
actor = StockActor(
    sess=sess, state_dim=state_dim, action_dim=action_dim, action_bound=action_bound, learning_rate=config["training"]["actor learning rate"], tau=tau, batch_size=batch_size, predictor_type=predictor_type, use_batch_norm=use_batch_norm)
# Critic network; it is told how many trainable variables the actor created.
critic = StockCritic(
    sess=sess, state_dim=state_dim, action_dim=action_dim, tau=tau, learning_rate=config["training"]["critic learning rate"], num_actor_vars=actor.get_num_trainable_vars(), predictor_type=predictor_type, use_batch_norm=use_batch_norm)
# Wrap both networks in a DDPG model configured from the same config object.
ddpg_model = DDPG(env=env, sess=sess, actor=actor, critic=critic, actor_noise=actor_noise, obs_normalizer=obs_normalizer, config=config, model_save_path=model_save_path, summary_path=summary_path)
# Start from scratch (no checkpoint is loaded), then train.
ddpg_model.initialize(load_weights=False)
ddpg_model.train()
# Critic network with its own tau (critic_tau) and a fixed 1e-3 learning rate;
# use_previous is switched on and auxil_commission is passed as auxiliary_commission.
critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=critic_tau, learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(), predictor_type=predictor_type, use_batch_norm=use_batch_norm, use_previous=True, auxiliary_commission=auxil_commission)
# DDPG model on the training environment; the hyper-parameters are passed
# explicitly as keyword arguments, together with the inference path and
# the two inference environments.
ddpg_model = DDPG(train_env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer, gamma=gamma, training_episodes=training_episodes, max_rollout_steps=max_rollout_steps, buffer_size=buffer_size, seed=seed, batch_size=batch_size, model_save_path=model_save_path, summary_path=summary_path, infer_path=infer_path, infer_train_env=infer_train_env, infer_test_env=infer_test_env, learning_steps=learning_steps)
# load_weights decides whether existing weights are loaded before training.
ddpg_model.initialize(load_weights=load_weights, verbose=False)
ddpg_model.train()
def init_model(self):
    """Load every pre-trained model and return them as a list.

    For each window length in ``[3, 7, 14, 21]`` an imitation LSTM and an
    imitation CNN are loaded first; after that one DDPG model per
    (window length, predictor type) pair is restored, each in a fresh graph
    and session. The collected model names are printed.

    Returns:
        list: imitation models followed by the DDPG models.
    """
    # common settings
    batch_size = 64
    action_bound = 1.
    tau = 1e-3
    use_batch_norm = True
    nb_classes = 17
    window_length_lst = [3, 7, 14, 21]
    predictor_type_lst = ['cnn', 'lstm']

    models = []
    model_names = []

    # (weights-file stem, display name, model class), in loading order.
    imitation_specs = (
        ('imit_LSTM%3A window = {}', 'imitation_lstm_window_{}', StockLSTM),
        ('imit_CNN%3A window = {}', 'imitation_cnn_window_{}', StockCNN),
    )
    for window in window_length_lst:
        for stem_fmt, display_fmt, model_cls in imitation_specs:
            weights_stem = stem_fmt.format(window)
            model_names.append(display_fmt.format(window))
            imitation_model = model_cls(
                nb_classes, window,
                weights_file='weights/' + weights_stem + '.h5')
            imitation_model.build_model(load_weights=True)
            models.append(imitation_model)

    # DDPG agents, each restored from its own checkpoint.
    for window in window_length_lst:
        for predictor in predictor_type_lst:
            model_names.append(
                'DDPG_window_{}_predictor_{}'.format(window, predictor))

            tf.reset_default_graph()
            sess = tf.Session()
            tflearn.config.init_training_mode()

            action_dim = [nb_classes]
            state_dim = [nb_classes, window]
            scope_name = get_variable_scope(window, predictor, use_batch_norm)
            with tf.variable_scope(scope_name):
                actor = StockActor(sess, state_dim, action_dim, action_bound,
                                   1e-4, tau, batch_size, predictor,
                                   use_batch_norm)
                critic = StockCritic(
                    sess=sess,
                    state_dim=state_dim,
                    action_dim=action_dim,
                    tau=1e-3,
                    learning_rate=1e-3,
                    num_actor_vars=actor.get_num_trainable_vars(),
                    predictor_type=predictor,
                    use_batch_norm=use_batch_norm)
                noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

                checkpoint_path = get_model_path(window, predictor,
                                                 use_batch_norm)
                results_path = get_result_path(window, predictor,
                                               use_batch_norm)

                agent = DDPG(None, sess, actor, critic, noise,
                             obs_normalizer=obs_normalizer,
                             config_file='config/stock.json',
                             model_save_path=checkpoint_path,
                             summary_path=results_path)
                agent.initialize(load_weights=True, verbose=False)
                models.append(agent)

    print("model names", model_names)
    return models
def _load_model(norm_func=None):
    """Build a ``DDPG`` model from the surrounding scope and load its weights.

    ``env``, ``sess``, ``actor``, ``critic``, ``actor_noise``,
    ``model_save_path`` and ``summary_path`` are not parameters; they are
    taken from the enclosing/module scope.

    Args:
        norm_func: optional observation normalizer passed as ``obs_normalizer``.

    Returns:
        The ``DDPG`` instance after ``initialize(load_weights=True)``.
    """
    loaded = DDPG(
        env, sess, actor, critic, actor_noise,
        obs_normalizer=norm_func,
        config_file='config/stock.json',
        model_save_path=model_save_path,
        summary_path=summary_path,
    )
    loaded.initialize(load_weights=True)
    return loaded