def train_model(self):
    """Fetch the training history, build the DDPG agent and train it.

    Steps performed, in order:

    1. Call ``fetch_data`` for the configured training period and store
       the returned history and start/end values back on ``self``.
    2. If ``config["training"]["max_step"]`` is not positive, replace it
       (in place, in ``self.config``) with
       ``target_history.shape[1] - window_length - 1``.
    3. Build a ``PortfolioEnv``, Ornstein-Uhlenbeck action noise, and the
       ``StockActor`` / ``StockCritic`` pair inside the variable scope
       returned by ``get_variable_scope``, on the device selected by
       ``config["device"]`` (``"cpu"`` -> ``/cpu:0``, anything else ->
       ``/gpu:0``).
    4. Build a ``DDPG`` model, initialise it without loading weights and
       run ``train()``.

    The session created by ``self.start_session()`` is stored on
    ``self.sess`` and is closed through ``self.close_session()`` even when
    model construction or training raises.

    Returns:
        ``self.train_id``.
    """
    print("training period is from %s to %s"%(self.training_start_time,self.training_end_time))
    (self.target_history,
     _,
     self.training_start_time,
     self.training_end_time) = fetch_data(start_time=self.training_start_time,
                                          end_time=self.training_end_time,
                                          window_length=self.window_length,
                                          stocks=self.target_stocks)
    # End minus start: the original subtracted the other way round and
    # reported the count with the wrong sign.
    # NOTE(review): assumes fetch_data returns numeric start/end positions
    # (they are formatted with %d) - confirm against fetch_data.
    print("total training example is %d" %(self.training_end_time-self.training_start_time))
    print("self.target_history shape is", self.target_history.shape)

    training_cfg = self.config["training"]
    if training_cfg["max_step"] <= 0:
        # A non-positive max_step means "derive it from the history".
        # training_cfg aliases self.config["training"], so this write is
        # visible to everything that later receives self.config.
        training_cfg["max_step"] = self.target_history.shape[1] - self.window_length - 1

    env = PortfolioEnv(history=self.target_history,
                       abbreviation=self.target_stocks,
                       steps=training_cfg["max_step"],
                       trading_cost=self.trading_cost,
                       time_cost=self.time_cost,
                       window_length=self.window_length,
                       reward_function=self.config["input"]["reward_function"])
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_dim))
    variable_scope = get_variable_scope(self.window_length,
                                        self.predictor_type,
                                        self.use_batch_norm)

    device_res = "/cpu:0" if self.config["device"] == "cpu" else "/gpu:0"
    print("device is ", device_res)

    with tf.device(device_res):
        self.sess = self.start_session()
        try:
            # Actor, critic and DDPG are all built inside the same
            # variable scope, as in the original code.
            with tf.variable_scope(variable_scope):
                actor = StockActor(sess=self.sess,
                                   feature_number=self.feature_number,
                                   state_dim=self.state_dim,
                                   action_dim=self.action_dim,
                                   action_bound=self.action_bound,
                                   learning_rate=training_cfg["actor learning rate"],
                                   decay_rate=training_cfg["actor decay rate"],
                                   decay_steps=training_cfg["actor decay steps"],
                                   weight_decay=training_cfg["actor weight decay"],
                                   tau=self.tau,
                                   batch_size=self.batch_size,
                                   predictor_type=self.predictor_type,
                                   use_batch_norm=self.use_batch_norm,
                                   activation_function=self.activation_function)
                critic = StockCritic(sess=self.sess,
                                     feature_number=self.feature_number,
                                     state_dim=self.state_dim,
                                     action_dim=self.action_dim,
                                     tau=self.tau,
                                     learning_rate=training_cfg["critic learning rate"],
                                     decay_rate=training_cfg["critic decay rate"],
                                     decay_steps=training_cfg["critic decay steps"],
                                     weight_decay=training_cfg["critic weight decay"],
                                     num_actor_vars=actor.get_num_trainable_vars(),
                                     predictor_type=self.predictor_type,
                                     use_batch_norm=self.use_batch_norm,
                                     activation_function=self.activation_function)
                # obs_normalizer is not defined in this method; it is
                # resolved from the enclosing module scope.
                ddpg_model = DDPG(env=env,
                                  sess=self.sess,
                                  actor=actor,
                                  critic=critic,
                                  actor_noise=actor_noise,
                                  obs_normalizer=obs_normalizer,
                                  config=self.config,
                                  model_save_path=self.model_save_path,
                                  summary_path=self.summary_path)
                ddpg_model.initialize(load_weights=False)
                ddpg_model.train()
        finally:
            # Release the session on failure as well as on success so an
            # exception during graph construction or training cannot leak it.
            self.close_session()
    return self.train_id
use_batch_norm) summary_path = get_result_path(window_length, predictor_type, use_batch_norm) variable_scope = get_variable_scope(window_length, predictor_type, use_batch_norm) with tf.variable_scope(variable_scope): sess = tf.Session() actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size, predictor_type, use_batch_norm) critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=1e-3, learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(), predictor_type=predictor_type, use_batch_norm=use_batch_norm) ddpg_model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=obs_normalizer, config_file='config/stock.json', model_save_path=model_save_path, summary_path=summary_path) ddpg_model.initialize(load_weights=False) ddpg_model.train()
use_batch_norm = False else: raise ValueError('Unknown batch norm argument') actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim)) model_save_path = get_model_path(window_length, predictor_type, use_batch_norm) summary_path = get_result_path(window_length, predictor_type, use_batch_norm) variable_scope = get_variable_scope(window_length, predictor_type, use_batch_norm) with tf.variable_scope(variable_scope): sess = tf.Session() actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size, predictor_type, use_batch_norm) critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=1e-3, learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(), predictor_type=predictor_type, use_batch_norm=use_batch_norm) if TRAIN: ddpg_model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=NORM_FUNC, config_file='config/stock.json', model_save_path=model_save_path, summary_path=summary_path) ddpg_model.initialize(load_weights=False) ddpg_model.train(debug=DEBUG) elif TEST: env = PortfolioEnv(target_history, target_stocks, steps=(num_training_time - window_length - 2), window_length=window_length) test_model(env, _load_model(norm_func=NORM_FUNC)) else: # for prediction predict_next_day(env, sess, actor, critic, actor_noise, norm_func=NORM_FUNC)