Python DDPG.train Exemples, model.ddpg.ddpg.DDPG.train Python Exemples

Exemple #1

0

Afficher le fichier

    def train_model(self):
        """Fetch the training history, build the actor/critic pair and train DDPG.

        Steps, in order:
          1. Call ``fetch_data`` for the configured period, window length and
             stocks; ``self.training_start_time`` / ``self.training_end_time``
             are overwritten with the values it returns.
          2. If ``config["training"]["max_step"]`` is not positive, default it
             to ``target_history.shape[1] - window_length - 1`` (the config
             dict is updated in place).
          3. Build a ``PortfolioEnv``, Ornstein-Uhlenbeck action noise, and the
             ``StockActor`` / ``StockCritic`` networks under a dedicated
             variable scope on the configured device.
          4. Initialise ``DDPG`` without loading weights and run ``train()``.

        The session created by ``self.start_session()`` is always closed via
        ``self.close_session()``, even if building or training raises.

        Returns:
            ``self.train_id``.
        """
        print("training period is from %s to %s"%(self.training_start_time,self.training_end_time))

        self.target_history, _, self.training_start_time, self.training_end_time = fetch_data(
            start_time=self.training_start_time,
            end_time=self.training_end_time,
            window_length=self.window_length,
            stocks=self.target_stocks)

        # Span is end - start; the reverse order reports a negative count.
        # NOTE(review): assumes fetch_data returns numeric (index-like) bounds,
        # as the %d format implies -- confirm against fetch_data.
        print("total training example is %d" %(self.training_end_time - self.training_start_time))
        print("self.target_history shape is", self.target_history.shape)

        training_cfg = self.config["training"]
        if training_cfg["max_step"] <= 0:
            # Non-positive max_step means "derive it from the history length".
            training_cfg["max_step"] = self.target_history.shape[1] - self.window_length - 1

        env = PortfolioEnv(history=self.target_history,
                           abbreviation=self.target_stocks,
                           steps=training_cfg["max_step"],
                           trading_cost=self.trading_cost,
                           time_cost=self.time_cost,
                           window_length=self.window_length,
                           reward_function=self.config["input"]["reward_function"])

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_dim))

        variable_scope = get_variable_scope(self.window_length, self.predictor_type, self.use_batch_norm)
        # Anything other than "cpu" in the config selects the first GPU.
        device_res = "/cpu:0" if self.config["device"] == "cpu" else "/gpu:0"
        print("device is ", device_res)

        with tf.device(device_res):
            self.sess = self.start_session()
            try:
                with tf.variable_scope(variable_scope):
                    actor = StockActor(sess=self.sess,
                                       feature_number=self.feature_number,
                                       state_dim=self.state_dim,
                                       action_dim=self.action_dim,
                                       action_bound=self.action_bound,
                                       learning_rate=training_cfg["actor learning rate"],
                                       decay_rate=training_cfg["actor decay rate"],
                                       decay_steps=training_cfg["actor decay steps"],
                                       weight_decay=training_cfg["actor weight decay"],
                                       tau=self.tau,
                                       batch_size=self.batch_size,
                                       predictor_type=self.predictor_type,
                                       use_batch_norm=self.use_batch_norm,
                                       activation_function=self.activation_function)
                    # The critic is built after the actor because it needs the
                    # actor's trainable-variable count.
                    critic = StockCritic(sess=self.sess,
                                         feature_number=self.feature_number,
                                         state_dim=self.state_dim,
                                         action_dim=self.action_dim,
                                         tau=self.tau,
                                         learning_rate=training_cfg["critic learning rate"],
                                         decay_rate=training_cfg["critic decay rate"],
                                         decay_steps=training_cfg["critic decay steps"],
                                         weight_decay=training_cfg["critic weight decay"],
                                         num_actor_vars=actor.get_num_trainable_vars(),
                                         predictor_type=self.predictor_type,
                                         use_batch_norm=self.use_batch_norm,
                                         activation_function=self.activation_function)
                    # NOTE(review): obs_normalizer is not defined in this method;
                    # presumably a module-level name -- verify it is in scope.
                    ddpg_model = DDPG(env=env,
                                      sess=self.sess,
                                      actor=actor,
                                      critic=critic,
                                      actor_noise=actor_noise,
                                      obs_normalizer=obs_normalizer,
                                      config=self.config,
                                      model_save_path=self.model_save_path,
                                      summary_path=self.summary_path)
                    ddpg_model.initialize(load_weights=False)
                    ddpg_model.train()
            finally:
                # Release the session even when construction or training fails.
                self.close_session()
        return self.train_id

Exemple #2

0

Afficher le fichier

                                     use_batch_norm)
    summary_path = get_result_path(window_length, predictor_type,
                                   use_batch_norm)

    variable_scope = get_variable_scope(window_length, predictor_type,
                                        use_batch_norm)

    with tf.variable_scope(variable_scope):
        sess = tf.Session()
        actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4,
                           tau, batch_size, predictor_type, use_batch_norm)
        critic = StockCritic(sess=sess,
                             state_dim=state_dim,
                             action_dim=action_dim,
                             tau=1e-3,
                             learning_rate=1e-3,
                             num_actor_vars=actor.get_num_trainable_vars(),
                             predictor_type=predictor_type,
                             use_batch_norm=use_batch_norm)
        ddpg_model = DDPG(env,
                          sess,
                          actor,
                          critic,
                          actor_noise,
                          obs_normalizer=obs_normalizer,
                          config_file='config/stock.json',
                          model_save_path=model_save_path,
                          summary_path=summary_path)
        ddpg_model.initialize(load_weights=False)
        ddpg_model.train()

Exemple #3

0

Afficher le fichier

Fichier : stock_trading.py Projet : ylcoder/drl-portfolio-management

        use_batch_norm = False
    else:
        raise ValueError('Unknown batch norm argument')
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
    model_save_path = get_model_path(window_length, predictor_type, use_batch_norm)
    summary_path = get_result_path(window_length, predictor_type, use_batch_norm)

    variable_scope = get_variable_scope(window_length, predictor_type, use_batch_norm)

    with tf.variable_scope(variable_scope):
        sess = tf.Session()
        actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size,
                           predictor_type, use_batch_norm)
        critic = StockCritic(sess=sess, state_dim=state_dim, action_dim=action_dim, tau=1e-3,
                             learning_rate=1e-3, num_actor_vars=actor.get_num_trainable_vars(),
                             predictor_type=predictor_type, use_batch_norm=use_batch_norm)

        if TRAIN:
            ddpg_model = DDPG(env, sess, actor, critic, actor_noise, obs_normalizer=NORM_FUNC,
                              config_file='config/stock.json', model_save_path=model_save_path,
                              summary_path=summary_path)
            ddpg_model.initialize(load_weights=False)
            ddpg_model.train(debug=DEBUG)
        elif TEST:
            env = PortfolioEnv(target_history, target_stocks, steps=(num_training_time - window_length - 2),
                               window_length=window_length)
            test_model(env, _load_model(norm_func=NORM_FUNC))
        else:
            # for prediction
            predict_next_day(env, sess, actor, critic, actor_noise, norm_func=NORM_FUNC)