def dqn(env, learning_rate, batch_size, random_step, log_dir, weight_dir):
    '''Build a DQN agent for the given Gym environment and train it.'''
    print('Env Name: ', env)
    env = gym.make(env)
    print('Action Space: ', env.action_space.n)
    print('State Shape:', env.render(mode='rgb_array').shape)
    agent = DQN(env,
                QNet(env.action_space.n),
                nn.MSELoss(),
                optim.RMSprop,
                lr=learning_rate,
                log_dir=log_dir,
                weight_dir=weight_dir)
    agent.train(batch_size=batch_size, random_step=random_step)
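# A minimal usage sketch for the function above. The environment id and the
# hyperparameter values below are illustrative assumptions, not values taken
# from the original code; any Gym environment with a discrete action space
# and an rgb_array render mode should work.
dqn(env='Breakout-v0',        # assumed Atari-style environment id
    learning_rate=2.5e-4,     # assumed RMSprop learning rate
    batch_size=32,            # assumed replay mini-batch size
    random_step=10000,        # assumed number of random warm-up steps
    log_dir='./logs',
    weight_dir='./weights')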
def dqn(env_type, experiment_id, config_file):
    '''Deep Q-learning.

    Args:
        env_type: Environment type
        experiment_id: Id for the experiment
        config_file: Path of the config file
    '''
    params = read_yaml(config_file)
    params['model_type'] = 'DQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id
    save_config(params, experiment_id)

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, wall_seed=20, food_prob=0)

    q_net = create_nn(params)
    agent = DQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes,
                params.episode_step,
                params.random_step,
                params.min_greedy,
                params.max_greedy,
                params.greedy_step,
                params.update_period)
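# Hypothetical invocation of the function above. The environment type,
# experiment id, and config path are illustrative; the config file is assumed
# to be a YAML whose keys match the attributes accessed above (wall_prob,
# episodes, episode_step, random_step, min_greedy, max_greedy, greedy_step,
# update_period), since read_yaml appears to return an attribute-accessible
# mapping.
dqn(env_type='hunter_world',
    experiment_id='exp_001',
    config_file='configs/dqn.yaml')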
class DQN_runner(MixinCore):
    def __init__(
        self,
        env_cls: object,
        MV_res: bool,
        experiment_type: str,
        seed: int,
        episodes: int,
        N_train: int,
        len_series: Union[int, None],
        dt: int,
        start_train: int,
        save_freq: int,
        use_GPU: bool,
        outputDir: str = "_outputs",
        outputClass: str = "DQN",
        outputModel: str = "test",
        varying_pars: Union[list, None] = None,
        varying_type: str = "chunk",
        num_cores: int = None,
    ):
        self.logging.info("Starting model setup")
        self._setattrs()
        self.rng = np.random.RandomState(self.seed)

        if self.use_GPU:
            gpu_devices = tf.config.experimental.list_physical_devices("GPU")
            for device in gpu_devices:
                tf.config.experimental.set_memory_growth(device, True)
        else:
            my_devices = tf.config.experimental.list_physical_devices(
                device_type="CPU")
            tf.config.experimental.set_visible_devices(devices=my_devices,
                                                       device_type="CPU")

        if self.dt != 1.0:
            # self.len_series = self.len_series * (1 / self.dt)
            self.N_train = int(self.N_train * (1 / self.dt))

        if self.episodes:
            self.N_train = self.episodes * self.len_series
            self.col_names_oos = [
                str(e) for e in np.arange(0, self.episodes + 1, save_freq)[1:]
            ]
        else:
            self.len_series = self.N_train
            self.save_freq_n = self.N_train // save_freq
            self.col_names_oos = [
                str(int(i))
                for i in np.arange(0, self.N_train + 1, self.save_freq_n)
            ][1:]

        self.savedpath = GeneratePathFolder(
            outputDir,
            outputClass,
            outputModel,
            varying_pars,
            varying_type,
            self.N_train,
        )
        if save_freq and not os.path.exists(
                os.path.join(self.savedpath, "ckpt")):
            os.makedirs(os.path.join(self.savedpath, "ckpt"))
        logging.info("Successfully generated path to save outputs...")

    def run(self):
        """Wrapper for keyboard interrupt."""
        try:
            self.set_up_training()
            # if self.episodes:
            #     self.training_episodic_agent()
            # else:
            self.training_agent()
        except (KeyboardInterrupt, SystemExit):
            self.logging.debug("Exit on KeyboardInterrupt or SystemExit")
            sys.exit()

    def set_up_training(self):
        self.logging.debug("Simulating Data")
        self.data_handler = DataHandler(N_train=self.N_train, rng=self.rng)
        if self.experiment_type == "GP":
            self.data_handler.generate_returns()
        else:
            self.data_handler.generate_returns()
            # TODO check if these methods really fit and change the
            # parameters in the gin file
            self.data_handler.estimate_parameters()

        self.logging.debug("Instantiating action space")
        if self.MV_res:
            self.action_space = ResActionSpace()
        else:
            action_range, ret_quantile, holding_quantile = get_action_boundaries(
                N_train=self.N_train,
                f_speed=self.data_handler.f_speed,
                returns=self.data_handler.returns,
                factors=self.data_handler.factors,
            )
            gin.query_parameter("%ACTION_RANGE")[0] = action_range
            self.action_space = ActionSpace()

        self.logging.debug("Instantiating market environment")
        self.env = self.env_cls(
            N_train=self.N_train,
            f_speed=self.data_handler.f_speed,
            returns=self.data_handler.returns,
            factors=self.data_handler.factors,
        )

        self.logging.debug("Instantiating DQN model")
        input_shape = self.env.get_state_dim()
        self.train_agent = DQN(
            input_shape=input_shape,
            action_space=self.action_space,
            rng=self.rng,
            N_train=self.N_train,
        )

        self.logging.debug("Set up length of training and instantiate test env")
        self.train_agent._get_exploration_length(self.N_train)

        self.logging.debug("Instantiating Out of sample tester")
        self.oos_test = Out_sample_vs_gp(
            savedpath=self.savedpath,
            tag="DQN",
            experiment_type=self.experiment_type,
            env_cls=self.env_cls,
            MV_res=self.MV_res,
        )
        self.oos_test.init_series_to_fill(iterations=self.col_names_oos)
    def training_agent(self):
        """Main routine to train and test the DRL algorithm.

        The steps are:
        1. Load the dataset, metadata, any model output and any pre-loaded
           data (cached_data).
        2. Start the Backtrader engine and initialize the broker object.
        3. Instantiate the environment.
        4. Instantiate the model for the agent.
        5. Train the model according to a chosen technique.
        6. Test the model out-of-sample.
        7. Log the performance data, plot, save configuration file and the
           runner logger output.

        Once this is done, the backtest is over and all of the artifacts
        are saved in `_exp/experiment_name/_backtests/`.
        """
        self.logging.debug("Training...")
        CurrState, _ = self.env.reset()
        # CurrOptState = env.opt_reset()
        # OptRate, DiscFactorLoads = env.opt_trading_rate_disc_loads()

        for i in tqdm(iterable=range(self.N_train + 1),
                      desc="Training DQNetwork"):
            self.train_agent.update_epsilon()
            epsilon = self.train_agent.epsilon
            side_only = self.action_space.side_only
            copy_step = self.train_agent.copy_step

            action, qvalues = self.train_agent.eps_greedy_action(
                CurrState, epsilon, side_only=side_only)
            if not side_only:
                unscaled_action = action
            else:
                unscaled_action = get_bet_size(
                    qvalues,
                    action,
                    action_limit=self.action_space.action_range[0],
                    zero_action=self.action_space.zero_action,
                    rng=self.rng,
                )

            if self.MV_res:
                NextState, Result, _ = self.env.MV_res_step(
                    CurrState, unscaled_action, i)
            else:
                NextState, Result, _ = self.env.step(CurrState,
                                                     unscaled_action, i)
            self.env.store_results(Result, i)

            exp = {
                "s": CurrState,
                "a": action,
                "a_unsc": unscaled_action,
                "r": Result["Reward_DQN"],
                "s2": NextState,
            }
            self.train_agent.add_experience(exp)
            self.train_agent.train(i, side_only)

            if (i % copy_step == 0) and (i > self.train_agent.start_train):
                self.train_agent.copy_weights()
            CurrState = NextState

            if (i % self.save_freq_n == 0) and (i > self.train_agent.start_train):
                self.train_agent.model.save_weights(
                    os.path.join(self.savedpath, "ckpt",
                                 "DQN_{}_ep_weights".format(i)),
                    save_format="tf",
                )
                self.logging.debug("Testing...")
                self.oos_test.run_test(it=i, test_agent=self.train_agent)

            # if executeGP:
            #     NextOptState, OptResult = env.opt_step(
            #         CurrOptState, OptRate, DiscFactorLoads, i)
            #     env.store_results(OptResult, i)
            #     CurrOptState = NextOptState

        self.oos_test.save_series()
        save_gin(os.path.join(self.savedpath, "config.gin"))
        logging.info("Config file saved")
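# A hedged construction sketch for the runner above. Every value here is an
# assumption chosen to illustrate the constructor signature: MarketEnv is a
# hypothetical stand-in for whatever market-environment class the project
# defines, and the supporting machinery (DataHandler, GeneratePathFolder,
# Out_sample_vs_gp, the gin config) is assumed to be configured elsewhere.
runner = DQN_runner(
    env_cls=MarketEnv,        # hypothetical environment class
    MV_res=False,             # take the plain ActionSpace branch
    experiment_type='GP',
    seed=42,
    episodes=0,               # falsy -> training length driven by N_train
    N_train=100000,
    len_series=None,
    dt=1,
    start_train=1000,
    save_freq=10,             # N_train // save_freq checkpoints
    use_GPU=False,
)
runner.run()                  # set_up_training() then training_agent()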
def main(config_file):
    # Check TF version
    logging.info("Tensorflow version: {}".format(tf.version.VERSION))

    # Load main config file
    with open(config_file, "r") as f:
        config = yaml.safe_load(f)
    result_path = config["result_dir"]
    agent_type = config["agent"]
    agent_config_file = os.path.join(config["agent_config_dir"],
                                     str(agent_type) + ".yml")
    mode = config["mode"]
    environment = config["environment"]
    environment_seed = config["environment_seed"]

    # Load config file for agent
    with open(agent_config_file, "r") as f:
        agent_config = yaml.safe_load(f)

    # Create output directory
    time_str = time.strftime("%Y%m%d_%H%M%S")
    result_path = os.path.join(result_path, agent_type, time_str)
    if not os.path.exists(result_path):
        os.makedirs(result_path)

    agent_config["render_environment"] = config["render_environment"]
    agent_config["max_episode"] = config["max_episode"]
    agent_config["max_step"] = config["max_step"]
    agent_config["slide_window"] = config["slide_window"]
    agent_config["result_path"] = result_path

    # Save both config files to the output directory
    copyfile(config_file,
             os.path.join(result_path, os.path.basename(config_file)))
    copyfile(agent_config_file,
             os.path.join(result_path, os.path.basename(agent_config_file)))

    logging.info(mode + " with {} algorithm in environment {}".format(
        agent_type, environment))
    logging.info("Results will be saved at {}".format(result_path))

    # Initialize environment
    env = gym.make(environment)
    env.seed(environment_seed)
    env = env.unwrapped

    # Build/load agent
    if agent_type == "DQN":
        agent = DQN(agent_config, env)
    elif agent_type == "DDQN":
        agent = DDQN(agent_config, env)
    elif agent_type == "DDQN_PER_Prop":
        agent = DDQN_PER_Prop(agent_config, env)
    elif agent_type == "A2C":
        agent = A2C(agent_config, env)
    elif agent_type == "REINFORCE":
        agent = REINFORCE(agent_config, env)
    else:
        raise KeyError("Agent type does not exist")

    # Train or play
    if mode == "train":
        agent.train()
    elif mode == "play":
        agent.play()
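# A hedged sketch of the main config that main() above expects, with the keys
# inferred from what the function reads. The values are illustrative
# assumptions, and a matching agent config (here configs/DQN.yml) is assumed
# to exist on disk.
example_config = {
    "result_dir": "results",          # root output directory
    "agent": "DQN",                   # DQN, DDQN, DDQN_PER_Prop, A2C, or REINFORCE
    "agent_config_dir": "configs",    # directory holding <agent>.yml
    "mode": "train",                  # "train" or "play"
    "environment": "CartPole-v1",
    "environment_seed": 0,
    "render_environment": False,
    "max_episode": 500,
    "max_step": 200,
    "slide_window": 50,
}
with open("config.yml", "w") as f:
    yaml.safe_dump(example_config, f)
main("config.yml")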