def ddqn(env_type, experiment_id, config_file):
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id
    save_config(params, experiment_id)

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)

    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)


def ddqn(env_type, experiment_id, config_file):
    '''
    Double Deep Q-learning

    Args:
        env_type: Environment type
        experiment_id: ID of the experiment
        config_file: Path to the config file
    '''
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id
    save_config(params, experiment_id)

    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, wall_seed=20, food_prob=0)

    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)


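# Both ddqn() entry points write keys with dict syntax (params['model_type'])
# and later read them as attributes (params.wall_prob, params.episodes), so
# read_yaml() presumably returns an attribute-style dict. Below is a minimal
# sketch of such a helper, assuming a plain YAML config file; the project's
# actual read_yaml() may differ.
import yaml

class AttrDict(dict):
    """Dict whose keys are also readable/writable as attributes (illustrative only)."""
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

def read_yaml_sketch(path):
    # Load the experiment configuration and wrap it so both params['key']
    # and params.key work, matching the access patterns in ddqn() above.
    with open(path, 'r') as f:
        return AttrDict(yaml.safe_load(f))

# Hypothetical invocation; the environment type, experiment id and config path
# are placeholders, not values taken from the project:
# ddqn(env_type='Snake', experiment_id='ddqn_exp_001', config_file='configs/ddqn.yaml')

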
def calc_reference_deviation(virtual_env, real_env, config):
    state_reward_concat = None
    for _ in range(10):
        agent = DDQN(env=real_env, config=config)
        _, _, replay_buffer_train = agent.train(env=virtual_env)
        states, _, _, rewards, _ = replay_buffer_train.get_all()
        state_reward = torch.cat((states, rewards), 1)
        if state_reward_concat is None:
            state_reward_concat = state_reward
        else:
            state_reward_concat = torch.cat((state_reward_concat, state_reward), 0)

    std_per_dim = torch.std(state_reward_concat, dim=0)
    print(state_reward_concat.shape)
    print(std_per_dim)
    # .item() requires a single element, so reduce the per-dimension
    # deviations to one scalar before returning
    return std_per_dim.mean().item()


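# The concatenation in calc_reference_deviation() is easiest to see on a toy
# tensor: each row is one transition, the last column is its reward, and
# torch.std(..., dim=0) yields one deviation per state dimension plus one for
# the reward. Illustrative values only.
import torch

states = torch.tensor([[0.1, 0.2, 0.3],
                       [0.4, 0.5, 0.6],
                       [0.2, 0.1, 0.0],
                       [0.3, 0.3, 0.3]])              # 4 transitions, 3-dim states
rewards = torch.tensor([[1.0], [0.0], [1.0], [0.0]])  # one scalar reward per row

state_reward = torch.cat((states, rewards), 1)        # shape (4, 4)
print(torch.std(state_reward, dim=0))                 # per-column std, shape (4,)

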
def compute(self, working_dir, bohb_id, config_id, cso, budget, *args, **kwargs):
    with open("default_config_cartpole.yaml", 'r') as stream:
        default_config = yaml.safe_load(stream)
    config = self.get_specific_config(cso, default_config, budget)

    print('----------------------------')
    print("START BOHB ITERATION")
    print('CONFIG: ' + str(config))
    print('CSO: ' + str(cso))
    print('BUDGET: ' + str(budget))
    print('----------------------------')

    info = {}

    # generate environment
    env_fac = EnvFactory(config)
    env = env_fac.generate_real_env()

    ddqn = DDQN(env=env, config=config, icm=True)

    # score = mean number of training episodes over 5 runs; it is returned as
    # the loss, so fewer episodes is better
    score_list = []
    for _ in range(5):
        rewards, _, _ = ddqn.train(env)
        score_i = len(rewards)
        score_list.append(score_i)
    score = np.mean(score_list)

    info['config'] = str(config)

    print('----------------------------')
    print('FINAL SCORE: ' + str(score))
    print("END BOHB ITERATION")
    print('----------------------------')

    return {
        "loss": score,
        "info": info
    }


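# self.get_specific_config() is not shown above. Below is a minimal sketch of
# what such a helper might do, written here as a plain function. It assumes
# cso is a flat dict of sampled hyperparameter values with 'section.param'
# keys that override a nested 'agents' section of the default config; the key
# layout and the 'agents' nesting are assumptions, not the project's actual
# mapping.
import copy

def get_specific_config_sketch(cso, default_config, budget):
    config = copy.deepcopy(default_config)      # keep the loaded defaults untouched
    for key, value in cso.items():
        section, param = key.split('.', 1)      # e.g. 'ddqn.lr' -> ('ddqn', 'lr')
        config['agents'][section][param] = value
    # budget is available here as well, e.g. to cap the number of training steps
    return config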