Example #1
def ddqn(env_type, experiment_id, config_file):
    # Load the experiment configuration and record the run metadata.
    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id

    save_config(params, experiment_id)

    # Build the environment and generate its world (walls placed with
    # probability wall_prob, no food).
    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, food_prob=0)

    # Create the Q-network and train the double DQN agent; the greediness
    # schedule and update period come from the config file.
    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)
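A minimal sketch of how this entry point might be invoked; the environment name, experiment id, and YAML path below are hypothetical placeholders, not values from the original repository.

# Hypothetical invocation; 'gridworld', the experiment id and the config path
# are placeholders chosen for illustration.
if __name__ == '__main__':
    ddqn(env_type='gridworld',
         experiment_id='ddqn_run_001',
         config_file='configs/ddqn.yaml')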
Example #2
def ddqn(env_type, experiment_id, config_file):
    '''
    Double Deep Q-learning

    Args:
        env_type: Environment type
        experiment_id: ID for the experiment
        config_file: Path to the config file
    '''

    params = read_yaml(config_file)
    params['model_type'] = 'DDQN'
    params['env_type'] = env_type
    params['experiment_id'] = experiment_id

    save_config(params, experiment_id)
    env = make_env(env_type, params)
    env.make_world(wall_prob=params.wall_prob, wall_seed=20, food_prob=0)
    q_net = create_nn(params)
    agent = DDQN(params, env, q_net, nn.MSELoss(), optim.RMSprop)
    agent.train(params.episodes, params.episode_step, params.random_step,
                params.min_greedy, params.max_greedy, params.greedy_step,
                params.update_period)
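Note that params is written to with dict syntax (params['model_type']) but read with attribute access (params.episodes), so read_yaml presumably returns a dict-like object that supports both. A minimal sketch of such a helper, assuming PyYAML; this is an illustration, not the project's actual read_yaml implementation.

import yaml

class AttrDict(dict):
    # Dict whose keys are also readable/writable as attributes (illustrative only).
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

def read_yaml_sketch(path):
    # Load the YAML config and wrap it for attribute-style access.
    with open(path, 'r') as f:
        return AttrDict(yaml.safe_load(f))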
Example #3
def calc_reference_deviation(virtual_env, real_env, config):
    # Repeatedly train a DDQN agent on the virtual environment and collect
    # the (state, reward) pairs from every training run.
    state_reward_concat = None

    for i in range(10):
        agent = DDQN(env=real_env, config=config)
        _, _, replay_buffer_train = agent.train(env=virtual_env)

        states, _, _, rewards, _ = replay_buffer_train.get_all()
        state_reward = torch.cat((states, rewards), 1)

        # Stack the samples of all runs along the batch dimension.
        if state_reward_concat is None:
            state_reward_concat = state_reward
        else:
            state_reward_concat = torch.cat((state_reward_concat, state_reward), 0)

        print(state_reward_concat.shape)
        print(torch.std(state_reward_concat, dim=0))

    # Standard deviation of the collected (state, reward) samples.
    return torch.std(state_reward_concat, dim=0).item()
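The tensor bookkeeping above follows a common pattern: pair states with rewards column-wise, stack the runs row-wise, then take the per-column standard deviation. A self-contained sketch with dummy tensors (the shapes are illustrative):

import torch

# Dummy data standing in for two training runs (shapes chosen for illustration).
states_a, rewards_a = torch.randn(100, 4), torch.randn(100, 1)
states_b, rewards_b = torch.randn(80, 4), torch.randn(80, 1)

# Pair each state with its reward column-wise (dim=1).
run_a = torch.cat((states_a, rewards_a), 1)   # shape (100, 5)
run_b = torch.cat((states_b, rewards_b), 1)   # shape (80, 5)

# Stack the runs row-wise (dim=0) and take the per-column deviation.
combined = torch.cat((run_a, run_b), 0)       # shape (180, 5)
per_dim_std = torch.std(combined, dim=0)      # shape (5,)

Note that torch.Tensor.item() only succeeds on a single-element tensor, so the final .item() call in calc_reference_deviation implicitly assumes the per-dimension result has exactly one entry.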
Example #4
    def compute(self, working_dir, bohb_id, config_id, cso, budget, *args, **kwargs):
        # Load the default CartPole configuration and merge in the
        # hyperparameters proposed by BOHB for this iteration.
        with open("default_config_cartpole.yaml", 'r') as stream:
            default_config = yaml.safe_load(stream)

        config = self.get_specific_config(cso, default_config, budget)
        print('----------------------------')
        print("START BOHB ITERATION")
        print('CONFIG: ' + str(config))
        print('CSO:    ' + str(cso))
        print('BUDGET: ' + str(budget))
        print('----------------------------')

        info = {}

        # Generate the real environment and a DDQN agent with ICM
        # (intrinsic curiosity) enabled.
        env_fac = EnvFactory(config)
        env = env_fac.generate_real_env()

        ddqn = DDQN(env=env,
                    config=config,
                    icm=True)

        # Train the agent several times; the number of training episodes per
        # run is used as the score.
        score_list = []
        for _ in range(5):
            rewards, _, _ = ddqn.train(env)
            score_i = len(rewards)
            score_list.append(score_i)

        score = np.mean(score_list)

        info['config'] = str(config)

        print('----------------------------')
        print('FINAL SCORE: ' + str(score))
        print("END BOHB ITERATION")
        print('----------------------------')

        # BOHB minimizes the returned loss.
        return {
                "loss": score,
                "info": info
                }
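A hedged sketch of post-processing several results shaped like the dict returned above; the loss values and config strings are placeholders, and BOHB itself would normally track this selection internally.

# Placeholder results, each shaped like the dict returned by compute() above.
results = [
    {"loss": 212.4, "info": {"config": "config_a"}},
    {"loss": 187.0, "info": {"config": "config_b"}},
    {"loss": 240.8, "info": {"config": "config_c"}},
]

# BOHB minimizes the reported loss, so the best iteration has the smallest value.
best = min(results, key=lambda r: r["loss"])
print('best loss:  ', best["loss"])
print('best config:', best["info"]["config"])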