def load_envs_and_config(model_file):
    save_dict = torch.load(model_file)

    config = save_dict['config']
    config['device'] = 'cuda'

    env_factory = EnvFactory(config=config)
    reward_env = env_factory.generate_reward_env()
    reward_env.load_state_dict(save_dict['model'])
    real_env = env_factory.generate_real_env()

    return reward_env, real_env, config
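A minimal usage sketch of this loader (the checkpoint path is a placeholder, not taken from the source):

reward_env, real_env, config = load_envs_and_config('results/model.pt')  # hypothetical path
print(config['device'])  # 'cuda', forced by the loader above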
Example #2
def load_envs_and_config(model_file):
    save_dict = torch.load(model_file)

    config = save_dict['config']
    config['device'] = 'cpu'
    config['envs']['CartPole-v0']['solved_reward'] = 100000  # large enough that the early-out never triggers

    env_factory = EnvFactory(config=config)
    reward_env = env_factory.generate_reward_env()
    reward_env.load_state_dict(save_dict['model'])
    real_env = env_factory.generate_real_env()

    return reward_env, real_env, config
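Compared with the first loader, this variant forces CPU execution and raises the CartPole-v0 solved_reward so that training never terminates early.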
Example #3
def load_envs_and_config(model_file):
    save_dict = torch.load(model_file)

    config = save_dict['config']
    # BREAK is a module-level constant (set elsewhere in the script) that selects the stopping behaviour
    if BREAK == 'solved':
        config['envs']['Cliff']['solved_reward'] = -20  # low enough that the early-out triggers once the task is solved
    else:
        config['envs']['Cliff']['solved_reward'] = 100000  # large enough that the early-out never triggers

    env_factory = EnvFactory(config=config)
    reward_env = env_factory.generate_reward_env()
    reward_env.load_state_dict(save_dict['model'])
    real_env = env_factory.generate_real_env()

    return reward_env, real_env, config
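A possible evaluation loop that combines this loader with the QL agent shown in Example #5; the checkpoint path, repetition count, and time budgets are illustrative assumptions, and training the tabular agent directly on the reward env (rather than the real env, as in Example #5) is likewise an assumption:

import statistics

reward_env, real_env, config = load_envs_and_config('cliff_reward_env.pt')  # hypothetical path

train_lengths = []
for _ in range(5):  # repetition count is an assumption
    ql = QL(env=reward_env, config=config, count_based=False)
    reward_list_train, _, _ = ql.train(env=reward_env, test_env=real_env, time_remaining=5000)
    reward_list_test, _, _ = ql.test(env=real_env, time_remaining=500)
    train_lengths.append(len(reward_list_train))

print(statistics.mean(train_lengths))  # average number of training episodes across the repetitions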
Example #4
    def compute(self, working_dir, bohb_id, config_id, cso, budget, *args,
                **kwargs):
        # CONFIG_FILE, SAVE_FILE and NUM_EVALS are module-level constants defined elsewhere in the script
        with open(CONFIG_FILE, 'r') as stream:
            default_config = yaml.safe_load(stream)

        config = self.get_specific_config(cso, default_config, budget)
        print('----------------------------')
        print("START BOHB ITERATION")
        print('CONFIG: ' + str(config))
        print('CSO:    ' + str(cso))
        print('BUDGET: ' + str(budget))
        print('----------------------------')

        info = {}

        # generate environment
        env_fac = EnvFactory(config)

        real_env = env_fac.generate_real_env()
        reward_env = env_fac.generate_reward_env()
        save_dict = torch.load(SAVE_FILE)
        #config = save_dict['config']
        reward_env.load_state_dict(save_dict['model'])

        score = 0
        for i in range(NUM_EVALS):
            td3 = TD3(env=reward_env,
                      max_action=reward_env.get_max_action(),
                      config=config)
            reward_list_train, _, _ = td3.train(reward_env, test_env=real_env)
            reward_list_test, _, _ = td3.test(real_env)
            avg_reward_test = statistics.mean(reward_list_test)

            unsolved_weight = config["agents"]["gtn"]["unsolved_weight"]
            score += len(reward_list_train) + max(
                0, (real_env.get_solved_reward() -
                    avg_reward_test)) * unsolved_weight

        score = score / NUM_EVALS

        info['config'] = str(config)

        print('----------------------------')
        print('FINAL SCORE: ' + str(score))
        print("END BOHB ITERATION")
        print('----------------------------')

        return {"loss": score, "info": info}
Example #5
        # (excerpt) tail of an epsilon-greedy exploration update: decay eps, clamped at eps_min
        else:
            self.eps *= self.eps_decay
            self.eps = max(self.eps, self.eps_min)


if __name__ == "__main__":
    with open("../default_config_gridworld.yaml", "r") as stream:
        config = yaml.safe_load(stream)
    print(config)
    torch.set_num_threads(1)

    # generate environment
    env_fac = EnvFactory(config)
    real_env = env_fac.generate_real_env()
    # virtual_env = env_fac.generate_virtual_env()
    reward_env = env_fac.generate_reward_env()

    reward_list_len = []
    for i in range(20):
        ql = QL(env=real_env, config=config, count_based=False)
        reward_list_train, episode_length_list_train, _ = ql.train(
            env=real_env, test_env=real_env, time_remaining=5000)
        reward_list_test, episode_length_list_test, _ = ql.test(
            env=real_env, time_remaining=500)
        reward_list_len.append(len(reward_list_train))
        print(len(reward_list_train))
        print(sum(episode_length_list_train))

    import statistics

    print(statistics.mean(reward_list_len))