コード例 #1
0
def search(config):
    """Random search over an agent's hyper-parameter space.

    Samples random configurations, evaluates each in a worker process, and
    returns every result sorted by mean score together with the best params.
    """
    get_params = get_agent_class(config).get_random_config
    # The number of tunable hyper-parameters scales how many configs we try.
    nb_hp_params = len(get_params())

    if config['debug']:
        print('*** Number of hyper-parameters: %d' % nb_hp_params)

    config['max_iter'] = 5 if config['debug'] else 500
    futures = []
    nb_workers = min(multiprocessing.cpu_count(), config['nb_process'])
    with concurrent.futures.ProcessPoolExecutor(nb_workers) as executor:
        nb_config = 5 if config['debug'] else 200 * nb_hp_params
        for run_id in range(nb_config):
            params = get_params(config["fixed_params"])
            config.update(params)
            config['random_seed'] = 1

            # Deep-copy both dicts: `config` is mutated on every iteration,
            # and the submission must capture this iteration's snapshot.
            futures.append(executor.submit(
                test_params, run_id, copy.deepcopy(config),
                copy.deepcopy(params)))
        concurrent.futures.wait(futures)

    results = sorted(
        (future.result() for future in futures),
        key=lambda result: result['mean_score'],
        reverse=True,
    )
    return {
        'best_params': results[0]['params'],
        'results': results,
    }
コード例 #2
0
ファイル: run_experiment.py プロジェクト: chtran/easy_rl
def main(env_id, agent_id, n_test_episodes, render, debug):
    """Train the selected agent on the selected env, then run test episodes
    and print the average reward (rendering every 5th episode if asked)."""
    rng = np.random.RandomState(42)

    if env_id == 'Point-v0':
        # Importing registers the custom environment with gym.
        from environments import point_env
        env = gym.make('Point-v0')
    else:
        env = gym.make('MountainCarContinuous-v0')

    # Seed everything for reproducible runs.
    env.seed(42)
    tf.set_random_seed(42)
    np.random.seed(42)

    agent = agents.get_agent_class(agent_id)(env, debug)
    agent.train()

    episode_rewards = []
    for episode_idx in range(n_test_episodes):
        show_this_episode = render and episode_idx % 5 == 0
        state = env.reset()
        total_reward = 0
        done = False
        while not done:
            state, reward, done, _ = env.step(agent.get_action(state))
            total_reward += reward
            if show_this_episode:
                env.render()
        episode_rewards.append(total_reward)
    print("Average rewards:", np.mean(episode_rewards))
コード例 #3
0
ファイル: fullsearch.py プロジェクト: wandererzzx/openai-rl
def third_pass(config, best_lr):
    """Stage 3 of the full search: Hyperband with the learning rate pinned.

    Args:
        config: run configuration dict; deep-copied, so the caller's dict
            is never mutated.
        best_lr: learning rate selected by the second pass.

    Returns:
        The summary dict produced by Hyperband.run().
    """
    config = copy.deepcopy(config)

    # Fix lr so Hyperband only explores the remaining hyper-parameters.
    config["fixed_params"] = {'lr': best_lr}
    config['result_dir_prefix'] = config['result_dir_prefix'] + '/third-pass'
    config['games_per_epoch'] = 5 if config['debug'] else 100
    # Idiomatic truthiness cast (was `True if config['debug'] else False`).
    dry_run = bool(config['debug'])

    get_params = get_agent_class(config).get_random_config
    hb = Hyperband(get_params, run_params)

    summary = hb.run(config, skip_last=True, dry_run=dry_run)

    return summary
コード例 #4
0
    def test_mcagent_act(self):
        """With everything seeded to 0, TabularMCAgent must choose action 1
        in state id 144 on the first observation."""
        config = {
            'lr': 1,  # unused by this agent
            'agent_name': 'TabularMCAgent',
            'env_name': 'CartPole-v0',
            'random_seed': 0,
            'result_dir': dir + '/results',
            'discount': 1.,
        }
        np.random.seed(0)
        config.update(get_agent_class(config).get_random_config())
        # The random config may override discount; force it back to 1.
        config['discount'] = 1.

        env = gym.make(config['env_name'])
        env.seed(0)
        agent = make_agent(config, env)

        act, state_id = agent.act(env.reset())
        self.assertEqual(act, 1)
        self.assertEqual(state_id, 144)
コード例 #5
0
class MagicThing(BasicMagicThing):
    """A magic item that must first be deciphered (a repeated craft
    challenge in look_at) before its combat bonuses, activated description,
    and use() behavior become available."""
    # Class-level defaults shared by all instances unless overridden.
    off=1
    deff=1
    # NOTE(review): bound ONCE at class-definition time via get_agent_class();
    # all instances share the same opponent_class.
    opponent_class=get_agent_class()
    understand=5
    activated=False
    activated_description= False
    multiple_choice=False
    
    def craft_aid_off(self,opponent):
        # Old-style `and/or` ternary: yields self.off only when activated AND
        # the opponent matches opponent_class; otherwise 1 (note it also
        # falls back to 1 if self.off itself is falsy, e.g. 0).
        return self.activated and isinstance(opponent,self.opponent_class) and self.off or 1
        
    def craft_aid_def(self,opponent):
        # Same `and/or` pattern as craft_aid_off, for the defence bonus.
        return self.activated and isinstance(opponent,self.opponent_class) and self.deff or 1

    def look_at(self,player,gui):
        # Show the base description first, then run the one-time activation
        # challenge. Returns False on a failed challenge, True otherwise.
        Thing.look_at(self,player,gui)
        if not self.activated:
            gui.prn("C'è qualcosa misterioso qui.")
            understand = self.understand
            # Keep challenging until the difficulty pool is exhausted;
            # each success reduces it by the player's craft score.
            while understand > 0:
                outcome = self.challenge(player,gui)
                if not outcome:
                    # Failure: spirit-point damage scales with remaining
                    # difficulty, capped at the player's current sp.
                    player_damage = min((understand+3)//4,player.sp)
                    gui.prn('Perdi %d punti di spirito.\n' % player_damage)
                    player.sp -= player_damage
                    # NOTE(review): under Python 3 this subtracts a float
                    # (player_damage/2); confirm whether // was intended.
                    player.long_sp -= player_damage/2
                    return False
                else:
                    understand = understand-player.craft
            self.activated = True
        if self.activated_description:
            gui.prn(self.activated_description)
        return True

    def use(self,player,gui):
        # The item only works after activation via look_at().
        if self.activated:
            BasicMagicThing.use(self,player,gui)
コード例 #6
0
ファイル: fullsearch.py プロジェクト: wandererzzx/openai-rl
def first_pass(config):
    """Stage 1 of the full search: pure random search over all hyper-params.

    Evaluates `nb_config` randomly sampled configurations in a process pool.

    Args:
        config: run configuration dict; deep-copied, so the caller's dict
            is never mutated.

    Returns:
        {'results': [...]} sorted by 'mean_score', best first.
    """
    config = copy.deepcopy(config)

    config['result_dir_prefix'] = config['result_dir_prefix'] + '/first-pass'
    if config['debug']:
        print('Removing fixed params')
    # First pass explores everything: no fixed hyper-parameters.
    config["fixed_params"] = {}
    config['max_iter'] = 5 if config['debug'] else 150
    if config['debug']:
        print('Overriding max_iter params to %d' % config['max_iter'])
    # Removed: unused `dry_run` local and a dead `results = []` that was
    # immediately shadowed after the pool block.

    get_params = get_agent_class(config).get_random_config

    futures = []
    nb_workers = min(multiprocessing.cpu_count(), config['nb_process'])
    with concurrent.futures.ProcessPoolExecutor(nb_workers) as executor:
        nb_config = 5 if config['debug'] else 1000
        for i in range(nb_config):
            # Sample a fresh random config and fold it into the run config.
            params = get_params(config["fixed_params"])
            config.update(params)

            # Deep-copy config: it is mutated on every iteration and the
            # submission must capture this iteration's snapshot.
            futures.append(
                executor.submit(exec_first_pass, i, copy.deepcopy(config),
                                params))
        concurrent.futures.wait(futures)

    results = [future.result() for future in futures]

    return {
        'results':
        sorted(results, key=lambda result: result['mean_score'], reverse=True)
    }
コード例 #7
0
    def test_mcagent_learn_from_episode(self):
        """After one fully-seeded training episode, the Q-values for state
        126 must match the known reference values."""
        config = {
            'lr': 1,  # unused by this agent
            'agent_name': 'TabularMCAgent',
            'env_name': 'CartPole-v0',
            'random_seed': 0,
            'result_dir': dir + '/results',
            'discount': 1.,
        }
        np.random.seed(0)
        config.update(get_agent_class(config).get_random_config())
        # The random config may override discount; force it back to 1.
        config['discount'] = 1.

        env = gym.make(config['env_name'])
        env.seed(0)
        agent = make_agent(config, env)

        agent.learn_from_episode(env)

        qs = agent.sess.run(agent.Qs)
        self.assertTrue(
            np.all(np.isclose(qs[126], [4.49999952, 1.99999976])))
コード例 #8
0
def _dump_json(path, obj):
    """Serialize obj as JSON to path (overwrites any existing file)."""
    with open(path, 'w') as f:
        json.dump(obj, f)


def main(_):
    """CLI entry point: dispatch on flags to hyperband search, full search,
    random search, or a plain train/play run.

    Reads all options from flags.FLAGS; result directories are created
    under `dir` and stamped with the current unix time.
    """
    config = flags.FLAGS.__flags.copy()
    # fixed_params arrives as a JSON string on the command line.
    config["fixed_params"] = json.loads(config["fixed_params"])

    if config['hyperband']:
        print('Starting hyperband search')

        config['result_dir_prefix'] = dir + '/results/hyperband/' + str(
            int(time.time()))

        get_params = get_agent_class(config).get_random_config
        hb = Hyperband(get_params, run_params)
        results = hb.run(config, skip_last=True, dry_run=config['dry_run'])

        if not os.path.exists(config['result_dir_prefix']):
            os.makedirs(config['result_dir_prefix'])
        _dump_json(config['result_dir_prefix'] + '/hb_results.json', results)

    elif config['fullsearch']:
        print('*** Starting full search')
        config['result_dir_prefix'] = dir + '/results/fullsearch/' + str(
            int(time.time())) + '-' + config['agent_name']
        os.makedirs(config['result_dir_prefix'])

        # Pass 1: unconstrained random search over all hyper-parameters.
        print('*** Starting first pass: full random search')
        summary = fullsearch.first_pass(config)
        _dump_json(config['result_dir_prefix'] + '/fullsearch_results1.json',
                   summary)

        # Pass 2: refine the learning rate around the best pass-1 config.
        print('*** Starting second pass: Learning rate search')
        best_agent_config = summary['results'][0]['params']
        summary = fullsearch.second_pass(config, best_agent_config)
        _dump_json(config['result_dir_prefix'] + '/fullsearch_results2.json',
                   summary)

        # Pass 3: Hyperband with the learning rate pinned.
        print('*** Starting third pass: Hyperband search with best lr')
        best_lr = summary['results'][0]['lr']
        summary = fullsearch.third_pass(config, best_lr)
        _dump_json(config['result_dir_prefix'] + '/fullsearch_results3.json',
                   summary)
    elif config['randomsearch']:
        print('*** Starting random search')
        config['result_dir_prefix'] = dir + '/results/randomsearch/' + str(
            int(time.time())) + '-' + config['agent_name']
        os.makedirs(config['result_dir_prefix'])

        summary = randomsearch.search(config)
        _dump_json(config['result_dir_prefix'] + '/fullsearch_results1.json',
                   summary)
    else:
        env = gym.make(config['env_name'])
        agent = make_agent(config, env)

        if config['play']:
            for i in range(config['play_nb']):
                agent.play(env)
        else:
            agent.train()
            agent.save()
コード例 #9
0
 def init_move(self,place):
     """Move this thing to *place* and register it on the map's thing list.

     If *place* is an agent (get_agent_class() instance) the thing ends up
     carried, so the map is reached through the agent's own location
     (self.place.place); otherwise self.place is the location itself.
     """
     self.move(place)
     if isinstance(place,get_agent_class()):
         self.place.place.game_map.all_things.append(self)
     else:    
         self.place.game_map.all_things.append(self)