Code example #1
File: runner.py  Project: AmoyZhp/DynamicETC_Zhp
    def random_test(self, iteration, graph_data, template_env: DynamicETC):
        agents = []
        for road in graph_data['edges']:
            agent = RandomAgent(road['source'], road['target'],
                                const.ACTION_SPACE, const.MAX_TIME_STEP)
            agents.append(agent)
        env = template_env.clone()
        total_average_reward = 0.0
        for i in range(iteration):

            state = env.reset()
            action_seqs = []
            for agent in agents:
                action_seqs.append(agent.act())
            cumulative_reward = 0
            for step in range(const.MAX_TIME_STEP):
                actions = []
                for action_seq in action_seqs:
                    actions.append({
                        'source': action_seq.get_id()['source'],
                        'target': action_seq.get_id()['target'],
                        'toll': action_seq.get_action(step)
                    })
                state, reward, terminal, info = env.step(actions)
                cumulative_reward += reward

            average_reward = cumulative_reward / const.MAX_TIME_STEP
            logging.info(
                "average cumulative reward is {}".format(average_reward))
            total_average_reward = total_average_reward * (
                i / (i + 1)) + average_reward / (i + 1)

        logging.info("total average reward is {}".format(total_average_reward))
Code example #2
File: agent_factory.py  Project: gr4cza/Connect4_AI
def _get_agent(agent_type, player):
    if agent_type == 'Player':
        return Player()
    if agent_type == 'RandomPlayer':
        return RandomAgent()
    if agent_type == 'QLearn':
        return QLearn(player, source_name='models/min_max_5_10K_p1_20191111_202358.pkl')
    if agent_type == 'MinMax':
        return MinMaxAgent(player, 4, AdvancedScore.score)
    if agent_type == 'AlphaBeta':
        return MinMaxAgentWAlphaBeta(player, 6, AdvancedScore.score)
    if agent_type == 'MonteCarlo':
        return MonteCarlo(player, 2000)
    if agent_type == 'AlphaZero':
        from alpha_zero.alpha_net import AlphaNet
        alpha_net = AlphaNet('test_net_1_1_2_3')
        return AlphaZero(player, net=alpha_net, mcts_turns=300, print_policy=True)
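The if-chain above can also be written as a dispatch dictionary, which makes the unknown-type case explicit (the original silently returns None for an unrecognized name). A minimal self-contained sketch with hypothetical stand-in classes; in the real project the lambdas would hold the same constructor calls as above:

# Sketch: dictionary-based agent factory (stand-in classes, not the project's real ones).
class Player:          # hypothetical stand-in
    pass

class RandomAgent:     # hypothetical stand-in
    pass

_AGENT_BUILDERS = {
    'Player': lambda player: Player(),
    'RandomPlayer': lambda player: RandomAgent(),
}

def get_agent(agent_type, player):
    try:
        return _AGENT_BUILDERS[agent_type](player)
    except KeyError:
        raise ValueError('Unknown agent type: {}'.format(agent_type))

print(type(get_agent('RandomPlayer', player=1)).__name__)  # RandomAgent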
Code example #3
        lives = 3
        while True:
            action = agent.predict_action(obs)
            obs, reward, done, info = env.step(action)
            env.render()
            time.sleep(sleep)
            if done:
                lives -= 1
            if done and lives == 0:
                break
    env.close()


agent_runner = {
    'rand':
    lambda s, e: run_trained(RandomAgent(), s, e),
    'dqn':
    lambda s, e: run_trained(DQNAgent(), s, e),
    'a2c':
    lambda s, e: run_trained(A2CAgent(), s, e),
    'ppo':
    lambda s, e: run_trained(PPOAgent(), s, e),
    'dqn_custom':
    lambda s, e: run_custom_trained(model_path=os.path.join(
        MODELS_ROOT, "custom_ep_63.pth"),
                                    sleep=s,
                                    episodes=e),
    'dqn_forgetting':
    lambda s, e: run_custom_trained(model_path=os.path.join(
        MODELS_ROOT, "custom_ep_89_forgetting.pth"),
                                    sleep=s,
Code example #4
def greedy_chaser(env_name, episode_count, load_path, display=True, fps=10):
    """Show result of trained chaser to chase random agent runner.
    """
    env = gym.make(env_name)
    chaser = GreedyAgent(default_reward=-1.0,
                         name='chaser',
                         color=(1.0, 0.0, 0.0),
                         env=env,
                         agent_type=AgentType.Chaser,
                         load_path=load_path,
                         features_n=4)

    runner = RandomAgent(default_reward=1.0,
                         name='runner',
                         color=(0.0, 1.0, 0.0),
                         env=env,
                         agent_type=AgentType.Runner)

    env.add_agent(chaser)
    env.add_agent(runner)

    for epi in range(episode_count):
        state_map = env.reset()
        chaser_info = state_map[chaser.name]
        runner_info = state_map[runner.name]

        chaser_x = chaser_info['state'][0]
        chaser_y = chaser_info['state'][1]
        runner_x = runner_info['state'][0]
        runner_y = runner_info['state'][1]

        chaser_state = [chaser_x, chaser_y, runner_x, runner_y]
        runner_state = [runner_x, runner_y, chaser_x, chaser_y]

        step = 0
        while True:
            if display:
                env.render()
                time.sleep(1 / fps)
            action = chaser.act(chaser_state)
            chaser_poi, direction, _, done, _ = env.step(action, chaser.name)
            chaser_x, chaser_y = chaser_poi[0], chaser_poi[1]
            chaser_state_ = [chaser_x, chaser_y, runner_x, runner_y]
            chaser_state = chaser_state_
            step += 1
            if done:
                print('Episode: %d\tsteps: %d' % (epi + 1, step + 1))
                break
            else:
                runner_action = runner.act(runner_state)
                runner_poi, runner_dir, _, done, _ = env.step(
                    runner_action, runner.name)
                if display:
                    env.render()
                    time.sleep(1 / fps)
                runner_x, runner_y = runner_poi[0], runner_poi[1]
                runner_state = [runner_x, runner_y, chaser_x, chaser_y]
                step += 1
                if done:
                    print('Episode: %d\tsteps: %d' % (epi + 1, step + 1))
                    break
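Code examples #4 through #7 share the same alternating-turn skeleton: the chaser moves, the episode ends if the step reports done, otherwise the runner moves and the loop repeats. A minimal self-contained sketch of that pattern on a toy 10x10 grid (hypothetical helper names, no gym environment):

# Sketch: alternating chaser/runner turns until the chaser catches the runner.
import random

MOVES = [(0, 1), (0, -1), (1, 0), (-1, 0)]

def clamp(v, lo=0, hi=9):
    return max(lo, min(hi, v))

def toy_episode():
    chaser, runner = (0, 0), (9, 9)
    step = 0
    while True:
        # Chaser turn: greedy step toward the runner.
        dx, dy = runner[0] - chaser[0], runner[1] - chaser[1]
        if abs(dx) >= abs(dy):
            move = ((1 if dx > 0 else -1), 0)
        else:
            move = (0, (1 if dy > 0 else -1))
        chaser = (clamp(chaser[0] + move[0]), clamp(chaser[1] + move[1]))
        step += 1
        if chaser == runner:   # the chaser's move ended the episode
            return step
        # Runner turn: random move.
        dx, dy = random.choice(MOVES)
        runner = (clamp(runner[0] + dx), clamp(runner[1] + dy))
        step += 1
        if chaser == runner:   # the runner walked into the chaser
            return step

print('caught after', toy_episode(), 'steps')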
Code example #5
def chaser_dqn(env_name,
               episode_count=1000,
               display=True,
               fps=10,
               need_reload=False,
               reload_path=None):
    """Use DQN to train Chaser to chase a random runner agent.
    """
    env = gym.make(env_name)
    # Epsilon Greedy agent with network to make policy
    chaser = EGreedyAgent(default_reward=-1.0,
                          name='chaser',
                          color=(1.0, 0.0, 0.0),
                          env=env,
                          agent_type=AgentType.Chaser,
                          features_n=4,
                          memory_capacity=1024,
                          need_reload=need_reload,
                          reload_path=reload_path)

    # Randomly make choice
    runner = RandomAgent(default_reward=1.0,
                         name='runner',
                         color=(0.0, 1.0, 0.0),
                         env=env,
                         agent_type=AgentType.Runner)

    env.add_agent(chaser)
    env.add_agent(runner)

    reward = 0
    done = False

    # Total steps contains every episode's steps
    total_steps = 0

    for epi in range(episode_count):
        state_map = env.reset()
        chaser_info = state_map[chaser.name]
        runner_info = state_map[runner.name]

        chaser_x = chaser_info['state'][0]
        chaser_y = chaser_info['state'][1]
        runner_x = runner_info['state'][0]
        runner_y = runner_info['state'][1]

        # State contains four elements
        chaser_state = [chaser_x, chaser_y, runner_x, runner_y]
        runner_state = [runner_x, runner_y, chaser_x, chaser_y]
        # current episode's steps
        step = 0
        while True:
            if display:
                env.render()
                time.sleep(1 / fps)
            action = chaser.act(chaser_state)
            chaser_poi, direction, reward, done, _ = env.step(
                action, chaser.name)
            chaser_x, chaser_y = chaser_poi[0], chaser_poi[1]
            chaser_state_ = [chaser_x, chaser_y, runner_x, runner_y]

            # Put this transition into memory
            chaser.memory.push(chaser_state, action, chaser_state_, reward)
            # Update current state
            chaser_state = chaser_state_
            if total_steps % 10 == 0:
                # Optimize the model every 10 steps
                chaser_loss = chaser.optimize_model()
            step += 1
            total_steps += 1
            if done:
                print('Episode: %d\tsteps: %d\tLoss: %f' %
                      (epi + 1, step + 1, chaser_loss))
                break
            else:
                runner_action = runner.act(runner_state)
                runner_poi, runner_dir, _, done, _ = env.step(
                    runner_action, runner.name)
                if display:
                    env.render()
                    time.sleep(1 / fps)
                runner_x, runner_y = runner_poi[0], runner_poi[1]
                runner_state = [runner_x, runner_y, chaser_x, chaser_y]
                step += 1
                if done:
                    # Add a positive-reward transition to the memory (disabled)
                    # a = get_reverse_action(runner_action)
                    # s = chaser_state
                    # r = 10
                    # s_ = [chaser_x, chaser_y, runner_x, runner_y]
                    # chaser.memory.push(s, a, s_, r)
                    print('Episode: %d\tsteps: %d\tLoss: %f' %
                          (epi + 1, step + 1, chaser_loss))
                    break
            # Update the target network, copying all weights and biases in DQN
            if total_steps % 500 == 0:
                # target net params replaced
                print('Target net params Replaced!')
                chaser.target_net.load_state_dict(
                    chaser.policy_net.state_dict())
                chaser.save('chaser-2000.pkl')
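The DQN loop above combines three scheduling ingredients: every transition is pushed into a replay memory, the model is optimized every 10 environment steps, and the target network is synchronized with the policy network every 500 steps. A minimal stdlib sketch of just that skeleton (hypothetical names, no real networks or gradients):

# Sketch: replay buffer plus periodic optimize / target-sync scheduling.
from collections import deque
import random

class ReplayMemory:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)   # oldest transitions are evicted automatically

    def push(self, state, action, next_state, reward):
        self.buffer.append((state, action, next_state, reward))

    def sample(self, batch_size):
        return random.sample(list(self.buffer), batch_size)

memory = ReplayMemory(capacity=1024)
policy_params = {'w': 0.0}                     # stand-in for the policy network's weights
target_params = dict(policy_params)            # stand-in for the target network's weights

for total_steps in range(2000):
    memory.push(state=total_steps, action=0, next_state=total_steps + 1, reward=-1.0)
    if total_steps % 10 == 0 and len(memory.buffer) >= 32:
        batch = memory.sample(32)              # a real agent would run a gradient step here
        policy_params['w'] += 0.01
    if total_steps % 500 == 0:
        target_params = dict(policy_params)    # copy policy weights into the target network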
Code example #6
def chaser_qt_greedy(env_name,
                     restore_path,
                     episode_count=1000,
                     display=True,
                     fps=10):
    env = gym.make(env_name)
    chaser = QTGreedyAgent(default_reward=-1.0,
                           name='chaser',
                           color=(1.0, 0.0, 0.0),
                           env=env,
                           agent_type=AgentType.Chaser,
                           restore_path=restore_path,
                           n_width=env.n_width,
                           n_height=env.n_height)

    runner = RandomAgent(default_reward=1.0,
                         name='runner',
                         color=(0.0, 1.0, 0.0),
                         env=env,
                         agent_type=AgentType.Runner)

    env.add_agent(chaser)
    env.add_agent(runner)

    for epi in range(episode_count):
        state_map = env.reset()
        chaser_info = state_map[chaser.name]
        runner_info = state_map[runner.name]

        chaser_x = chaser_info['state'][0]
        chaser_y = chaser_info['state'][1]
        runner_x = runner_info['state'][0]
        runner_y = runner_info['state'][1]

        chaser_state = [chaser_x, chaser_y, runner_x, runner_y]
        runner_state = [runner_x, runner_y, chaser_x, chaser_y]

        step = 0
        while True:
            if display:
                env.render()
                time.sleep(1 / fps)
            action = chaser.act(chaser_state)
            chaser_poi, direction, _, done, _ = env.step(action, chaser.name)
            chaser_x, chaser_y = chaser_poi[0], chaser_poi[1]
            chaser_state_ = [chaser_x, chaser_y, runner_x, runner_y]
            chaser_state = chaser_state_
            step += 1
            if done:
                print('Episode: %d\tsteps: %d' % (epi + 1, step + 1))
                break
            else:
                runner_action = runner.act(runner_state)
                runner_poi, runner_dir, _, done, _ = env.step(
                    runner_action, runner.name)
                if display:
                    env.render()
                    time.sleep(1 / fps)
                runner_x, runner_y = runner_poi[0], runner_poi[1]
                runner_state = [runner_x, runner_y, chaser_x, chaser_y]
                step += 1
                if done:
                    print('Episode: %d\tsteps: %d' % (epi + 1, step + 1))
                    break
Code example #7
def chaser_q_learning(env_name,
                      episode_count=1000,
                      display=True,
                      fps=10,
                      need_reload=True,
                      reload_path=None):
    env = gym.make(env_name)
    chaser = QTEGreedyAgent(default_reward=-0.1,
                            name='chaser',
                            color=(1.0, 0.0, 0.0),
                            env=env,
                            agent_type=AgentType.Chaser,
                            n_width=env.n_width,
                            n_height=env.n_height,
                            need_reload=need_reload,
                            reload_path=reload_path)
    runner = RandomAgent(default_reward=1.0,
                         name='runner',
                         color=(0.0, 1.0, 0.0),
                         env=env,
                         agent_type=AgentType.Runner)
    env.add_agent(chaser)
    env.add_agent(runner)
    # Total steps contains every episode's steps
    total_steps = 0
    for epi in range(episode_count):
        state_map = env.reset()
        chaser_info = state_map[chaser.name]
        runner_info = state_map[runner.name]
        chaser_x = chaser_info['state'][0]
        chaser_y = chaser_info['state'][1]
        runner_x = runner_info['state'][0]
        runner_y = runner_info['state'][1]

        # State contains four elements
        chaser_state = [chaser_x, chaser_y, runner_x, runner_y]
        runner_state = [runner_x, runner_y, chaser_x, chaser_y]
        # current episode's steps
        step = 0
        while True:
            if display:
                env.render()
                time.sleep(1 / fps)
            action = chaser.act(chaser_state)
            chaser_poi, direction, reward, done, _ = env.step(
                action, chaser.name)
            chaser_x, chaser_y = chaser_poi[0], chaser_poi[1]
            chaser_state_ = [chaser_x, chaser_y, runner_x, runner_y]

            chaser.update(chaser_state, action, reward, chaser_state_)
            # Update current state
            chaser_state = chaser_state_
            step += 1
            total_steps += 1
            if done:
                print('Episode: %d\tsteps: %d' %
                      (epi + 1, step + 1))
                break
            else:
                runner_action = runner.act(runner_state)
                runner_poi, runner_dir, _, done, _ = env.step(
                    runner_action, runner.name)
                if display:
                    env.render()
                    time.sleep(1 / fps)
                runner_x, runner_y = runner_poi[0], runner_poi[1]
                runner_state = [runner_x, runner_y, chaser_x, chaser_y]
                step += 1
                if done:
                    # Add a positive-reward transition to the memory (disabled)
                    # a = get_reverse_action(runner_action)
                    # s = chaser_state
                    # r = 10
                    # s_ = [chaser_x, chaser_y, runner_x, runner_y]
                    # chaser.memory.push(s, a, s_, r)
                    print('Episode: %d\tsteps: %d' %
                          (epi + 1, step + 1))
                    break
            # Periodically save the Q-table agent (no target network here)
            if total_steps % 500 == 0:
                chaser.save()
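The call chaser.update(chaser_state, action, reward, chaser_state_) presumably performs a tabular Q-learning update. The standard form of that rule, as a self-contained sketch (hypothetical hyperparameters and Q table, not the project's agent class):

# Sketch: standard tabular Q-learning update.
from collections import defaultdict

ALPHA, GAMMA, N_ACTIONS = 0.1, 0.9, 4
q_table = defaultdict(lambda: [0.0] * N_ACTIONS)   # state (a tuple) -> action values

def q_update(state, action, reward, next_state):
    # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
    td_target = reward + GAMMA * max(q_table[next_state])
    q_table[state][action] += ALPHA * (td_target - q_table[state][action])

# The project stores states as lists such as [chaser_x, chaser_y, runner_x, runner_y];
# they would need to be converted to tuples before being used as dictionary keys.
q_update(state=(0, 0, 9, 9), action=2, reward=-0.1, next_state=(1, 0, 9, 9))
print(q_table[(0, 0, 9, 9)])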