Beispiel #1
0
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)

        obs = env.reset()
        reward = 0
        agent_done = False

        while True:
            if env.done:
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:

        if type == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:

                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)
    def __init__(self,
                 name,
                 visualizer=None,
                 focused=True,
                 random=True,
                 bad_guy=True,
                 standstill=False):
        nb_actions = len(ENV_ACTIONS)
        super(ChallengerFactory, self).__init__(name,
                                                nb_actions,
                                                visualizer=visualizer)
        # List of possible agents
        self._agents = []
        self._agent_probabilities = []
        self._helmets = []

        print("Allowing challengers:")
        if focused:
            self._agents.append(
                FocusedAgent(name, ENV_TARGET_NAMES[0], visualizer=visualizer))
            self._agent_probabilities.append(FocusedAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[FocusedAgent])
            print("   FocusedAgent")

        if random:
            self._agents.append(
                RandomAgent(name, nb_actions, visualizer=visualizer))
            self._agent_probabilities.append(RandomAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[RandomAgent])
            print("   RandomAgent")

        if bad_guy:
            self._agents.append(BadGuy(name, visualizer=visualizer))
            self._agent_probabilities.append(BadGuyWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[BadGuy])
            print("   BadGuy")

        if standstill:
            self._agents.append(StandstillAgent(name, visualizer=visualizer))
            self._agent_probabilities.append(StandstillAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[StandstillAgent])
            print("   StandstillAgent")

        # Select first agent
        n = sum(self._agent_probabilities)
        self._agent_probabilities = [
            item / n for item in self._agent_probabilities
        ]
        self.current_agent = self._select_agent(self._agent_probabilities)
Beispiel #3
0
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = FocusedAgent(name, ENV_TARGET_NAMES[0])

        obs = env.reset()
        reward = 0
        agent_done = False

        while True:
            if env.done:
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            agent = BayesAgent(name, ENV_TARGET_NAMES[0], 'Agent_1', False,
                               sess)
            if not agent.save:
                sess.run(tf.global_variables_initializer())
                print "Initialize"
            obs = env.reset()
            agent.reset(obs)
            reward = 0
            agent_done = False
            viz_rewards = []
            avg = []
            max_training_steps = EPOCH_SIZE * max_epochs
            epoch = 0
            for step in range(1, max_training_steps + 1):
                # check if env needs reset
                if env.done:
                    avg.append(sum(viz_rewards))
                    print "Epoch:%d, accumulative rewards: %d" % (
                        epoch, sum(viz_rewards))
                    visualize_training(visualizer, step, viz_rewards)
                    viz_rewards = []
                    obs = env.reset()
                    agent.reset(obs)
                    epoch += 1

                # select an action
                action = agent.act(obs, reward, agent_done, is_training=False)
                obs, reward, agent_done = env.do(action)
                viz_rewards.append(reward)
                #
                agent.inject_summaries(step)
            print "Average Reward: ", 1. * sum(avg) / len(avg)
Beispiel #4
0
def run_challenge_agent(clients, is100):
    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=0,
                              randomize_positions=True)
    agent = FocusedAgent(ENV_AGENT_NAMES[0], ENV_TARGET_NAMES[0])
    agent_loop(agent, env, None, is100)
Beispiel #5
0
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)
    
    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=role,
                              randomize_positions=True)

    if role == 0:
        agent1 = FocusedAgent(name, ENV_TARGET_NAMES[0])
        agent2 = RandomAgent(name, env.available_actions)
        agent3 = BadAgent(name)
        
        agent_list = [agent1, agent2, agent3]# three types of agent
        agent = agent1
        
        obs = env.reset()
        reward = 0
        agent_done = False
        max_training_steps = EPOCH_SIZE * max_epochs
        epoch = 0
        for step in range(1, max_training_steps+1):
            if env.done:
                obs = env.reset()
                epoch += 1
                agent = agent_list[epoch/10 % 3]# change for every 10 episodes
            # select an action
            action = agent.act(obs, reward, agent_done)
            # take a step
            obs, reward, agent_done = env.do(action)
                


    else:
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config = config) as sess:
            agent1 = BayesAgent(name, ENV_TARGET_NAMES[0], 'Agent_1', True, sess)
            agent2 = RandomAgent(name, env.available_actions)
            agent3 = BadAgent(name)
            agent4 = FocusedAgent(name, ENV_TARGET_NAMES[0])
            if not agent1.save:
                sess.run(tf.global_variables_initializer()) 
                print "Initialize"
            agent_list = [agent1, agent2, agent3, agent4]# three types of agents
            agent = agent1
            obs = env.reset()
            agent1.reset(obs)
            reward = 0
            agent_done = False
            viz_rewards = []
            avg = []
            epoch = 0
            s = 1
            max_training_steps = EPOCH_SIZE * max_epochs
            for step in range(1, max_training_steps+1):
                # check if env needs reset
                if agent_done:
                    obs = env.reset()
                    agent1.reset(obs)
                    avg.append(sum(viz_rewards))
                    print "Epoch:%d, accumulative rewards: %d"%(epoch, sum(viz_rewards))
                    visualize_training(visualizer, step, viz_rewards)
                    viz_rewards = []
                    epoch += 1
                    agent = agent_list[epoch/5 % 4]# change for every episodes
                    if epoch%10 == 0:
                        agent1.reset_collaborator()
                    s = 1
                    
                # select an action
                action = agent.act(obs, reward, agent_done, is_training = True)
                # take a step
                next_obs, reward, agent_done = env.do(action)
                agent1.collecting(obs, action, reward, next_obs, agent_done, s)
                s += 1
                obs = next_obs
                viz_rewards.append(reward)
                    
                if step % 100 == 0:
                    agent1.save_replay_buffer()
                #
                agent1.inject_summaries(step)
                
            print "Average Reward: ", 1.*sum(avg)/len(avg)
Beispiel #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from common import ENV_AGENT_NAMES, ENV_TARGET_NAMES
from evaluation import PigChaseEvaluator
from environment import PigChaseSymoblicStateBuilder
from agent import PigChaseChallengeAgent, FocusedAgent
from malmopy.visualization import ConsoleVisualizer

if __name__ == '__main__':
    clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    builder = PigChaseSymbolicStateBuilder()
    agent = FocusedAgent(ENV_AGENT_NAMES[1], ENV_TARGET_NAMES[0])
    eval = PigChaseEvaluator(clients, agent, agent, builder)
    eval.run()
    eval.save('baseline_exp', 'baseline_results.json')
Beispiel #7
0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

from common import ENV_AGENT_NAMES, ENV_TARGET_NAMES
from evaluation import PigChaseEvaluator
from environment import PigChaseTopDownStateBuilder, PigChaseSymbolicStateBuilder
from malmopy.agent import RandomAgent
from agent import PigChaseChallengeAgent, FocusedAgent
from json import dump

if __name__ == '__main__':
    # Warn for Agent name !!!
    name = 'Agent_1'
    clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    agent = FocusedAgent(
        ENV_AGENT_NAMES[1], ENV_TARGET_NAMES[0]
    )  ##Defines the type of agents that are going to be used

    eval = PigChaseEvaluator(
        clients, agent, agent,
        PigChaseSymbolicStateBuilder())  ##Initializes the experiment
    print('Simbolic state builder running')
    eval.run()

    eval.save('My Exp 1', '.')
def agent_factory(name, role, baseline_agent, clients, max_epochs, logdir,
                  visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)
    batch_size = 32

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)

        if type(agent.current_agent) == RandomAgent:
            agent_type = PigChaseEnvironment.AGENT_TYPE_1
        else:
            agent_type = PigChaseEnvironment.AGENT_TYPE_2
        ##Aqui el state hay que modificarlo para que se adapte a lo que la red neurnal necesita
        state = env.reset(agent_type)

        reward = 0
        agent_done = False
        num_actions = 0
        while True:

            # take a step

            # reset if needed
            if env.done:
                print(agent.check_memory(batch_size))
                if type(agent.current_agent) == RandomAgent:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_1
                else:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_2
                ##Aqui el state habria que modificarlo de nuevo

                if num_actions > batch_size:
                    print('Entrando a replay 1')
                    agent.replay(batch_size)
                state = env.reset(agent_type)

            # select an action
            #print('Accion del role 1')
            action = agent.act(state, reward, agent_done, is_training=True)
            next_state, reward, agent_done = env.do(action)
            num_actions = num_actions + 1
            next_state2 = adapt_state(next_state)
            agent.remember(state, action, reward, next_state2, agent_done)
            ##Aqui state= obs (que seria el estado anterior estado modificado)
            state = next_state
        ##No estoy seguro de si esto va aqui por el while true (no se cuando acaba). Deberia ir cuando acaba una partida
        ##Hacer check si hace el replay o no. Si no lo hace nunca, meter el replay dentro de el if(env.done (signifca que una etapa ha acabado y empieza otra, por lo que deberia esta bien))

    else:

        if baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        state = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:

                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                ##No se si esto se tiene que hacer tambien aqui o no, hacer check
                if agent.check_memory(batch_size) > batch_size:
                    print('Entrando a replay 2')
                    agent.replay(batch_size)
                state = env.reset()

            # select an action
            #print('Accion del role 2')
            action = agent.act(state, reward, agent_done, is_training=True)
            # take a step
            next_state, reward, agent_done = env.do(action)
            next_state2 = adapt_state(next_state)
            agent.remember(state, action, reward, next_state2, agent_done)
            ##Aqui state= obs (que seria el estado anterior estado modificado)
            state = next_state
            #obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)
Beispiel #9
0
def agent_factory(name, role, baseline_agent, clients, max_epochs, logdir,
                  visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)
        obs = env.reset(get_agent_type(agent))

        reward = 0
        agent_done = False

        while True:
            if env.done:
                while True:
                    obs = env.reset(get_agent_type(agent))
                    if obs:
                        break

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)

            # reset if needed
            if env.done:
                obs = env.reset(get_agent_type(agent))

            # take a step
            obs, reward, agent_done = env.do(action)

    else:

        if baseline_agent == 'tabq':
            agent = TabularQLearnerAgent(name, visualizer)
        elif baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:
                while True:
                    if len(viz_rewards) == 0:
                        viz_rewards.append(0)
                    visualize_training(visualizer, step, viz_rewards)
                    tag = "Episode End Conditions"
                    visualizer.add_entry(
                        step, '%s/timeouts per episode' % tag,
                        env.end_result == "command_quota_reached")
                    visualizer.add_entry(
                        step, '%s/agent_1 defaults per episode' % tag,
                        env.end_result == "Agent_1_defaulted")
                    visualizer.add_entry(
                        step, '%s/agent_2 defaults per episode' % tag,
                        env.end_result == "Agent_2_defaulted")
                    visualizer.add_entry(step,
                                         '%s/pig caught per episode' % tag,
                                         env.end_result == "caught_the_pig")
                    agent.inject_summaries(step)
                    viz_rewards = []
                    obs = env.reset()
                    if obs:
                        break

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)