Example #1
def run_maze_learner(mission, clients):

    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        # logdir is assumed to be a module-level log-directory path defined elsewhere
        visualizer.initialize(logdir, None)

    else:
        visualizer = ConsoleVisualizer()


#    with TensorboardVisualizer() as visualizer:
    env = MazeEnvironment(mission,
                          [str.split(client, ':') for client in clients])
    env.recording = False

    #    explorer = LinearEpsilonGreedyExplorer(1, 0.1, 10000)
    #        model = DeepQNeuralNetwork((4, 84, 84), (env.available_actions,), momentum=0, visualizer=visualizer)
    #        memory = TemporalMemory(50000, model.input_shape[1:], model.input_shape[0], False)

    # agent = DQNAgent("Maze DQN Agent", env.available_actions, model, memory,
    #                  explorer=explorer, visualizer=visualizer)
    agent = RandomAgent("rand", 3)

    #        exp = SingleAgentExperiment("Malmo Cliff Walking", agent, env, 500000, warm_up_timesteps=500,
    #                                    visualizer=visualizer)
    #        exp.episode_end += on_episode_end

    #        visualizer.initialize(MALMO_MAZE_FOLDER, model, CntkConverter())

    #   with Popen(['tensorboard', '--logdir=%s' % path.join(MALMO_MAZE_FOLDER, path.pardir), '--port=6006']):
    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        #        action = agent.act(state, reward, agent_done, is_training=True)
        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        if (step % EPOCH_SIZE) == 0:
            # `model`, `backend` and `environment` are not defined in this snippet
            # (the RandomAgent has no model to save); this call belongs to the
            # commented-out DQN variant above.
            # model.save('%s-%s-dqn_%d.model' %
            #            (backend, environment, step // EPOCH_SIZE))
            pass
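
Both maze runners in this listing call a visualize_training helper that is not part of the snippet. A minimal sketch of what such a helper could look like, assuming the malmopy visualizers expose add_entry(step, tag, value) as used in Example #11 below (the tags and statistics here are illustrative):

def visualize_training(visualizer, step, rewards, tag='Training'):
    # Log per-episode reward statistics; skip episodes with no recorded rewards.
    if len(rewards) == 0:
        return
    visualizer.add_entry(step, '%s/reward per episode' % tag, sum(rewards))
    visualizer.add_entry(step, '%s/mean reward' % tag, sum(rewards) / float(len(rewards)))
    visualizer.add_entry(step, '%s/min reward' % tag, min(rewards))
    visualizer.add_entry(step, '%s/max reward' % tag, max(rewards))
    visualizer.add_entry(step, '%s/episode length' % tag, len(rewards))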
Example #2
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)

        obs = env.reset()
        reward = 0
        agent_done = False

        while True:
            if env.done:
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)

    else:

        if type == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:

                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                obs = env.reset()

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)
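
agent_factory also relies on a parse_clients_args helper that is not shown here. A plausible sketch, assuming the clients arrive as 'host:port' strings (the original helper may differ in detail):

def parse_clients_args(clients):
    # Turn ['127.0.0.1:10000', '127.0.0.1:10001'] into [('127.0.0.1', 10000), ...]
    endpoints = []
    for client in clients:
        host, port = str(client).split(':')
        endpoints.append((host, int(port)))
    return endpoints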
Example #3
    def __init__(self, name, visualizer=None):

        nb_actions = len(ENV_ACTIONS)
        super(PigChaseChallengeAgent, self).__init__(name, nb_actions,
                                                     visualizer=visualizer)

        self._agents = []
        self._agents.append(FocusedAgent(name, ENV_TARGET_NAMES[0],
                                         visualizer=visualizer))
        self._agents.append(RandomAgent(name, nb_actions,
                                        visualizer=visualizer))
        self.current_agent = self._select_agent(P_FOCUSED)
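
P_FOCUSED and _select_agent are defined elsewhere in the challenge code. As a rough sketch (not necessarily the repository's exact implementation), _select_agent could draw one of the two sub-agents, picking the FocusedAgent with probability P_FOCUSED:

import numpy as np

def _select_agent(self, p_focused):
    # Pick the FocusedAgent with probability p_focused, the RandomAgent otherwise.
    index = np.random.choice(len(self._agents), p=[p_focused, 1. - p_focused])
    return self._agents[index]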
Example #4
    def __init__(self, name, visualizer=None, p_focused=0.75):
        nb_actions = len(ENV_ACTIONS)
        super(PigChaseChallengeAgent, self).__init__(name,
                                                     nb_actions,
                                                     visualizer=visualizer)
        self.epi_counter = 0
        self._agents = []
        self._agents.append(
            FocusedAgent(name, ENV_TARGET_NAMES[0], visualizer=visualizer))
        self._agents.append(
            RandomAgent(name, nb_actions, visualizer=visualizer))
        self.p_focused = p_focused
        self.current_agent = self._select_agent(p_focused)
    def __init__(self,
                 name,
                 visualizer=None,
                 focused=True,
                 random=True,
                 bad_guy=True,
                 standstill=False):
        nb_actions = len(ENV_ACTIONS)
        super(ChallengerFactory, self).__init__(name,
                                                nb_actions,
                                                visualizer=visualizer)
        # List of possible agents
        self._agents = []
        self._agent_probabilities = []
        self._helmets = []

        print("Allowing challengers:")
        if focused:
            self._agents.append(
                FocusedAgent(name, ENV_TARGET_NAMES[0], visualizer=visualizer))
            self._agent_probabilities.append(FocusedAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[FocusedAgent])
            print("   FocusedAgent")

        if random:
            self._agents.append(
                RandomAgent(name, nb_actions, visualizer=visualizer))
            self._agent_probabilities.append(RandomAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[RandomAgent])
            print("   RandomAgent")

        if bad_guy:
            self._agents.append(BadGuy(name, visualizer=visualizer))
            self._agent_probabilities.append(BadGuyWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[BadGuy])
            print("   BadGuy")

        if standstill:
            self._agents.append(StandstillAgent(name, visualizer=visualizer))
            self._agent_probabilities.append(StandstillAgentWeight)
            self._helmets.append(ChallengerFactory.AGENT_TYPE[StandstillAgent])
            print("   StandstillAgent")

        # Select first agent
        n = sum(self._agent_probabilities)
        self._agent_probabilities = [
            item / n for item in self._agent_probabilities
        ]
        self.current_agent = self._select_agent(self._agent_probabilities)
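
In this ChallengerFactory variant, _select_agent receives the whole normalized probability list rather than a single value. A hedged sketch of how the draw and the matching helmet bookkeeping might work (current_helmet is an assumed attribute, used only for illustration):

import numpy as np

def _select_agent(self, probabilities):
    # Draw one of the registered challengers according to the normalized weights.
    index = np.random.choice(len(self._agents), p=probabilities)
    self.current_helmet = self._helmets[index]  # assumed attribute marking the chosen agent type
    return self._agents[index]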
Example #6
    def __init__(self, name, visualizer=None, p_focused_new=0.7):
        # Use the override when one is given, otherwise fall back to P_FOCUSED.
        if p_focused_new is not None:
            self.p_focused = p_focused_new
        else:
            self.p_focused = P_FOCUSED

        nb_actions = len(ENV_ACTIONS)
        super(PigChaseChallengeAgent, self).__init__(name,
                                                     nb_actions,
                                                     visualizer=visualizer)

        self._agents = []
        self._agents.append(
            FocusedAgent(name, ENV_TARGET_NAMES[0], visualizer=visualizer))
        self._agents.append(
            RandomAgent(name, nb_actions, visualizer=visualizer))
        self.current_agent = self._select_agent(self.p_focused)
Example #7
def run_maze_learner(mission, clients):

    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)

    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission,
                          [str.split(client, ':') for client in clients])
    env.recording = False

    agent = RandomAgent("rand", 3, delay_between_action=1.5)

    #taking random actions
    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        if (step % EPOCH_SIZE) == 0:
            # `model`, `backend` and `environment` are not defined in this snippet
            # (the RandomAgent has no model to save); this call belongs to the
            # DQN variant shown in Example #1.
            # model.save('%s-%s-dqn_%d.model' %
            #            (backend, environment, step // EPOCH_SIZE))
            pass
Example #8
def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)
    
    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients, builder, role=role,
                              randomize_positions=True)

    if role == 0:
        agent1 = FocusedAgent(name, ENV_TARGET_NAMES[0])
        agent2 = RandomAgent(name, env.available_actions)
        agent3 = BadAgent(name)
        
        agent_list = [agent1, agent2, agent3]  # three types of agents
        agent = agent1
        
        obs = env.reset()
        reward = 0
        agent_done = False
        max_training_steps = EPOCH_SIZE * max_epochs
        epoch = 0
        for step in range(1, max_training_steps+1):
            if env.done:
                obs = env.reset()
                epoch += 1
                agent = agent_list[epoch // 10 % 3]  # switch agent every 10 episodes
            # select an action
            action = agent.act(obs, reward, agent_done)
            # take a step
            obs, reward, agent_done = env.do(action)
                


    else:
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        with tf.Session(config = config) as sess:
            agent1 = BayesAgent(name, ENV_TARGET_NAMES[0], 'Agent_1', True, sess)
            agent2 = RandomAgent(name, env.available_actions)
            agent3 = BadAgent(name)
            agent4 = FocusedAgent(name, ENV_TARGET_NAMES[0])
            if not agent1.save:
                sess.run(tf.global_variables_initializer())
                print("Initialize")
            agent_list = [agent1, agent2, agent3, agent4]  # four types of agents
            agent = agent1
            obs = env.reset()
            agent1.reset(obs)
            reward = 0
            agent_done = False
            viz_rewards = []
            avg = []
            epoch = 0
            s = 1
            max_training_steps = EPOCH_SIZE * max_epochs
            for step in range(1, max_training_steps+1):
                # check if env needs reset
                if agent_done:
                    obs = env.reset()
                    agent1.reset(obs)
                    avg.append(sum(viz_rewards))
                    print "Epoch:%d, accumulative rewards: %d"%(epoch, sum(viz_rewards))
                    visualize_training(visualizer, step, viz_rewards)
                    viz_rewards = []
                    epoch += 1
                    agent = agent_list[epoch // 5 % 4]  # switch agent every 5 episodes
                    if epoch%10 == 0:
                        agent1.reset_collaborator()
                    s = 1
                    
                # select an action
                action = agent.act(obs, reward, agent_done, is_training = True)
                # take a step
                next_obs, reward, agent_done = env.do(action)
                agent1.collecting(obs, action, reward, next_obs, agent_done, s)
                s += 1
                obs = next_obs
                viz_rewards.append(reward)
                    
                if step % 100 == 0:
                    agent1.save_replay_buffer()
                #
                agent1.inject_summaries(step)
                
            print "Average Reward: ", 1.*sum(avg)/len(avg)
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
#  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
#  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ===================================================================================================================

from common import ENV_AGENT_NAMES
from evaluation import PigChaseEvaluator
from environment import PigChaseTopDownStateBuilder
from malmopy.agent import RandomAgent

if __name__ == '__main__':
    # Warn for Agent name !!!

    clients = [('127.0.0.1', 10000), ('127.0.0.1', 10001)]
    agent = RandomAgent(ENV_AGENT_NAMES[1], 3)

    eval = PigChaseEvaluator(clients, agent, agent,
                             PigChaseTopDownStateBuilder())
    eval.run()

    eval.save('My Exp 1', 'pig_chase_results.json')
def agent_factory(name, role, baseline_agent, clients, max_epochs, logdir,
                  visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)
    batch_size = 32

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)

        if type(agent.current_agent) == RandomAgent:
            agent_type = PigChaseEnvironment.AGENT_TYPE_1
        else:
            agent_type = PigChaseEnvironment.AGENT_TYPE_2
        ## Here the state would need to be adapted to what the neural network expects
        state = env.reset(agent_type)

        reward = 0
        agent_done = False
        num_actions = 0
        while True:

            # take a step

            # reset if needed
            if env.done:
                print(agent.check_memory(batch_size))
                if type(agent.current_agent) == RandomAgent:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_1
                else:
                    agent_type = PigChaseEnvironment.AGENT_TYPE_2
                ## Here the state would need to be adapted again

                if num_actions > batch_size:
                    print('Entering replay 1')
                    agent.replay(batch_size)
                state = env.reset(agent_type)

            # select an action
            #print('Action of role 1')
            action = agent.act(state, reward, agent_done, is_training=True)
            next_state, reward, agent_done = env.do(action)
            num_actions = num_actions + 1
            next_state2 = adapt_state(next_state)
            agent.remember(state, action, reward, next_state2, agent_done)
            ## Here state = obs (i.e. the previous, unadapted state)
            state = next_state
        ## Not sure whether this belongs here because of the while True loop (it never ends); it should run when a game finishes.
        ## Check whether replay is actually executed. If it never is, move the replay call inside the if env.done block
        ## (env.done means one episode has finished and another starts, so it should be fine there).

    else:

        if baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        state = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:

                visualize_training(visualizer, step, viz_rewards)
                viz_rewards = []
                ## Not sure whether this also has to be done here; check
                if agent.check_memory(batch_size) > batch_size:
                    print('Entering replay 2')
                    agent.replay(batch_size)
                state = env.reset()

            # select an action
            #print('Action of role 2')
            action = agent.act(state, reward, agent_done, is_training=True)
            # take a step
            next_state, reward, agent_done = env.do(action)
            next_state2 = adapt_state(next_state)
            agent.remember(state, action, reward, next_state2, agent_done)
            ## Here state = obs (i.e. the previous, unadapted state)
            state = next_state
            #obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)

            agent.inject_summaries(step)
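
adapt_state is another helper that this snippet assumes but never defines; it converts the symbolic observation into something the replay memory can store. A purely illustrative sketch, assuming the observation is the (board, entities) tuple produced by PigChaseSymbolicStateBuilder (the real encoding may differ):

import numpy as np

def adapt_state(obs):
    # Illustrative only: map each board cell to an integer id and flatten to a vector.
    board, entities = obs
    cells = np.asarray(board).flatten()
    cell_ids = {cell: float(i) for i, cell in enumerate(sorted(set(cells)))}
    return np.array([cell_ids[cell] for cell in cells], dtype=np.float32)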
Example #11
def agent_factory(name, role, baseline_agent, clients, max_epochs, logdir,
                  visualizer):

    assert len(clients) >= 2, 'Not enough clients (need at least 2)'
    clients = parse_clients_args(clients)

    builder = PigChaseSymbolicStateBuilder()
    env = PigChaseEnvironment(clients,
                              builder,
                              role=role,
                              randomize_positions=True)

    if role == 0:
        agent = PigChaseChallengeAgent(name)
        obs = env.reset(get_agent_type(agent))

        reward = 0
        agent_done = False

        while True:
            if env.done:
                while True:
                    obs = env.reset(get_agent_type(agent))
                    if obs:
                        break

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)

            # reset if needed
            if env.done:
                obs = env.reset(get_agent_type(agent))

            # take a step
            obs, reward, agent_done = env.do(action)

    else:

        if baseline_agent == 'tabq':
            agent = TabularQLearnerAgent(name, visualizer)
        elif baseline_agent == 'astar':
            agent = FocusedAgent(name, ENV_TARGET_NAMES[0])
        else:
            agent = RandomAgent(name, env.available_actions)

        obs = env.reset()
        reward = 0
        agent_done = False
        viz_rewards = []

        max_training_steps = EPOCH_SIZE * max_epochs
        for step in six.moves.range(1, max_training_steps + 1):

            # check if env needs reset
            if env.done:
                while True:
                    if len(viz_rewards) == 0:
                        viz_rewards.append(0)
                    visualize_training(visualizer, step, viz_rewards)
                    tag = "Episode End Conditions"
                    visualizer.add_entry(
                        step, '%s/timeouts per episode' % tag,
                        env.end_result == "command_quota_reached")
                    visualizer.add_entry(
                        step, '%s/agent_1 defaults per episode' % tag,
                        env.end_result == "Agent_1_defaulted")
                    visualizer.add_entry(
                        step, '%s/agent_2 defaults per episode' % tag,
                        env.end_result == "Agent_2_defaulted")
                    visualizer.add_entry(step,
                                         '%s/pig caught per episode' % tag,
                                         env.end_result == "caught_the_pig")
                    agent.inject_summaries(step)
                    viz_rewards = []
                    obs = env.reset()
                    if obs:
                        break

            # select an action
            action = agent.act(obs, reward, agent_done, is_training=True)
            # take a step
            obs, reward, agent_done = env.do(action)
            viz_rewards.append(reward)
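
get_agent_type, used when resetting the environment for the challenger role, is not defined in this example. It can mirror the inline checks from the earlier agent_factory variants; a minimal sketch:

def get_agent_type(agent):
    # Tell the environment which kind of challenger is currently active
    # (same mapping as the inline type checks in the earlier examples).
    if type(agent.current_agent) == RandomAgent:
        return PigChaseEnvironment.AGENT_TYPE_1
    return PigChaseEnvironment.AGENT_TYPE_2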