def agent_factory(name, role, kind, clients, max_episodes, max_actions, logdir, quit):
    """Build and run one Pig Chase agent (blocking).

    Role 0 runs the challenge (baseline) agent in an endless
    act/step loop; any other role runs the human-controlled agent UI.

    Args:
        name: display name for the agent.
        role: 0 for the challenge agent, any other value for the human agent.
        kind: unused in this body; kept for interface compatibility.
        clients: Malmo client endpoints; at least two are required.
        max_episodes: episode limit passed to the human agent.
        max_actions: per-episode action limit passed to the human agent.
        logdir: unused in this body; kept for interface compatibility.
        quit: shutdown handle forwarded to the human agent.

    Raises:
        ValueError: if fewer than two Malmo clients are supplied.
    """
    # `assert` is stripped under `python -O`; validate input explicitly.
    if len(clients) < 2:
        raise ValueError('There are not enough Malmo clients in the pool (need at least 2)')

    clients = parse_clients_args(clients)
    visualizer = ConsoleVisualizer(prefix='Agent %d' % role)

    def _challenge_agent_type(challenge_agent):
        # The environment must be told which sub-policy the challenge agent
        # currently runs; it alternates between a random and a focused agent.
        # (Was duplicated inline twice with `type(...) ==`; use isinstance.)
        if isinstance(challenge_agent.current_agent, RandomAgent):
            return PigChaseEnvironment.AGENT_TYPE_1
        return PigChaseEnvironment.AGENT_TYPE_2

    if role == 0:
        env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                                  actions=ENV_ACTIONS, role=role,
                                  human_speed=True, randomize_positions=True)
        agent = PigChaseChallengeAgent(name)

        obs = env.reset(_challenge_agent_type(agent))
        reward = 0
        rewards = []
        done = False
        episode = 0

        while True:
            # select an action
            action = agent.act(obs, reward, done, True)

            if done:
                # Episode boundary: report, then reset with the agent type
                # the challenge agent will use for the next episode.
                visualizer << (episode + 1, 'Reward', sum(rewards))
                rewards = []
                episode += 1
                obs = env.reset(_challenge_agent_type(agent))

            # take a step
            obs, reward, done = env.do(action)
            rewards.append(reward)
    else:
        env = PigChaseEnvironment(clients, PigChaseSymbolicStateBuilder(),
                                  actions=list(ARROW_KEYS_MAPPING.values()),
                                  role=role, randomize_positions=True)
        env.reset(PigChaseEnvironment.AGENT_TYPE_3)
        agent = PigChaseHumanAgent(name, env, list(ARROW_KEYS_MAPPING.keys()),
                                   max_episodes, max_actions, visualizer, quit)
        agent.show()
def run_maze_learner(mission, clients):
    """Run a random-action baseline on the maze mission.

    Args:
        mission: the Malmo mission handed to MazeEnvironment.
        clients: iterable of 'ip:port' Minecraft client endpoint strings.
    """
    # Prefer Tensorboard when the optional backend module was imported.
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        # NOTE(review): `logdir` is not defined in this function — presumably
        # a module-level global set by the CLI entry point; confirm before
        # relying on this branch.
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission, [str.split(client, ':') for client in clients])
    env.recording = False

    # Baseline: uniformly random policy over 3 actions. A DQN agent
    # (model/memory/explorer) used to be built here but was disabled.
    agent = RandomAgent("rand", 3)

    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE

    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []

    for step in range(1, max_training_steps + 1):
        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        # BUG FIX: the original called `model.save(...)` every EPOCH_SIZE
        # steps, but `model` (and `backend`/`environment`) are never defined
        # here — the DQN construction is commented out — so the first epoch
        # boundary raised NameError. Checkpointing is removed; restore it
        # together with the model when re-enabling DQN training.
def run_maze_learner(mission, clients):
    """Run a tabular Q-learning baseline on the maze mission.

    NOTE(review): this redefines `run_maze_learner` and shadows the earlier
    random-agent variant in the same module — consider renaming one of them.

    Args:
        mission: the Malmo mission handed to MazeEnvironment.
        clients: iterable of 'ip:port' Minecraft client endpoint strings.
    """
    # Prefer Tensorboard when the optional backend module was imported.
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        # NOTE(review): `logdir` is not defined in this function — presumably
        # a module-level global set by the CLI entry point; confirm.
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission, [str.split(client, ':') for client in clients])
    env.recording = False

    agent = TabularQLearnerAgent("rand", 3)

    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE

    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []

    for step in range(1, max_training_steps + 1):
        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(step, state, is_training=True)
        if isinstance(action, int):  # was `type(action) == int`
            print('ACTION BEING TAKEN: ', action)
        else:
            # BUG FIX: np.asscalar was deprecated in NumPy 1.16 and removed
            # in 1.23; ndarray.item() is the supported equivalent.
            print('ACTION BEING TAKEN: ', action.item())

        # take a step
        old = state
        state, reward, agent_done = env.do(action)
        # Q-update on the observed transition (s, a, s', r, terminal).
        agent.observe(old, action, state, reward, env.done)
        viz_rewards.append(reward)

        # BUG FIX: the original called `model.save(...)` every EPOCH_SIZE
        # steps, but no `model` exists in this function (the agent is
        # tabular), so the first epoch boundary raised NameError.
        # Checkpointing removed until a saveable model is reintroduced.
# Fragment: the enclosing arg_parser.add_argument(...) call opens before
# this chunk; only its trailing keyword argument is visible here.
                        help='The type of baseline to run.')
arg_parser.add_argument('-e', '--epochs', type=int, default=5,
                        help='Number of epochs to run.')
arg_parser.add_argument('clients', nargs='*',
                        default=['127.0.0.1:10000', '127.0.0.1:10001'],
                        help='Minecraft clients endpoints (ip(:port)?)+')
args = arg_parser.parse_args()

# Per-run log directory, timestamped so repeated runs do not collide.
logdir = BASELINES_FOLDER % ('bayes_agent', datetime.utcnow().isoformat())

# Prefer Tensorboard when its backend module was imported, else console.
if 'malmopy.visualization.tensorboard' in sys.modules:
    visualizer = TensorboardVisualizer()
    visualizer.initialize(logdir, None)
else:
    visualizer = ConsoleVisualizer()

# One configuration dict per environment agent role.
agents = [{'name': agent, 'role': role, 'type': args.type,
           'clients': args.clients, 'max_epochs': args.epochs,
           'logdir': logdir, 'visualizer': visualizer}
          for role, agent in enumerate(ENV_AGENT_NAMES)]

run_experiment(agents)
def restartGame():
    """Reset the environment and every agent for a fresh batch of games.

    Returns:
        (obs, reward, done): fresh observations plus zeroed batched
        reward/done tensors sized by the configured batch size.
    """
    obs = env.reset()
    # Zeroed batch tensors cast via type_as.
    # NOTE(review): Tensor.type_as expects a tensor argument; if `dtype`
    # here is a torch dtype rather than a template tensor this is
    # suspicious — confirm how `dtype` is defined above this chunk.
    reward = torch.zeros(config.general.batch_size).type_as(dtype)
    done = torch.zeros(config.general.batch_size).type_as(dtype)
    for agent in agents:
        agent.reset()
    return obs, reward, done

obs, reward, done = restartGame()

if config.envs.visualize:
    # Optional human-agent UI for watching play (10 episodes, 25 actions).
    visualizer = ConsoleVisualizer(prefix='Agent %d' % 0)
    ag2 = PigChaseHumanAgent("Agent_2", env, list(ARROW_KEYS_MAPPING.keys()),
                             10, 25, visualizer, quit)
    ag2.show()

crt_agent = 0    # index of the agent whose turn it currently is
it = [0, 0]      # per-agent iteration counters
episode = 0

# Action batch
rew_1 = 0
done_1 = 0
all_rewards = 0

# Evaluation loop; its body continues past this chunk.
while episode < EVAL_EPISODES: