import sys

# Assumed imports, following the malmopy sample layout; MazeEnvironment and
# visualize_training are project-local helpers defined alongside this script.
from malmopy.agent import RandomAgent
from malmopy.visualization import ConsoleVisualizer

try:
    from malmopy.visualization.tensorboard import TensorboardVisualizer
except ImportError:
    TensorboardVisualizer = None

MALMO_MAZE_FOLDER = 'results/malmo_maze'  # assumed log/checkpoint folder


def run_maze_learner(mission, clients):
    # Use the Tensorboard visualizer when its module imported successfully,
    # otherwise fall back to console output.
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(MALMO_MAZE_FOLDER, None)
    else:
        visualizer = ConsoleVisualizer()

    # Clients arrive as 'host:port' strings.
    env = MazeEnvironment(mission, [client.split(':') for client in clients])
    env.recording = False

    # DQN setup, kept for reference; a RandomAgent over 3 actions stands in
    # while the environment plumbing is tested.
    # explorer = LinearEpsilonGreedyExplorer(1, 0.1, 10000)
    # model = DeepQNeuralNetwork((4, 84, 84), (env.available_actions,),
    #                            momentum=0, visualizer=visualizer)
    # memory = TemporalMemory(50000, model.input_shape[1:], model.input_shape[0], False)
    # agent = DQNAgent('Maze DQN Agent', env.available_actions, model, memory,
    #                  explorer=explorer, visualizer=visualizer)
    agent = RandomAgent('rand', 3, delay_between_action=1.5)  # taking random actions

    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE

    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []

    for step in range(1, max_training_steps + 1):
        # Check if the environment needs a reset before acting.
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # Select an action.
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # Take a step.
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        # Checkpointing only applies to a trainable agent; RandomAgent has no
        # model. Restore this when switching back to the DQNAgent:
        # if (step % EPOCH_SIZE) == 0:
        #     model.save('%s-%s-dqn_%d.model' % (backend, environment, step // EPOCH_SIZE))
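
# Minimal driver sketch for invoking the learner directly. The mission file
# name and the default client endpoint are illustrative assumptions, not part
# of the original code; how 'mission' is interpreted (path vs. raw XML) is up
# to MazeEnvironment, which this driver just passes through.
if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser(description='Run the Malmo maze learner')
    parser.add_argument('--mission', default='maze_mission.xml',
                        help='mission to load (assumed default)')
    parser.add_argument('clients', nargs='*', default=['127.0.0.1:10000'],
                        help="Malmo client endpoints as 'host:port'")
    args = parser.parse_args()

    run_maze_learner(args.mission, args.clients)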