Example #1
def run_maze_learner(mission, clients):

    # Use the tensorboard visualizer when the malmopy tensorboard module has
    # been imported, otherwise fall back to console output. Note that
    # `logdir` is assumed to be a module-level global (cf. Example #3).
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()


    env = MazeEnvironment(mission,
                          [client.split(':') for client in clients])
    env.recording = False

    # A uniform-random agent over 3 actions; the DQN setup this example was
    # adapted from is reassembled in the sketch after this example.
    agent = RandomAgent("rand", 3)

    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(state, reward, agent_done, is_training=True)
        print('ACTION BEING TAKEN: ', action)

        # take a step
        state, reward, agent_done = env.do(action)
        viz_rewards.append(reward)

        # NOTE: carried over from the DQN version; a RandomAgent has no model
        # to checkpoint, so the save is left disabled.
        # if (step % EPOCH_SIZE) == 0:
        #     model.save('%s-%s-dqn_%d.model' %
        #                (backend, environment, step // EPOCH_SIZE))
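
The commented-out lines in Example #1 come from the DQN variant this random
baseline was cut down from; they are reassembled below for reference. The
constructor calls are verbatim from those comments, while the import paths and
the `build_dqn_agent` wrapper are assumptions about the malmopy layout, not
part of the original. (The original comments also wired up a
SingleAgentExperiment harness and a tensorboard subprocess, omitted here.)

# Sketch only: reassembled from the commented-out DQN setup in Example #1.
from malmopy.agent import DQNAgent, LinearEpsilonGreedyExplorer  # assumed path
from malmopy.memory import TemporalMemory                        # assumed path
from malmopy.model.cntk import DeepQNeuralNetwork                # assumed path


def build_dqn_agent(env, visualizer):
    # Anneal epsilon from 1.0 down to 0.1 over the first 10000 steps
    explorer = LinearEpsilonGreedyExplorer(1, 0.1, 10000)
    # 4 stacked 84x84 frames in, one Q-value per available action out
    model = DeepQNeuralNetwork((4, 84, 84), (env.available_actions,),
                               momentum=0, visualizer=visualizer)
    # Replay buffer of 50000 transitions shaped like the model input
    memory = TemporalMemory(50000, model.input_shape[1:],
                            model.input_shape[0], False)
    return DQNAgent("Maze DQN Agent", env.available_actions, model, memory,
                    explorer=explorer, visualizer=visualizer)
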
Example #2
def run_maze_learner(mission, clients):

    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)

    else:
        visualizer = ConsoleVisualizer()

    env = MazeEnvironment(mission,
                          [client.split(':') for client in clients])
    env.recording = False

    # A tabular Q-learner over 3 actions (the "rand" name is a leftover from
    # the random baseline); a sketch of the interface it needs follows this
    # example.
    agent = TabularQLearnerAgent("rand", 3)
    EPOCH_SIZE = 250000
    max_training_steps = 50 * EPOCH_SIZE
    state = env.reset()
    reward = 0
    agent_done = False
    viz_rewards = []
    for step in range(1, max_training_steps + 1):

        # check if env needs reset
        if env.done:
            visualize_training(visualizer, step, viz_rewards)
            agent.inject_summaries(step)
            viz_rewards = []
            state = env.reset()

        # select an action
        action = agent.act(step, state, is_training=True)
        if isinstance(action, int):
            print('ACTION BEING TAKEN: ', action)
        else:
            # unwrap a NumPy scalar (np.asscalar is deprecated)
            print('ACTION BEING TAKEN: ', action.item())

        # take a step
        old = state
        state, reward, agent_done = env.do(action)
        agent.observe(old, action, state, reward, env.done)
        viz_rewards.append(reward)

        # NOTE: carried over from the DQN version; a tabular agent has no
        # model to checkpoint, so the save is left disabled.
        # if (step % EPOCH_SIZE) == 0:
        #     model.save('%s-%s-dqn_%d.model' %
        #                (backend, environment, step // EPOCH_SIZE))
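
Example #2 depends on a TabularQLearnerAgent that is not shown. Below is a
minimal sketch of the act/observe/inject_summaries contract the training loop
relies on, assuming hashable (discrete) states and epsilon-greedy exploration;
the class body and all hyperparameters are illustrative, not from the source.

# Sketch only: the interface Example #2 expects from TabularQLearnerAgent.
import random
from collections import defaultdict


class TabularQLearnerAgent:
    def __init__(self, name, nb_actions, alpha=0.1, gamma=0.99, epsilon=0.1):
        self.name = name
        self.nb_actions = nb_actions
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        # Q-table mapping state key -> one value per action
        self.q = defaultdict(lambda: [0.0] * nb_actions)

    def act(self, step, state, is_training=False):
        # Epsilon-greedy over the tabular Q-values
        if is_training and random.random() < self.epsilon:
            return random.randrange(self.nb_actions)
        values = self.q[self._key(state)]
        return values.index(max(values))

    def observe(self, old_state, action, new_state, reward, done):
        # One-step Q-learning backup: Q(s,a) += alpha * (target - Q(s,a))
        q_old = self.q[self._key(old_state)]
        target = reward if done else \
            reward + self.gamma * max(self.q[self._key(new_state)])
        q_old[action] += self.alpha * (target - q_old[action])

    def inject_summaries(self, step):
        # Hook for pushing metrics to the visualizer; a no-op in this sketch
        pass

    @staticmethod
    def _key(state):
        # States must be hashable to index the Q-table
        return state if isinstance(state, (int, str, tuple)) else str(state)
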
Example #3
                            choices=['astar', 'random'],
                            help='The type of baseline to run.')
    arg_parser.add_argument('-e',
                            '--epochs',
                            type=int,
                            default=5,
                            help='Number of epochs to run.')
    arg_parser.add_argument('clients',
                            nargs='*',
                            default=['127.0.0.1:10000', '127.0.0.1:10001'],
                            help='Minecraft clients endpoints (ip(:port)?)+')
    args = arg_parser.parse_args()

    logdir = BASELINES_FOLDER % ('bayes_agent', datetime.utcnow().isoformat())
    if 'malmopy.visualization.tensorboard' in sys.modules:
        visualizer = TensorboardVisualizer()
        visualizer.initialize(logdir, None)
    else:
        visualizer = ConsoleVisualizer()

    agents = [{
        'name': agent,
        'role': role,
        'type': args.type,
        'clients': args.clients,
        'max_epochs': args.epochs,
        'logdir': logdir,
        'visualizer': visualizer
    } for role, agent in enumerate(ENV_AGENT_NAMES)]

    run_experiment(agents)
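
run_experiment itself is outside this snippet. A minimal sketch of one
plausible shape is below, assuming each agent definition drives its own
worker thread; `agent_factory` is a hypothetical entry point whose keyword
arguments simply mirror the dicts built above.

# Sketch only: run_experiment is not part of the snippet above.
from threading import Thread


def agent_factory(name, role, type, clients, max_epochs, logdir, visualizer):
    # Hypothetical per-agent entry point; parameter names mirror the agent
    # dicts above (`type` shadows the builtin because of the dict key).
    raise NotImplementedError


def run_experiment(agents_def):
    # One thread per agent so both Minecraft roles run concurrently
    threads = []
    for agent_def in agents_def:
        t = Thread(target=agent_factory, kwargs=agent_def)
        t.daemon = True
        t.start()
        threads.append(t)
    # Wait for every agent to finish its epochs
    for t in threads:
        t.join()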