Exemple #1
0
def _load_mission_spec(mission_file, force_reset=False):
    """Read mission XML from `mission_file` and return a MissionSpec configured for discrete movement."""
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    if force_reset:
        my_mission.forceWorldReset()
    my_mission.setViewpoint(2)
    return my_mission


def _pick_mission_file(agent_host, index, current_mission):
    """Return a randomly chosen maze file every 20th `index`, otherwise `current_mission`."""
    if index % 20 == 0:
        mission_dir = agent_host.getStringArgument('mission_file')
        return os.path.join(mission_dir, "Maze%s.xml" % randint(0, 4))
    return current_mission


def _start_mission_with_retries(agent_host, my_mission, my_clients,
                                my_mission_record, agent_id, experiment_id,
                                max_retries=3, retry_delay=2.5):
    """Call startMission up to `max_retries` times; exit the process if the last attempt fails."""
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agent_id, experiment_id)
            return
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                sys.exit(1)
            time.sleep(retry_delay)


def _wait_for_mission_begin(agent_host):
    """Poll getWorldState until the mission has begun, printing any reported errors."""
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    return world_state


def _send_initial_look(agent_host):
    """Send the two "look -1" commands issued at the start of every mission."""
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")


def _await_observation(agent_host, world_state):
    """Peek until some observation is not the empty '{}' payload or the mission stops."""
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        world_state = agent_host.peekWorldState()
    return world_state


def _send_action(agent_host, command):
    """Send `command`, then peek until the video-frame counter changes or the mission stops."""
    agent_host.sendCommand(command)
    world_state = agent_host.peekWorldState()
    num_frames_seen = world_state.number_of_video_frames_since_last_state
    while (world_state.is_mission_running and
           world_state.number_of_video_frames_since_last_state == num_frames_seen):
        world_state = agent_host.peekWorldState()
    return world_state


def _wait_for_reward(agent_host, world_state):
    """Peek (sleeping 0.1 s between polls) until at least one reward has been reported."""
    while world_state.number_of_rewards_since_last_state <= 0:
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
    return world_state


def _latest_reward_or_zero(world_state):
    """Return the value of the most recent reward in `world_state`, or 0 if there is none."""
    if len(world_state.rewards) > 0:
        return world_state.rewards[-1].getValue()
    return 0


def _initial_state(sess, state_processor, world_state):
    """Process the current grid observation and repeat it 4 times along axis 2."""
    frame = gridProcess(world_state)
    frame = state_processor.process(sess, frame)
    return np.stack([frame] * 4, axis=2)


def _advance_state(sess, state_processor, state, world_state):
    """Drop the oldest slice of `state` along axis 2 and append the newly processed observation."""
    frame = gridProcess(world_state)
    frame = state_processor.process(sess, frame)
    return np.append(state[:, :, 1:], np.expand_dims(frame, 2), axis=2)


def _store_transition(replay_memory, transition, replay_memory_size):
    """Append `transition`, first evicting the oldest entries so the buffer never exceeds its capacity."""
    while replay_memory and len(replay_memory) >= replay_memory_size:
        replay_memory.pop(0)
    replay_memory.append(transition)


def _train_on_minibatch(sess, q_estimator, target_estimator, replay_memory,
                        batch_size, discount_factor):
    """Sample a minibatch, build Double-DQN targets and run one update; returns the loss."""
    samples = random.sample(replay_memory, batch_size)
    states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(
        np.array, zip(*samples))

    # Double DQN: the online network picks the action, the target network scores it.
    q_values_next = q_estimator.predict(sess, next_states_batch)
    best_actions = np.argmax(q_values_next, axis=1)
    q_values_next_target = target_estimator.predict(sess, next_states_batch)
    targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
        discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

    return q_estimator.update(sess, states_batch, action_batch, targets_batch)


def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=50000,
                    replay_memory_init_size=5000,
                    update_target_estimator_every=1000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=8000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.

    This function is a generator: it yields once per finished episode.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used to start missions, send commands and
          read world states. Its 'mission_file' string argument is used as the
          directory that holds the Maze<k>.xml mission files.
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of experiences to sample when initializing
          the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes (currently unused;
          the monitor wrapper that consumed it is disabled)
    Yields:
        (total_t, EpisodeStats) after every episode, where the EpisodeStats holds
        the episode_lengths and episode_rewards of the episodes finished so far.
    Returns:
        The full EpisodeStats object as the generator's return value.
    """
    mission_file = os.path.join(agent_host.getStringArgument('mission_file'),
                                "Maze0.xml")
    currentMission = mission_file
    my_mission = _load_mission_spec(mission_file)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple(
        "Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(episode_lengths=np.zeros(num_episodes),
                                  episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))

    _start_mission_with_retries(agent_host, my_mission, my_clients,
                                my_mission_record, agentID, "%s" % (expID),
                                max_retries)
    world_state = _wait_for_mission_begin(agent_host)
    print()
    _send_initial_look(agent_host)
    print("Populating replay memory...")

    world_state = _await_observation(agent_host, world_state)
    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = _initial_state(sess, state_processor, world_state)

    # Populate the replay memory with initial experience
    for i in range(replay_memory_init_size):
        print("%s th replay memory" % i)
        # The chosen file only takes effect the next time the mission restarts.
        mission_file = _pick_mission_file(agent_host, i, currentMission)
        currentMission = mission_file
        print("Mission File:", mission_file)

        action_probs = policy(sess, state,
                              epsilons[min(total_t, epsilon_decay_steps - 1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        world_state = _send_action(agent_host, actionSet[action])

        if world_state.is_mission_running:
            world_state = _wait_for_reward(agent_host, world_state)
            reward = world_state.rewards[-1].getValue()
            print("1)Just received the reward: %s on action: %s " %
                  (reward, actionSet[action]))
            world_state = _await_observation(agent_host, world_state)
            branch = "1)"
        else:
            reward = _latest_reward_or_zero(world_state)
            print("2)Just received the reward: %s on action: %s " %
                  (reward, actionSet[action]))
            branch = "2)"

        # The mission may also have ended while waiting for the observation.
        done = not world_state.is_mission_running
        if done:
            next_state = state
        else:
            next_state = _advance_state(sess, state_processor, state,
                                        world_state)
        print("%sAction: %s, Reward: %s, Done: %s" %
              (branch, actionSet[action], reward, done))
        replay_memory.append(
            Transition(state, action, reward, next_state, done))

        if done:
            # restart mission for next round of memory generation
            my_mission = _load_mission_spec(mission_file)
            _start_mission_with_retries(agent_host, my_mission, my_clients,
                                        my_mission_record, agentID,
                                        "%s" % (expID), max_retries)
            world_state = _wait_for_mission_begin(agent_host)
            _send_initial_look(agent_host)
            world_state = _await_observation(agent_host, world_state)
            state = _initial_state(sess, state_processor, world_state)
        else:
            state = next_state

    print("Finished populating memory")

    # NEED TO RECORD THE VIDEO AND SAVE TO THE SPECIFIED DIRECTORY
    currentMission = mission_file
    for i_episode in range(num_episodes):
        print("%s-th episode" % i_episode)

        # Episode 0 continues in the mission left running by the population phase.
        if i_episode != 0:
            mission_file = _pick_mission_file(agent_host, i_episode,
                                              currentMission)
            currentMission = mission_file

            my_mission = _load_mission_spec(mission_file, force_reset=True)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo(
                '127.0.0.1',
                10000))  # add Minecraft machines here as available

            expID = 'Deep_q_learning '

            # Name recordings/missions after the episode index (the original used
            # the stale replay-population index, identical for every episode).
            my_mission_record = malmoutils.get_default_recording_object(
                agent_host, "save_%s-rep%d" % (expID, i_episode))
            _start_mission_with_retries(agent_host, my_mission, my_clients,
                                        my_mission_record, agentID,
                                        "%s-%d" % (expID, i_episode),
                                        max_retries)

            print("Waiting for the mission to start", end=' ')
            world_state = _wait_for_mission_begin(agent_host)

        _send_initial_look(agent_host)
        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)

        world_state = _await_observation(agent_host, world_state)
        state = _initial_state(sess, state_processor, world_state)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps - 1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode + 1, num_episodes, loss),
                  end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)),
                                      p=action_probs)
            world_state = _send_action(agent_host, actionSet[action])

            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)

            if world_state.is_mission_running:
                world_state = _wait_for_reward(agent_host, world_state)
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " %
                      (reward, actionSet[action]))

                world_state = _await_observation(agent_host, world_state)

                if world_state.is_mission_running:
                    next_state = _advance_state(sess, state_processor, state,
                                                world_state)
                else:
                    print("Mission finished prematurely")
                    next_state = state
                    done = True

                # Save transition to replay memory (evicting the oldest if full)
                _store_transition(
                    replay_memory,
                    Transition(state, action, reward, next_state, done),
                    replay_memory_size)

                # Update statistics
                stats.episode_rewards[i_episode] += reward
                stats.episode_lengths[i_episode] = t

                # Perform gradient descent update
                loss = _train_on_minibatch(sess, q_estimator, target_estimator,
                                           replay_memory, batch_size,
                                           discount_factor)
                if done:
                    print("End of episode")
                    break
                state = next_state
                total_t += 1
            else:
                # The mission ended on this action: use the last reward if any.
                if len(world_state.rewards) > 0:
                    reward = world_state.rewards[-1].getValue()
                else:
                    print("IDK no reward")
                    reward = 0
                print("Just received the reward: %s on action: %s " %
                      (reward, actionSet[action]))

                next_state = state

                _store_transition(
                    replay_memory,
                    Transition(state, action, reward, next_state, done),
                    replay_memory_size)

                stats.episode_rewards[i_episode] += reward
                stats.episode_lengths[i_episode] = t

                loss = _train_on_minibatch(sess, q_estimator, target_estimator,
                                           replay_memory, batch_size,
                                           discount_factor)

                print("End of Episode")
                break

        # Add summaries to tensorboard
        print("Adding to tensorboard summaries !!!!")
        episode_summary = tf.Summary()
        episode_summary.value.add(
            simple_value=stats.episode_rewards[i_episode],
            node_name="episode_reward",
            tag="episode_reward")
        episode_summary.value.add(
            simple_value=stats.episode_lengths[i_episode],
            node_name="episode_length",
            tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode + 1],
            episode_rewards=stats.episode_rewards[:i_episode + 1])
    return stats
Exemple #2
0
                    missionXML += plug_in_dimensions(mob[1])

    missionXML += '''</DrawingDecorator>'''
    return missionXML


# Client pool with a single Minecraft client at 127.0.0.1:10000.
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))

# Make print output appear immediately on both Python 2 and Python 3.
if sys.version_info[0] == 2:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w',
                           0)  # flush print output immediately
else:
    import functools
    # Rebind print at module level so every later call passes flush=True.
    print = functools.partial(print, flush=True)
# Build the mission from the XML produced by getMissionXML.
# NOTE(review): the second argument (True) presumably enables XML validation - confirm.
my_mission = MalmoPython.MissionSpec(
    getMissionXML(endCondition, timeoutCondition), True)

my_mission_record = MalmoPython.MissionRecordSpec()
# Try to start the mission up to max_retries times, waiting 2 s between
# attempts; exit with status 1 if the final attempt also raises RuntimeError.
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_client_pool, my_mission_record,
                                0, "blahblah")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission", e)
            print("Is the game running?")
            exit(1)
        else:
            time.sleep(2)
Exemple #3
0
    print = functools.partial(print, flush=True)

# Create default Malmo objects:

agent_host = MalmoPython.AgentHost()
# Parse the command line; on a parse error print the usage text and exit with status 1.
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(agent_host.getUsage())
    exit(1)
# Asking for help prints the usage text and exits successfully.
if agent_host.receivedArgument("help"):
    print(agent_host.getUsage())
    exit(0)

# Mission and recording specs created with no arguments.
my_mission = MalmoPython.MissionSpec()
my_mission_record = MalmoPython.MissionRecordSpec()

# Attempt to start a mission:
# Retry up to max_retries times, waiting 2 s between attempts; exit with
# status 1 if the final attempt also raises RuntimeError.
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_mission_record)
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:", e)
            exit(1)
        else:
            time.sleep(2)
            </AgentHandlers>
        </AgentSection>

    </Mission>'''


# Variety of strategies for dealing with loss of motion:
# Each entry is one string of "command value" steps separated by "; ".
commandSequences = [
    "jump 1; move 1; wait 1; jump 0; move 1; wait 2",  # attempt to jump over obstacle
    "turn 0.5; wait 1; turn 0; move 1; wait 2",  # turn right a little
    "turn -0.5; wait 1; turn 0; move 1; wait 2",  # turn left a little
    "move 0; attack 1; wait 5; pitch 0.5; wait 1; pitch 0; attack 1; wait 5; pitch -0.5; wait 1; pitch 0; attack 0; move 1; wait 2",  # attempt to destroy some obstacles
    "move 0; pitch 1; wait 2; pitch 0; use 1; jump 1; wait 6; use 0; jump 0; pitch -1; wait 1; pitch 0; wait 2; move 1; wait 2"  # attempt to build tower under our feet
]

# Build the mission from the XML generated for the `pitfall` setting.
my_mission = MalmoPython.MissionSpec(GetMissionXML(pitfall), True)
my_mission_record = MalmoPython.MissionRecordSpec()
# Recording is only configured when a recordings directory was supplied.
if recordingsDirectory:
    my_mission_record.setDestination(recordingsDirectory + "//" +
                                     "Mission_1.tgz")
    my_mission_record.recordRewards()
    my_mission_record.recordObservations()
    my_mission_record.recordCommands()
    # Video is recorded only when the "record_video" argument was given.
    # NOTE(review): the arguments 24 and 2000000 are presumably frames per
    # second and bit rate - confirm against the Malmo API.
    if agent_host.receivedArgument("record_video"):
        my_mission_record.recordMP4(24, 2000000)

if agent_host.receivedArgument("test"):
    my_mission.timeLimitInSeconds(20)  # else mission runs forever

# Attempt to start the mission:
max_retries = 3
Exemple #5
0
    def run_mission(self):  # Running the mission (taken from grammar_demo.py)
        """Load the mission from self.mission_file and run the agent in it repeatedly.

        The mission is run once when the agent host received the "test"
        argument and 150 times otherwise. Each repeat starts the mission
        (up to 3 attempts, exiting the process if the last one fails), waits
        for it to begin, runs self.agent.run() and logs the agent's output.
        The per-repeat cumulative rewards are collected and printed at the end.

        Returns:
            None
        """
        # -- set up the mission -- #
        with open(self.mission_file, 'r') as f:
            print("Loading mission from %s" % self.mission_file)
            mission_xml = f.read()
            my_mission = MalmoPython.MissionSpec(mission_xml, True)
        # Disabled experiment kept for reference: add random lava holes for interest.
        # for x in range(1,4):
        #     for z in range(1,13):
        #         if random.random()<0.1:
        #             my_mission.drawBlock( x,45,z,"lava")

        max_retries = 3

        checkpoint_iter = 100

        if self.agent.host.receivedArgument("test"):
            num_repeats = 1
        else:
            num_repeats = 150

        cumulative_rewards = []
        for i in range(num_repeats):
            print()
            print('Repeat %d of %d' % (i + 1, num_repeats))

            my_mission_record = MalmoPython.MissionRecordSpec()

            # Starting can fail with RuntimeError; retry, and give up after the last attempt.
            for retry in range(max_retries):
                try:
                    self.agent.host.startMission(my_mission, my_mission_record)
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            print("Waiting for the mission to start", end=' ')
            world_state = self.agent.host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = self.agent.host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)
            print()

            # -- run the agent in the world -- #
            cumulative_reward = self.agent.run()
            print('Cumulative reward: %d' % cumulative_reward)
            # Record the reward so the summary printed after the loop is not always empty.
            cumulative_rewards.append(cumulative_reward)

            self.agent.logOutput()
            # NOTE(review): this logs a second time on checkpoint iterations;
            # kept as in the original - confirm whether the duplicate is intended.
            if i % checkpoint_iter == 0:
                self.agent.logOutput()

            # -- clean up -- #
            time.sleep(0.5)  # (let the Mod reset)

        print("Done.")

        print()
        print("Cumulative rewards for all %d runs:" % num_repeats)
        print(cumulative_rewards)
        return
Exemple #6
0
def with_punch(agent_host, punch_cmd, my_mission, my_mission_record, plm):

    # Step.1 build up the game
    # Start game and collect experience
    # notice('with tool punch block')

    # parse the input punch command
    # print "punch ... ing "
    # print punch_cmd

    # convert experience into new style envir/block & agent/item
    # this is very important
    action = punch_cmd[1]

    env_block = action[1]
    agt_item = action[3]

    # start point of action
    init_scene = OrderedDict()
    init_scene['env'] = []
    init_scene['agt'] = []

    init_scene['agt'].append(agt_item)
    init_scene['env'].append(env_block)

    # end point of action
    end_state = punch_cmd[2]
    end_scene = OrderedDict()
    end_scene['env'] = []
    end_scene['agt'] = []

    # print '########'
    # print end_state

    for item in end_state:
        end_scene['agt'].append(item + '*' + str(end_state[item]))

    #
    # test the new form
    new_scene = [init_scene, action, end_scene]
    # env_target =

    #
    target = init_scene['env'][0].split('*')[0]
    tool = action[-1].split('*')[0]
    # print 'punch => ', target, 'with : ', item
    if tool == 'hand':
        tool = 'air'
    behavior = ['punch', tool, target]
    # =========== # build the scene # with script
    # world_state = agent_host.getWorldState() # get from world_state
    # print '##########'
    # print world_state.observations[-1].text
    # planetbox = ['wheat']

    # environment object
    # if block not in planetbox:
    #     my_mission.drawBlock(5,5,5, block)
    # if block in planetbox:
    #     my_mission.drawBlock(5,4,5, block)

    # Step.2 build up the basic scene
    # init the environment scene !
    my_mission.forceWorldReset()  # force the world to reset
    my_mission.observeGrid(0, 0, 0, 2, 2, 2, 'grid')
    # my_mission.observeHotBar()

    # block or entity
    # print "PLM"
    blockflag = None
    for item in plm.types_dict:
        if item == "EntityTypes":
            for word in plm.types_dict[item]:
                if target == word.lower():
                    blockflag = "EntityTypes"
                    target = word
        if item == "BlockType":
            for word in plm.types_dict[item]:
                if target == word.lower():
                    blockflag = "BlockType"
                    target = word

    if blockflag == None:
        print 'target is ', target, 'wrong types ,.,..'
        return None
        # raise ValueError;
    print "air"
    print 'target == >', target

    # print blockflag # Block and Entity is totally different!
    # block = "Stone"
    """
    # if blockflag == "BlockType":
    if False:
    # if True:
        # Block 
        dropbox = ['sand', 'gravel']
        if block in dropbox:
            for i in range(5, 5+10):
                my_mission.drawBlock(5,i,5, str(block))
        else:
            my_mission.drawBlock(5,5,5, str(block))
            
        my_mission.drawBlock(4,5,5, 'stone')
        my_mission.drawBlock(6,5,5, 'stone')
        if block in dropbox:
            my_mission.drawBlock(5,4,5, 'stone')
        my_mission.drawBlock(5,4,6, 'stone')
        my_mission.startAtWithPitchAndYaw(5.5,4,4,0,0)
        # my_mission.endAt(5,4,6,1)
        strxml = my_mission.getAsXML(True)
        root = ET.fromstring(strxml)
        MalmoPython.MissionSpec(strxml,True)
    """

    # Entity or Block is fine
    # update the block into Entity?
    # if blockflag == "EntityTypes":
    # if blockflag == "BlockType":
    if True:

        # print block
        # build up the fence or ironblock using stone
        # pig x5,y5,z5
        # build fence to limit the move
        fence = 'sand'

        my_mission.drawCuboid(-0, 4, -0, 10, 4, 10, 'stone')
        my_mission.drawCuboid(-0, 4, -0, 10, 9, -0, 'sand')
        my_mission.drawCuboid(-0, 4, -0, -0, 9, 10, 'sand')
        my_mission.drawCuboid(-0, 4, 10, 10, 9, 10, 'sand')
        my_mission.drawCuboid(10, 4, -0, 10, 9, 10, 'sand')
        """
        my_mission.drawBlock(5,4,6,fence)
        my_mission.drawBlock(4,4,6,fence)
        my_mission.drawBlock(6,4,6,fence)
        
        my_mission.drawBlock(5,5,6,fence)
        my_mission.drawBlock(4,5,6,fence)
        my_mission.drawBlock(6,5,6,fence)
        
        # my_mission.drawBlock(5,4,6,fence)
        # around side
        my_mission.drawBlock(4,4,5,fence)
        my_mission.drawBlock(6,4,5,fence)
        
        my_mission.drawBlock(4,5,5,fence)
        my_mission.drawBlock(6,5,5,fence)
        
        my_mission.drawBlock(4,4,4,fence)
        my_mission.drawBlock(6,4,4,fence)
        
        my_mission.drawBlock(4,5,4,fence)
        my_mission.drawBlock(6,5,4,fence)
        
        my_mission.drawBlock(4,4,3,fence)
        my_mission.drawBlock(6,4,3,fence)        

        my_mission.drawBlock(4,5,3,fence)
        my_mission.drawBlock(6,5,3,fence)


        # back of wall
        my_mission.drawBlock(5,4,2,fence)
        my_mission.drawBlock(4,4,2,fence)
        my_mission.drawBlock(6,4,2,fence)
        
        my_mission.drawBlock(5,5,2,'iron_bars')
        my_mission.drawBlock(4,5,2,'iron_bars')
        my_mission.drawBlock(6,5,2,'iron_bars')

        # build orak floor?
        my_mission.drawBlock(5,3,5,fence)
        my_mission.drawBlock(5,3,4,fence)
        my_mission.drawBlock(5,3,3,fence)
        """
        #
        if blockflag == "BlockType":
            my_mission.drawBlock(5, 4, 5, target)
            # print 'skip the block and directly test the entity ... '
            # return None

        if blockflag == "EntityTypes":
            strxml = my_mission.getAsXML(True)
            root = ET.fromstring(strxml)
            # load in the Entity
            t = {
                'pitch': str(0),
                'type': target,
                'x': "5.5",
                "xVel": "0",
                "yaw": "0",
                "y": "5",
                "yVel": "0",
                "z": "5",
                "zVel": "0"
            }
            init_item = []
            init_item.append(t)
            for child in root.iter(
                    '{http://ProjectMalmo.microsoft.com}ServerHandlers'):
                edd = Element(
                    '{http://ProjectMalmo.microsoft.com}DrawingDecorator')
                edd.append(
                    Element('{http://ProjectMalmo.microsoft.com}DrawEntity',
                            t))
                child.append(edd)

            xmlstr = ET.tostring(root, encoding='utf8', method='xml')
            my_mission = MalmoPython.MissionSpec(xmlstr, True)

    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'
    print '++++++++++++++++++++++'

    print 'Now running ... ', punch_cmd

    # Step.3 init the agent inventory
    # init the agent inventory !
    # print "####################"
    # print init_scene['agt']
    my_mission = init_agent(my_mission, init_scene['agt'])
    # my_mission = state_to_agent(rewarder, my_mission, state)

    print '#####################'
    print '#####################'
    print '#####################'
    # print my_mission.getAsXML(True)

    states = []
    event_s = None
    action = None

    # my_mission.forceWorldReset()
    states = []
    event_s = None
    action_ = None

    # Step.4 run the scene(script) and collect experience
    #
    # ======== # start the Mission # with the scene # ===== #
    # agent_host, my_mission, my_mission_record = setup_env(params)
    startMission(agent_host, my_mission, my_mission_record)
    world_state = agent_host.getWorldState()
    obs = []
    # the fence already limit the mob , kill it and then move forward ...
    # tool

    # make sure the time stamp record the sense states

    time_stamp = 0
    init_detect = False
    while world_state.is_mission_running:

        world_state = agent_host.peekWorldState()

        if (world_state.has_mission_begun) and (init_detect is False):
            agent_host.sendCommand('move 0')
            action = []
            if len(world_state.observations) > 0:
                world_state = agent_host.peekWorldState()
                obs_text = json.loads(world_state.observations[-1].text)
                print 'Begin test ... '
                print obs_text
                obs_text['action'] = action
                init_detect = True
                obs.append(obs_text)

        if len(world_state.observations) > 0:
            action = []
            toolstr = 'InventorySlot_0_item'
            toolflag = False
            # time stamp 1 record the init state
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            # print 'Here!'
            # print obs_text
            obs.append(obs_text)

            #
            # print obs_text
            # tool check code
            # time stamp 2
            # action point
            if obs_text[toolstr] == tool:
                # the right tool is in hand
                toolflag = True
                pass
            else:
                if tool != 'air':  # consider replace hand with air to unique the code
                    for i in xrange(0, 39):
                        key = 'InventorySlot_' + str(i) + '_item'
                        if obs_text[key] == tool:
                            agent_host.sendCommand('swapInventoryItems 0 ' +
                                                   str(i))
                            time.sleep(1)
                            player = obs_text["Name"]
                            action.append(player + " swap " + tool +
                                          " to hand")
                    pass
                else:
                    for i in xrange(0, 39):
                        key = 'InventorySlot_' + str(i) + '_item'
                        if obs_text[key] == 'air':
                            agent_host.sendCommand('swapInventoryItems 0 ' +
                                                   str(i))
                            time.sleep(1)
                            player = obs_text["Name"]
                            action.append(player + " swap " + tool +
                                          " to hand")
                    pass

            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)

            # update video and world state
            # print 'obs text ... ==== ???'
            # print obs_text
            # print len(world_state.observations)
            # update video and world state again
            attackflag = False
            if toolflag:
                # agent attack
                agent_host.sendCommand('attack 1')
                player = obs_text["Name"]
                action.append(player + " attacks " + target + " with " + tool)
                obs_text['action'] = action
                attackflag = True
                time.sleep(1)

            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)

            # check for target block or entity to monitor the mission end
            breakflag = False
            if blockflag == "EntityTypes":
                # detect nearest entity
                # print obs_text['near_entities']
                entflag = False
                for ent in obs_text['near_entities']:
                    if ent['name'] == target:
                        entflag = True

                if not entflag:
                    agent_host.sendCommand(
                        'move 0.5')  # move forward to collect drops
                    player = obs_text["Name"]
                    action.append(player + " moves forward")
                    obs_text['action'] = action
                    time.sleep(1)
                    breakflag = True
                pass

            if blockflag == "BlockType":
                # print '#####'
                # print obs_text
                entflag = False
                if obs_text['grid'][6] == target:
                    entflag = True
                if not entflag:
                    agent_host.sendCommand('move 0.5')
                    player = obs_text["Name"]
                    action.append(player + " moves forward")
                    obs_text['action'] = action
                    time.sleep(1)
                    breakflag = True
                pass
            # update action for each time
            world_state = agent_host.peekWorldState()
            obs_text = json.loads(world_state.observations[-1].text)
            obs_text['action'] = action
            # time_stamp += 1
            # obs_text['time_stamp'] = time_stamp
            obs.append(obs_text)

            if breakflag:  # collect all breakflag here !
                break

    # detect and collect the state
    world_state = agent_host.peekWorldState()
    obs_text = json.loads(world_state.observations[-1].text)
    # print 'obs text ... ==== ???'
    # print obs_text
    # print len(world_state.observations)
    agent_host.sendCommand('quit')

    player = obs_text["Name"]
    action = []
    action.append(player + " quit the scene")
    obs_text['action'] = action
    # time_stamp += 1
    # obs_text['time_stamp'] = time_stamp
    obs.append(obs_text)

    print '++++++++++++++++'
    print '+++++OBS++++++++'
    print '++++++++++++++++'

    print '', len(obs)
    for e in obs:
        print '#####'
        print obs

    plm.count += 1
    print '---- ', str(plm.count), 'th scene running ---- '
    print '###############3'
    print '###############3'

    #

    event_e = None
    # next_state = states[-1]

    # if overall_state(rewarder, state) == overall_state(rewarder, next_state):
    #     flag = False
    # else:
    #     flag = True
    state = None
    next_state = None
    flag = None

    # return missionflag, obs
    missionflag = True
    package = [punch_cmd, obs]
    return package
Exemple #7
0
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', x))

# Keep score of how our robots are doing:
# Each list below holds NUM_AGENTS counters, all starting at zero.
# NOTE(review): presumably indexed by agent number - confirm against the code
# that updates them (not visible here).
survival_scores = [0 for x in range(NUM_AGENTS)
                   ]  # Lasted to the end of the mission without dying.
apple_scores = [0 for x in range(NUM_AGENTS)]  # Collecting apples is good.
zombie_kill_scores = [0 for x in range(NUM_AGENTS)
                      ]  # Good! Help rescue humanity from zombie-kind.
player_kill_scores = [0 for x in range(NUM_AGENTS)
                      ]  # Bad! Don't kill the other players!

# Only 5 missions are run in integration-test mode; otherwise 30000.
num_missions = 5 if INTEGRATION_TEST_MODE else 30000
# Missions are numbered from 1 so that "mission 1" can be treated specially.
for mission_no in xrange(1, num_missions + 1):
    print "Running mission #" + str(mission_no)
    # Create mission xml - use forcereset if this is the first mission.
    # (getXML receives the string "true" for mission 1 and "false" afterwards.)
    my_mission = MalmoPython.MissionSpec(
        getXML("true" if mission_no == 1 else "false"), True)

    # Generate an experiment ID for this mission.
    # This is used to make sure the right clients join the right servers -
    # if the experiment IDs don't match, the startMission request will be rejected.
    # In practice, if the client pool is only being used by one researcher, there
    # should be little danger of clients joining the wrong experiments, so a static
    # ID would probably suffice, though changing the ID on each mission also catches
    # potential problems with clients and servers getting out of step.

    # Note that, in this sample, the same process is responsible for all calls to startMission,
    # so passing the experiment ID like this is a simple matter. If the agentHosts are distributed
    # across different threads, processes, or machines, a different approach will be required.
    # (Eg generate the IDs procedurally, in a way that is guaranteed to produce the same results
    # for each agentHost independently.)
    # A fresh random (version 4) UUID, as text, is generated for every mission.
    experimentID = str(uuid.uuid4())
Exemple #8
0
    print agent_host.getUsage()
    exit(1)
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

itemdrawingxml = GetItemDrawingXML()

if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 30000

for iRepeat in range(num_reps):
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML("Nom nom nom run #" + str(iRepeat), itemdrawingxml),
        validate)
    # Set up a recording - MUST be done once for each mission - don't do this outside the loop!
    my_mission_record = MalmoPython.MissionRecordSpec(recordingsDirectory +
                                                      "//" + "Mission_" +
                                                      str(iRepeat) + ".tgz")
    my_mission_record.recordRewards()
    my_mission_record.recordMP4(24, 400000)
    max_retries = 3
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission(my_mission, my_client_pool,
                                    my_mission_record, 0, "itemTestExperiment")
            break
        except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

if agent_host.receivedArgument("test"):
    num_repeats = 1
else:
    num_repeats = 2  #REMMBER THIS IS THE LEVELS

for i in range(num_repeats):
    size = int(6 + 0.5 * i)
    print "Size of maze:", size
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML("0", 0.4 + float(i / 20.0), size), True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.requestVideo(800, 500)
    my_mission.setViewpoint(1)
    # Attempt to start a mission:
    max_retries = 3
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "%s-%d" % ('Moshe', i))
            break
        except RuntimeError as e:
Exemple #10
0
# Make sure the recordings directory exists. A directory that is already
# there is not an error; every other OSError is re-raised.
try:
    os.makedirs(recordingsDirectory)
except OSError as exception:
    if exception.errno == errno.EEXIST:
        pass
    else:
        raise

# One recording spec is shared by all repetitions: rewards and observations
# are recorded, and only the destination archive changes per repetition.
my_mission_record = MalmoPython.MissionRecordSpec()
my_mission_record.recordRewards()
my_mission_record.recordObservations()

for iRepeat in xrange(num_reps):
    my_mission_record.setDestination(
        recordingsDirectory + "//" + "Mission_" + str(iRepeat) + ".tgz")
    # Every repetition builds its mission from a randomly chosen maze block.
    mazeblock = random.choice(mazeblocks)
    my_mission = MalmoPython.MissionSpec(GetMissionXML(mazeblock), validate)

    # Try to start the mission up to max_retries times, pausing two seconds
    # between attempts; after the last failure report it and exit with 1.
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
        except RuntimeError as e:
            if retry < max_retries - 1:
                time.sleep(2)
                continue
            print "Error starting mission:", e
            exit(1)
        else:
            break

    print "Waiting for the mission to start",
    world_state = agent_host.getWorldState()
    print agent_host.getUsage()
    exit(0)

# A single repetition in "test" mode, twenty otherwise.
num_reps = 1 if agent_host.receivedArgument("test") else 20

# Three module-level counters, all starting at zero.
current_yaw = best_yaw = current_life = 0

cumulative_rewards = []
for iRepeat in range(num_reps):
    # The repetition number is part of the mission title.
    mission_xml = getMissionXML(MOB_TYPE + " Apocalypse #" + str(iRepeat))
    my_mission = MalmoPython.MissionSpec(mission_xml, validate)

    max_retries = 3
    for retry in range(max_retries):
        try:
            # The recording spec is rebuilt on every attempt; rewards and
            # commands are recorded into a per-repetition archive.
            my_mission_record = MalmoPython.MissionRecordSpec(
                recordingsDirectory + "//" + "Mission_" + str(iRepeat) + ".tgz")
            my_mission_record.recordRewards()
            my_mission_record.recordCommands()

            # Start the mission; success ends the retry loop.
            agent_host.startMission(my_mission, my_client_pool,
                                    my_mission_record, 0,
                                    "predatorExperiment")
            break
        except RuntimeError as e:
            # Only the final failed attempt is reported.
            if retry + 1 == max_retries:
                print "Error starting mission",e
Exemple #12
0
    </Mission>'''


# Variety of strategies for dealing with loss of motion:
# Each entry is one "; "-separated script of "<verb> <value>" steps.
# NOTE(review): "wait N" is presumably interpreted by this script rather than
# sent to Minecraft - confirm where commandSequences is consumed.
commandSequences = [
    "jump 1; move 1; wait 1; jump 0; move 1; wait 2",  # attempt to jump over obstacle
    "turn 0.5; wait 1; turn 0; move 1; wait 2",  # turn right a little
    "turn -0.5; wait 1; turn 0; move 1; wait 2",  # turn left a little
    "move 0; attack 1; wait 5; pitch 0.5; wait 1; pitch 0; attack 1; wait 5; pitch -0.5; wait 1; pitch 0; attack 0; move 1; wait 2",  # attempt to destroy some obstacles
    "move 0; pitch 1; wait 2; pitch 0; use 1; jump 1; wait 6; use 0; jump 0; pitch -1; wait 1; pitch 0; wait 2; move 1; wait 2"  # attempt to build tower under our feet
]

# Rebind sys.stdout to a new file object on the same descriptor with a buffer
# size of 0 (unbuffered), so every print appears at once.
sys.stdout = os.fdopen(sys.stdout.fileno(), 'w',
                       0)  # flush print output immediately

# Build the mission from the generated XML; the second argument (True) asks
# MissionSpec to validate it.
my_mission = MalmoPython.MissionSpec(GetMissionXML(), True)

# Parse the command line. A parse error prints the error and the usage text
# and exits with status 1; "help" prints the usage text and exits with 0.
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

# In "test" mode the mission is capped at 20 seconds.
if agent_host.receivedArgument("test"):
    my_mission.timeLimitInSeconds(20)  # else mission runs forever
Exemple #13
0
    def reset(self):
        """Create a fresh mission for a new episode and wait until it has begun.

        A new mission XML is generated and loaded, a pick-up source, a
        destination and an agent start location are drawn at random from
        ``self.landmarks``, the item to deliver is drawn at the source, and
        the mission is started on the module-level ``malmo_env`` host.

        Side effects on ``self``: ``mission_xml``, ``mission_record``,
        ``mission``, ``current_agent_location``, ``item_location`` and
        ``destination`` are replaced. ``item_location`` and ``destination``
        are indices into ``self.landmarks``.

        The start location is drawn from the landmarks other than the source,
        so it never equals the source but may equal the destination.

        Raises:
            AttributeError: if ``self.mission`` does not exist yet.
            IndexError: from ``random.choice`` if fewer than two distinct
                landmarks are available.

        Exits the process with status 1 when ``startMission`` still raises
        RuntimeError on the last of three attempts.
        """
        log = logging.getLogger('SimpleMalmoEnvironment.reset')

        # Drop the previous mission; a new one is created for every episode.
        del self.mission
        self.mission_xml = self.generate_malmo_environment_xml()
        log.debug("Obtained mission XML: \n %s", self.mission_xml)
        self.mission_record = MalmoPython.MissionRecordSpec()
        self.mission = MalmoPython.MissionSpec(self.mission_xml, True)
        log.info("Loaded mission XML")

        self.mission.setViewpoint(1)

        # Work on a copy so the stored landmarks are never touched.
        landmarks = copy.deepcopy(self.landmarks)
        # First pick the source to pick up from, then choose the destination
        # and the agent start among the remaining landmarks.
        source_loc = random.choice(landmarks)
        remaining_landmarks = [lm for lm in landmarks if lm != source_loc]
        destination = random.choice(remaining_landmarks)
        agent_start_loc = random.choice(remaining_landmarks)
        x, y = agent_start_loc[0], agent_start_loc[1]
        self.current_agent_location = [x, y]
        # Malmo needs +0.5 to place the agent in the middle of the square;
        # integer coordinates put it on the edge.
        self.mission.startAt(x + 0.5, 46, y + 0.5)

        self.item_location = landmarks.index(source_loc)
        self.destination = landmarks.index(destination)

        # The item drawn at the source is the type associated with the
        # destination landmark.
        self.mission.drawItem(source_loc[0], 47, source_loc[1],
                              self.landmark_types[self.destination])

        retries = 3
        log.debug("Final Mission XML sent to Malmo: \n %s",
                  self.mission.getAsXML(True))
        for retry in range(retries):
            try:
                malmo_env.startMission(self.mission, self.mission_record)
                time.sleep(10)

                world_state = malmo_env.getWorldState()
                if world_state.has_mission_begun:
                    break
            except RuntimeError as e:
                if retry == retries - 1:
                    # Log the exception itself: ``e.message`` is deprecated
                    # in Python 2.6+ and does not exist in Python 3.
                    log.error("Error starting mission. Max retries elapsed. Closing! %s", e)
                    exit(1)
                else:
                    time.sleep(10)

        world_state = malmo_env.getWorldState()

        # Poll until the mission reports that it has begun, logging any
        # errors delivered with each world state.
        while not world_state.has_mission_begun:
            log.debug("Waiting for mission to begin")
            time.sleep(0.1)
            world_state = malmo_env.getWorldState()
            for error in world_state.errors:
                log.error("Error: %s", error.text)
    def __init__(self, xmlfile):
        """Set up the Malmo handles, lookup tables and map state for one environment.

        Args:
            xmlfile: Passed to ``getMissionXML`` to obtain the mission XML.

        The mission is asked for a grid observation named 'relative_view'
        spanning ``self.sight``. Fields filled in later by ``observe()``
        start out as None.
        """
        # --- Malmo handles -------------------------------------------------
        self.agent_host = MalmoPython.AgentHost()
        self.my_mission = MalmoPython.MissionSpec(getMissionXML(xmlfile), True)
        self.my_mission_record = MalmoPython.MissionRecordSpec()

        # --- Block vocabulary ----------------------------------------------
        self.objects_of_interest = ['stone_button', 'wooden_door', 'lever']

        # Block name -> map index. Index 4 is shared by the clay, iron,
        # quartz and redstone-wire blocks and is labelled 'wall' in
        # index_to_object below.
        self.object_to_index = {
            'air': 9,
            'player': 8,
            'wooden_door': 2,
            'wool': 3,
            'stained_hardened_clay': 4,
            'clay': 4,
            'iron_block': 4,
            'quartz_block': 4,
            'fire': 5,
            'lever': 6,
            'stone_button': 7,
            'gravel': 10,
            'redstone_wire': 4
        }

        # Map index -> label (255 is 'unknown', 9 is 'frontier').
        self.index_to_object = {
            255: 'unknown',
            9: 'frontier',
            8: 'player',
            2: 'wooden_door',
            3: 'wool',
            4: 'wall',
            5: 'fire',
            6: 'lever',
            7: 'stone_button',
            10: 'gravel'
        }

        # NOTE(review): the original remark here was "state of the door to
        # be recorded" - confirm how door state relates to this list.
        self.non_opaque_objects = [9, 8, 1, 2, 5, 6, 7]
        self.passable_objects = ['air', 'wooden_door']
        self.passable_objects_with_cost = {
            'air': 1,
            'lever': 1,
            'wooden_door': 2,
            'gravel': 5
        }
        self.floor_objects_types = [
            'redstone_wire', 'wool', 'iron_block', 'quartz_block'
        ]

        # --- Environment-specific settings (adjust to match xmlfile) --------
        self.envsize = 50
        self.sight = {'x': (-21, 21), 'z': (-21, 21), 'y': (-1, 1)}
        self.angle = 50

        # Number of cells along each axis of the observed grid.
        x_min, x_max = self.sight['x']
        y_min, y_max = self.sight['y']
        z_min, z_max = self.sight['z']
        self.range_x = abs(x_max - x_min) + 1
        self.range_y = abs(y_max - y_min) + 1
        self.range_z = abs(z_max - z_min) + 1
        self.my_mission.observeGrid(x_min, y_min, z_min,
                                    x_max, y_max, z_max,
                                    'relative_view')
        self.scanning_range = 15

        # --- Goal bookkeeping ----------------------------------------------
        self.num_victims_seen = 0
        self.num_doors_seen = 0
        self.total_victims = 3
        self.total_doors = 3
        self.victims_visited = np.zeros((self.envsize, self.envsize))
        self.victims_visited_sparse = set()

        # --- Position tracking ---------------------------------------------
        # The agent sits at the centre cell of the observed grid.
        half_x = self.range_x // 2
        half_y = self.range_y // 2
        half_z = self.range_z // 2
        self.current_position = (half_z, half_x)
        self.relative_position = {
            'y': half_y,
            'z': half_z,
            'x': half_x
        }
        self.absolute_position = None

        # --- Filled in by observe() ----------------------------------------
        self.grid = None
        self.ypos = self.zpos = self.xpos = None
        self.yaw = self.pitch = None
        self.lineOfSight = None

        # --- Maps ------------------------------------------------------------
        self.masked_grid = None
        self.relative_map = None
        self.absolute_map = np.zeros(
            (self.range_y, self.envsize, self.envsize))
        self.origin_coord = {'y': 27.0, 'z': 142.5, 'x': -2190.5}

        self.maze_map_dict = {}
Exemple #15
0
def state_to_agent(rewarder, my_mission, state):
    """Return a mission spec whose agent starts with the items in ``state``.

    Every non-zero entry ``state[i]`` is read as a count of the item named
    ``rewarder.index_obj[i]``. Counts are split into stacks of at most 64 and
    written as InventoryItem elements, filling inventory slots downwards from
    slot 35. The item named 'hand' is skipped.

    Args:
        rewarder: Object whose ``index_obj`` maps a state index to an item
            name.
        my_mission: Mission spec providing ``getAsXML(True)``; it is not
            modified, a new spec is built from the edited XML.
        state: Iterable of per-item counts (truncated with ``int``).

    Returns:
        A new ``MalmoPython.MissionSpec`` parsed, with validation, from the
        edited mission XML. When ``state`` has no non-zero entry the XML is
        re-parsed unchanged.

    Raises:
        Exception: 'init_slot Error' as soon as the next free slot would be
            below 9, i.e. once slot 9 has been consumed.
    """
    namespace = '{http://ProjectMalmo.microsoft.com}'
    max_stack = 64
    root = ET.fromstring(my_mission.getAsXML(True))

    # Item name -> count, in state order.
    counts = OrderedDict()
    for i, n in enumerate(state):
        if n != 0:
            counts[rewarder.index_obj[i]] = int(n)

    init_item = []
    init_slot = 35
    for item_name, quantity in counts.items():
        if item_name == 'hand':
            continue
        # Full stacks first, then the remainder. divmod keeps the arithmetic
        # integral on Python 2 and 3, and an exact multiple of 64 yields no
        # trailing zero-quantity stack.
        full_stacks, remainder = divmod(quantity, max_stack)
        stack_sizes = [max_stack] * full_stacks
        if remainder:
            stack_sizes.append(remainder)
        for size in stack_sizes:
            init_item.append({
                'slot': str(init_slot),
                'type': item_name,
                'quantity': str(size)
            })
            init_slot = init_slot - 1
            if init_slot < 9:
                raise Exception('init_slot Error')

    if counts:
        # Add the items to each AgentStart, reusing an existing Inventory
        # element instead of adding a second one next to it.
        for agent_start in root.iter(namespace + 'AgentStart'):
            inventory = agent_start.find(namespace + 'Inventory')
            if inventory is None:
                inventory = Element(namespace + 'Inventory')
                agent_start.append(inventory)
            for attributes in init_item:
                inventory.append(
                    Element(namespace + 'InventoryItem', attributes))

    xmlstr = ET.tostring(root, encoding='utf8', method='xml')
    my_mission = MalmoPython.MissionSpec(xmlstr, True)

    return my_mission
                <Grid name="column">
                    <min x="0" y="-256" z="0" />
                    <max x="0" y="256" z="0" />
                </Grid>
            </ObservationFromGrid>
            <VideoProducer want_depth="true">
                <Width>''' + str(video_width) + '''</Width>
                <Height>''' + str(video_height) + '''</Height>
            </VideoProducer>
            <AbsoluteMovementCommands />
        </AgentHandlers>
    </AgentSection>
  </Mission>'''

# Build the mission from the XML assembled above, with validation switched on.
validate = True
my_mission = MalmoPython.MissionSpec(missionXML, validate)

# Parse the command line. A parse error prints the error and the usage text
# and exits with status 1; "help" prints the usage text and exits with 0.
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

# Select the LATEST_OBSERVATION_ONLY and LATEST_FRAME_ONLY policies
# (as the names indicate, only the newest observation / video frame is kept).
agent_host.setObservationsPolicy(
    MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)
Exemple #17
0
def init_agent(my_mission, agent_init_inv):
    """Return a new mission whose agents start with the requested inventory.

    The mission is exported with ``getAsXML(True)``, edited as an element
    tree and rebuilt through ``MalmoPython.MissionSpec(xml, True)``.  When at
    least one entry is supplied, an ``Inventory`` element is appended to
    every ``AgentStart`` element and one ``InventoryItem`` per stack is added
    to each child of that ``AgentStart`` whose tag contains ``"Inventory"``.

    Args:
        my_mission: Mission object providing ``getAsXML``.
        agent_init_inv: Iterable of ``"<item>*<count>"`` strings, for example
            ``"stick*70"``.  When an item name occurs more than once the last
            count wins.  The pseudo item ``"hand"`` is skipped.

    Returns:
        The ``MalmoPython.MissionSpec`` built from the edited XML.

    Raises:
        Exception: ``'init_slot Error'`` when the stacks use up the available
            slots (slot numbers count down from 35).
        IndexError, ValueError: When an entry is not of the form
            ``"<item>*<int>"``.
    """
    namespace = '{http://ProjectMalmo.microsoft.com}'
    root = ET.fromstring(my_mission.getAsXML(True))

    # Item name -> requested count, in first-seen order.
    requested = OrderedDict()
    for entry in agent_init_inv:
        pair = entry.split('*')
        requested[pair[0]] = int(pair[1])

    if requested:
        init_item = []
        init_slot = 35

        for item_name, count in requested.items():
            if item_name == 'hand':
                continue

            # divmod uses floor division, so the stack count stays an int on
            # both Python 2 and Python 3.
            full_stacks, remainder = divmod(count, 64)
            stack_sizes = [64] * full_stacks
            # An exact multiple of 64 must not get a trailing empty stack
            # (and a count of exactly 64 must yield one full stack rather
            # than a single stack of quantity 0).
            if remainder or full_stacks <= 0:
                stack_sizes.append(remainder)

            for quantity in stack_sizes:
                init_item.append({
                    'slot': str(init_slot),
                    'type': item_name,
                    'quantity': str(quantity),
                })
                init_slot -= 1
                # NOTE(review): this fires as soon as slot 9 has been handed
                # out, so at most 26 stacks succeed although the comment
                # below speaks of slots 9-35 -- confirm whether intended.
                if init_slot < 9:
                    raise Exception('init_slot Error')

        # Add the stacks to the agent mission configuration (slots 9-35).
        for agent_start in root.iter(namespace + 'AgentStart'):
            agent_start.append(Element(namespace + 'Inventory'))
            for node in agent_start:
                if "Inventory" in node.tag:
                    for attributes in init_item:
                        node.append(
                            Element(namespace + 'InventoryItem', attributes))

    xmlstr = ET.tostring(root, encoding='utf8', method='xml')
    return MalmoPython.MissionSpec(xmlstr, True)
Exemple #18
0
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)

if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

if "gs" in sys.argv:
    search_alg = 'gs'
else:
    search_alg = 'bfs'

my_mission = MalmoPython.MissionSpec(GetMissionXML("random", 0.2), True)
my_mission_record = MalmoPython.MissionRecordSpec()

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_mission_record)
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print "Error starting mission:", e
            exit(1)
        else:
            time.sleep(2)
Exemple #19
0
def _save_scene_record(target_path, agent_name, actions, scene_record):
    """Write ``scene_record`` as one JSON line into ``target_path``.

    The file is named ``<agent_name>#<actions joined by '_'>#<count>`` where
    ``count`` is the number of files already in ``target_path`` that carry
    the same ``<agent_name>#<actions>`` prefix.
    """
    file_name = agent_name + "#" + "_".join(actions)

    count = 0
    for existing in os.listdir(target_path):
        # Everything before the last '#' is the prefix of an existing record.
        if "#".join(existing.split('#')[:-1]) == file_name:
            count += 1

    record_path = target_path + file_name + "#" + str(count)
    print('#############%%%%%%%%%%%%%%%%%%%%')
    print(record_path)
    # The context manager closes the file even when serialising or writing
    # the record raises.
    with open(record_path, 'w') as fp:
        fp.write(json.dumps(scene_record))
        fp.write('\n')


def excute_scene(exp, plm, agent_config):
    """Run the scene described by ``exp`` and store the resulting record(s).

    Args:
        exp: JSON text.  Element ``[1]`` of the decoded value is searched for
            the tokens ``"punch"`` and ``"craft"`` and is also joined with
            ``'_'`` to form part of the output file name (presumably a list
            of strings -- confirm against the caller).
        plm: Passed through unchanged to ``with_punch`` / ``with_craft``.
        agent_config: Path of the mission XML file.  The agent name is the
            last path component up to its first ``'.'``.

    Returns:
        None.  A record is written under ``../data/scene_experience/`` for
        each of the punch and craft handlers that runs.
    """
    agent_host = MalmoPython.AgentHost()

    target_path = "../data/scene_experience/"
    agent_name = agent_config.split("/")[-1].split(".")[0]
    print("Agent_name :  %s" % agent_name)

    # Common mission setting.
    my_mission_record = MalmoPython.MissionRecordSpec()

    print("Now read configuration from  %s" % agent_config)
    # Close the scene file as soon as its content has been read.
    with open(agent_config) as scene_file:
        scene_xml = scene_file.read()
    my_mission = MalmoPython.MissionSpec(scene_xml, True)

    list_exp = json.loads(exp)
    print(list_exp)
    actions = list_exp[1]

    # The two checks are independent: both handlers run when both tokens
    # are present.
    if "punch" in actions:
        scene_record = with_punch(agent_host, list_exp, my_mission,
                                  my_mission_record, plm)
        print("package ... ")
        _save_scene_record(target_path, agent_name, actions, scene_record)

    if "craft" in actions:
        scene_record = with_craft(agent_host, list_exp, my_mission,
                                  my_mission_record, plm)
        print('package ... ')
        _save_scene_record(target_path, agent_name, actions, scene_record)

    # NOTE: "use" experiences are not executed here; that handler call was
    # disabled in the original code.
 def setMissionXML(self, missionXML):
     """Remember the mission description and build the Malmo specs from it.

     Args:
         missionXML: Object whose ``xml()`` method returns the mission XML
             handed to ``MalmoPython.MissionSpec``.

     Sets ``self.missionDesc`` (the object itself), ``self.mission`` (the
     spec built from its XML, second constructor argument ``True``) and
     ``self.mission_record`` (a fresh ``MissionRecordSpec``).
     """
     self.missionDesc = missionXML
     xml_text = missionXML.xml()
     self.mission = MalmoPython.MissionSpec(xml_text, True)
     self.mission_record = MalmoPython.MissionRecordSpec()
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

if agent_host.receivedArgument("test"):
    num_repeats = 1
else:
    num_repeats = 10

for i in range(num_repeats):
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML("random", float(i / 10.0)), True)
    my_mission_record = MalmoPython.MissionRecordSpec()

    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print "Error starting mission:", e
                exit(1)
            else:
                time.sleep(2)
Exemple #22
0
if __name__ == '__main__':
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    n = 1
    num_repeats = 200
    agent = Agent(iterations=num_repeats)
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_recording_mission = MalmoPython.MissionSpec(recordingXML, True)

    # Attempt to start a mission:
    max_retries = 3

    cumulative_rewards = []
    for i in range(num_repeats):

        for retry in range(max_retries):
            try:
                if RECORDING and (i % RECORDING_ITERATIONS == 0):
                    my_mission_record = MalmoPython.MissionRecordSpec(
                        "recording_" + str(i) + ".tgz")
                    my_mission_record.recordMP4(60, 8000000)
                    agent.recording = True
Exemple #23
0
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print 'ERROR:', e
    print agent_host.getUsage()
    exit(1)
# Help was requested: print the usage text and exit with status 0.
if agent_host.receivedArgument("help"):
    print agent_host.getUsage()
    exit(0)

# One repetition when the "test" argument was received, 30000 otherwise.
if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 30000

for iRepeat in range(num_reps):
    # A fresh mission per repetition; the repetition number is embedded in
    # the string handed to GetMissionXML.
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML("Crafty #" + str(iRepeat)), validate)
    my_mission_record = MalmoPython.MissionRecordSpec(
    )  # Records nothing by default
    max_retries = 3
    for retry in range(max_retries):
        try:
            # Attempt to start the mission:
            agent_host.startMission(my_mission, my_client_pool,
                                    my_mission_record, 0,
                                    "craftTestExperiment")
            break
        except RuntimeError as e:
            # Only the last failed attempt is reported and ends the process
            # with status 1.
            # NOTE(review): no delay between attempts is coded in the
            # visible handler -- confirm whether one is intended.
            if retry == max_retries - 1:
                print "Error starting mission", e
                print "Is the game running?"
                exit(1)
Exemple #24
0
    exit(0)

if agent_host.receivedArgument("test"):
    # TODO: find a way to usefully run this sample as an integration test
    exit(0)

# The mission file is mandatory; without it print the usage text and exit
# with status 1.
input_file_name = agent_host.getStringArgument("file")
if input_file_name == "":
    print('\nERROR: Supply a file to load on the command line.\n')
    print(agent_host.getUsage())
    exit(1)

validate = True
# Read the mission through a context manager so the handle is closed even if
# building the spec raises; the already validated file name is reused rather
# than querying the argument a second time.
with open(input_file_name, 'r') as mission_file:
    my_mission = MalmoPython.MissionSpec(mission_file.read(), validate)

for iRepeat in range(30000):

    my_mission_record = MalmoPython.MissionRecordSpec()
    # Make up to max_retries attempts to start the mission, pausing two
    # seconds after each failure except the last, which is reported and
    # ends the process with status 1.
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
Exemple #25
0
    image.save(name)


# Create default Malmo objects:
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    # Parsing failed: report the error, print the usage text, exit status 1.
    print('ERROR:', e)
    print(agent_host.getUsage())
    exit(1)
if agent_host.receivedArgument("help"):
    # Help was requested: print the usage text and exit with status 0.
    print(agent_host.getUsage())
    exit(0)

# Build the mission from `missionXML` (second constructor argument True)
# together with a default-constructed record spec.
my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = MalmoPython.MissionRecordSpec()
# MP4 recording is left disabled:
# my_mission_record.recordMP4(20, 800000)

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_client_pool, my_mission_record,
                                0, "")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:", e)
            exit(1)
        else:
#!/usr/bin/env python
import MalmoPython
import time

# Set up our Malmo client.
malmo = MalmoPython.AgentHost()

# Load the mission template, closing the file as soon as it has been read,
# then fill in its placeholders.
with open("classroom_basic.xml") as spec_file:
    spec = spec_file.read()
spec = spec.replace("__WIDTH__", "640")
spec = spec.replace("__HEIGHT__", "480")
spec = spec.replace("__EPISODE_TIME_MS__", "10000000")
mission = MalmoPython.MissionSpec(spec, True)
mission_record = MalmoPython.MissionRecordSpec()

malmo.startMission(mission, mission_record)

# Poll the world state once per second and report any rewards.  The
# single-argument print calls produce the same output on Python 2 and 3.
# NOTE(review): nothing in this loop ever ends it -- confirm that running
# until the process is killed is intended.
while True:
    world_state = malmo.getWorldState()
    if len(world_state.rewards) > 0:
        print("len? %d" % len(world_state.rewards))
        print(world_state.rewards[0].getValue())
    time.sleep(1)
Exemple #27
0
    )
    print(
        "NB4tf4i vörös pipacsai (Vörös Pipacs Pokol) - DEAC-Hackers Battle Royale Arena\n\n"
    )
    print(
        "The aim of this first challenge, called nb4tf4i's red flowers, is to collect as many red flowers as possible before the lava flows down the hillside.\n"
    )
    print(
        "Ennek az első, az nb4tf4i vörös virágai nevű kihívásnak a célja összegyűjteni annyi piros virágot, amennyit csak lehet, mielőtt a láva lefolyik a hegyoldalon.\n"
    )
    print(
        "Norbert Bátfai, [email protected], https://arato.inf.unideb.hu/batfai.norbert/\n\n"
    )
    print("Loading mission from %s" % missionXML_file)
    mission_xml = f.read()
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.drawBlock(0, 0, 0, "lava")


class Hourglass:
    """Cyclic cursor over a fixed sequence of frames.

    ``cursor()`` advances before it reads, so the first call on a fresh
    instance returns the element at position 1; after the last element it
    wraps around to position 0.
    """

    def __init__(self, charSet):
        self.index = 0
        self.charSet = charSet

    def cursor(self):
        """Step to the next position, wrapping around, and return its frame."""
        frames = self.charSet
        _, self.index = divmod(self.index + 1, len(frames))
        return frames[self.index]


# Frames for the text spinner.  The backslash is escaped explicitly: '\|' is
# not a recognised escape sequence and newer Python versions warn about it.
# The resulting string is unchanged (five characters: | / - \ |).
hg = Hourglass('|/-\\|')
    num_reps = 30000

for iRepeat in range(num_reps):
    # Origin of this repetition's maze: x moves 16 per mission and wraps
    # after 64 missions, z moves 16 once every 64 missions, and y rises by 8
    # (from a base of 200) once every 64 * 64 missions.
    xorg = (iRepeat % 64) * 16
    zorg = (old_div(iRepeat, 64) % 64) * 16
    yorg = 200 + (old_div(iRepeat, 64 * 64) % 64) * 8

    # Recording object for this repetition, labelled with a 1-based number.
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "Patch_{}".format(iRepeat + 1))

    print("".join([
        "Mission ", str(iRepeat), " --- starting at ",
        str(xorg), ", ", str(yorg), ", ", str(zorg),
    ]))

    # Build the mission for this origin.
    my_mission = MalmoPython.MissionSpec(
        GetMissionXML(iRepeat, xorg, yorg, zorg, iRepeat), True)

    # Make up to max_retries attempts to start the mission, pausing two
    # seconds after each failure except the last, which is reported and
    # ends the process.
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_client_pool,
                                    my_mission_record, 0, str(experimentID))
        except RuntimeError as e:
            if retry != max_retries - 1:
                time.sleep(2)
                continue
            print("Error starting mission", e)
            exit(1)
        else:
            break
 def _load_mission(self, mission_file):
     """Read a mission XML file and store its spec on ``self.mission_spec``.

     Args:
         mission_file: Path of the mission XML file (a string; it is
             concatenated into the log message).

     The spec is built with ``MalmoPython.MissionSpec(xml, True)`` and its
     summary is logged once it has been loaded.
     """
     logger.info("Loading mission from " + mission_file)
     # The context manager closes the file handle as soon as the XML has
     # been read.
     with open(mission_file, 'r') as xml_file:
         mission_xml = xml_file.read()
     self.mission_spec = MalmoPython.MissionSpec(mission_xml, True)
     logger.info("Loaded mission: " + self.mission_spec.getSummary())
Exemple #30
0
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ------------------------------------------------------------------------------------------------

import MalmoPython

# Assemble a mission purely through MissionSpec method calls, starting from
# a default-constructed spec (no XML string is passed to the constructor).
my_mission = MalmoPython.MissionSpec()
my_mission.timeLimitInSeconds(10)
my_mission.drawBlock(19, 0, 19, "redstone_block")
my_mission.createDefaultTerrain()
my_mission.setTimeOfDay(6000, False)
my_mission.drawCuboid(50, 0, 50, 100, 10, 100, "redstone_block")
my_mission.drawItem(3, 0, 2, "diamond_pickaxe")
my_mission.drawSphere(50, 10, 50, 10, "ice")
my_mission.drawLine(50, 20, 50, 100, 20, 100, "redstone_block")
# The start and end calls share their first three values with the
# redstone block drawn at (19, 0, 19) only up to a 0.5 offset.
# NOTE(review): presumably x/y/z positions -- confirm in the MissionSpec docs.
my_mission.startAt(2.5, 0.0, 2.5)
my_mission.endAt(19.5, 0.0, 19.5, 1.0)
my_mission.requestVideo(320, 240)
my_mission.setModeToCreative()
my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100, 1.1)
my_mission.observeRecentCommands()
my_mission.observeHotBar()