Example 1
 def __init__(self, missionXML, serverIp='127.0.0.1'):
     self.missionDesc = None
     self.mission = None
     self.mission_record = None
     self.setMissionXML(missionXML)
     nAgents = len(missionXML.agentSections)
     self.agent_hosts = []
     self.agent_hosts += [MalmoPython.AgentHost() for n in range(nAgents)]
     self.agent_hosts[0].parse(sys.argv)
     if self.agent_hosts[0].receivedArgument('recording_dir'):
         recordingsDirectory = malmoutils.get_recordings_directory(
             self.agent_hosts[0])
         # The record spec is still None at this point, so create it before use.
         self.mission_record = MalmoPython.MissionRecordSpec()
         self.mission_record.recordRewards()
         self.mission_record.recordObservations()
         self.mission_record.recordCommands()
         self.mission_record.setDestination(recordingsDirectory + "/" +
                                            "lastRecording.tgz")
         if self.agent_hosts[0].receivedArgument("record_video"):
             self.mission_record.recordMP4(24, 2000000)
     self.client_pool = MalmoPython.ClientPool()
     for x in range(10000, 10000 + nAgents):
         self.client_pool.add(MalmoPython.ClientInfo(serverIp, x))
     self.worldStates = [None] * nAgents
     self.observe = [None] * nAgents
     self.isAlive = [True] * nAgents
     self.frames = [None] * nAgents
     self.segmentation_frames = [None] * nAgents
Example 2
 def get_client_pool(self):
     my_client_pool = MalmoPython.ClientPool()
     my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 20000))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
     return my_client_pool
Example 3
    def __init__(self, mission_xml, num_agents):
        self.mission = MalmoPython.MissionSpec(mission_xml, True)
        self.mission_record = MalmoPython.MissionRecordSpec()
        self.num_agents = num_agents
        self.experiment_ID = str(uuid.uuid4())
        self.client_pool = MalmoPython.ClientPool()
        for x in range(10000, 10000 + num_agents + 1):
            self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', x))

        # Create one agent host for parsing
        self.agent_hosts = [MalmoPython.AgentHost()]

        try:
            self.agent_hosts[0].parse(sys.argv)
        except RuntimeError as e:
            print('ERROR:', e)
            print(self.agent_hosts[0].getUsage())
            exit(1)
        if self.agent_hosts[0].receivedArgument("help"):
            print(self.agent_hosts[0].getUsage())
            exit(0)

        # Create the rest of the agent hosts.
        if self.num_agents > 1:
            self.agent_hosts += [
                MalmoPython.AgentHost() for x in range(self.num_agents - 1)
            ]
Example 4
def run_mission(rambo_steve, episode):
    agent_host = MalmoPython.AgentHost()

    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument('help'):
        print(agent_host.getUsage())
        exit(0)

    my_mission = MalmoPython.MissionSpec(world.getMissionXML(), True)
    # adding the recordedFileName into MissionRecordSpec
    my_mission_record = MalmoPython.MissionRecordSpec()
    # my_mission = malmoutils.get_default_recording_object(agent_host, "Mission")
    # adding the spec for adding the recording of the video
    # my_mission.requestVideo(1280, 720)
    # my_mission_record.recordMP4(30, 2000000)

    # set up client to connect:
    my_clients = MalmoPython.ClientPool()
    for i in range(5):
        my_clients.add(
            MalmoPython.ClientInfo('127.0.0.1', c.MISSION_CONTROL_PORT + i))

    # Attempt to start a mission:
    print('Attempting to start mission...')
    max_retries = 5
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "RamboSteve")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print('Error starting mission:', e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print('Waiting for the mission to start ', end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print('.', end='')
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print('Error:', error.text)

    print()
    print('Mission running ', end=' ')

    rambo_steve.run(agent_host, episode)

    print()
    print('Mission ended')
    time.sleep(2)
Example 5
    def __init__(self, _):
        # graphing the returns
        self.log_frequency = 1

        self.episode_step = 0
        self.episode_return = 0
        self.returns = []
        self.steps = []

        # DISCRETE ACTION SPACE [0, 5]:
        # - action 0 = attack
        # - action 1 = switch to sword
        # - action 2 = switch to axe
        # - action 3 = use gapple
        # - action 4 = use shield (1 second)
        # - action 5 = idle
        self.action_space = Discrete(6)

        # CONTINUOUS OBSERVATION SPACE:
        # - enemy in range: true=1, false=0
        # - my health normalized: [0, 1]
        # - enemy health normalized: [0, 1]
        # - enemy weapon: axe=1, sword=0.75, gapple=0.25, shield=0 (offensive to defensive scale)
        # - distance apart from both agents
        self.observation_space = Box(0, 1, shape=(5, ), dtype=np.float32)

        ###################################
        # Malmo parameters
        self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
        # Create client pool
        self.client_pool = Malmo.ClientPool()
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000))
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10001))

        ###################################
        # Custom parameters
        self.mission_index = 0

        ###################################
        # self-play parameters
        #self.opponent_policy = load_trained_agent(CURRENT_CHECKPOINT)
        self.use_self_play = False

        self.first_reset = True
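The five-dimensional observation documented in the comments above can be sketched as a small helper; this is illustrative only, and the helper name and its inputs are assumptions rather than part of the original class:

import numpy as np

def make_observation(enemy_in_range, my_health, enemy_health, weapon_score, distance):
    # All five fields are assumed pre-normalized to [0, 1], matching the
    # Box(0, 1, shape=(5,)) space defined above.
    return np.array([enemy_in_range, my_health, enemy_health,
                     weapon_score, distance], dtype=np.float32)

# e.g. enemy in range, both at half health, enemy holding a sword (0.75),
# agents at half the maximum distance:
# make_observation(1.0, 0.5, 0.5, 0.75, 0.5)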
Example 6
 def get_client_pool(self):
     """ 
     Malmo specific function: To create client pool for connecting to the minecraft server
     """
     my_client_pool = MalmoPython.ClientPool()
     my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 20000))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
     # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
     return my_client_pool
Example 7
def initalizeMinecraftMap(xml):
    agent_host = MalmoPython.AgentHost()
    
    my_mission = MalmoPython.MissionSpec(xml, True)
    recordedFileName = recordPath.format("final_take0_bad.tgz") #comment out to not capture video
    #my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission_record = MalmoPython.MissionRecordSpec(recordedFileName) #comment out to not capture video
    my_mission.requestVideo(1200,720)
    my_mission_record.recordMP4(30, 2000000) #comment out to not capture video
    my_mission.setViewpoint(1)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000)) # add Minecraft machines here as available

    return (my_mission,agent_host,my_clients,my_mission_record)
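A hedged usage sketch for the tuple returned above (the retry loop mirrors the other examples in this listing; `xml` is assumed to hold valid mission XML):

import time

my_mission, agent_host, my_clients, my_mission_record = initalizeMinecraftMap(xml)
max_retries = 3
for retry in range(max_retries):
    try:
        agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "experiment_1")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:", e)
            exit(1)
        time.sleep(2)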
Example 8
        def __init__(self, port=None, existing=False):
            self.existing = existing

            if not existing:
                if not port:
                    port = InstanceManager._get_valid_port()
                cmd = InstanceManager.MC_COMMAND
                if InstanceManager.headless:
                    cmd += " -headless "
                cmd += " -port " + str(port)
                logger.info("Starting Minecraft process: " + cmd)

                args = shlex.split(cmd)
                proc = subprocess.Popen(
                    args,
                    cwd=InstanceManager.MINECRAFT_DIR,
                    # pipe entire output
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    # use process group, see http://stackoverflow.com/a/4791612/18576
                    preexec_fn=os.setsid)
                # wait until Minecraft process has output "CLIENT enter state: DORMANT"
                while True:
                    line = proc.stdout.readline()
                    logger.debug(line)
                    if not line:
                        raise EOFError(
                            "Minecraft process finished unexpectedly")
                    if b"CLIENT enter state: DORMANT" in line:
                        break
                logger.info("Minecraft process ready")
                # suppress entire output, otherwise the subprocess will block
                # NB! there will still be logs under Malmo/Minecraft/run/logs
                # FNULL = open(os.devnull, 'w')
                FMINE = open('./minecraft.log', 'w')
                proc.stdout = FMINE
                self.proc = proc
            else:
                assert port is not None, "No existing port specified."

            self.ip = InstanceManager.DEFAULT_IP
            self.port = port
            self.locked = False

            # Creating client pool.
            logger.info("Creating client pool for {}".format(self))
            self.client_pool = MalmoPython.ClientPool()
            self.client_pool.add(MalmoPython.ClientInfo(self.ip, self.port))
Example 9
    def __init__(self, _):
        # Graphing the returns
        self.step_rewards = []

        # DISCRETE ACTION SPACE [0, 5]:
        # - action 0 = attack
        # - action 1 = switch to sword
        # - action 2 = switch to axe
        # - action 3 = use gapple
        # - action 4 = use shield (1 second)
        # - action 5 = idle
        self.action_space = Discrete(6)

        # CONTINUOUS OBSERVATION SPACE:
        # - enemy in range: true=1, false=0
        # - my health normalized: [0, 1]
        # - enemy health normalized: [0, 1]
        # - enemy weapon: axe=1, sword=0.75, gapple=0.25, shield=0 (offensive to defensive scale)
        self.observation_space = Box(0, 1, shape=(4, ), dtype=np.float32)

        ###################################
        # Malmo parameters
        self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
        # Create client pool
        self.client_pool = Malmo.ClientPool()
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10001))
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002))
        self.mission_index = 0
        self.old_checkpoint = -1

        ###################################
        # self-play parameters
        self.opponent_policy = load_trained_agent(get_current_checkpoint())
        self.use_self_play = False
        self.last_load = 0
        self.first_reset = True
Example 10
def create_malmo_components():
  # setup client pool
  client_pool = MalmoPython.ClientPool()
  for port in map(int, opts.malmo_ports.split(",")):
    print("adding client with port %d" % port, file=sys.stderr)
    client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
  # setup agent host
  malmo = MalmoPython.AgentHost()
  # can't do this without more complex caching of world state vid frames
  #malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
  # load mission spec
  mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms), True)
  mission_record = MalmoPython.MissionRecordSpec()
  # return all
  return client_pool, malmo, mission, mission_record
Example 11
    def run(self):
        """Runs the game with the registered agents

        Raises:
            :class:`jason_malmo.exceptions.NoAgentsException`: No agents are registered in the game.\n
                Register an agent before running the game::

                    game.register('/path/to/file.asl')
                    game.run()
        """
        self._client_pool = MalmoPython.ClientPool()

        if not len(self._agents):
            raise NoAgentsException

        for port in range(10000, 10000 + len(self._agents) + 1):
            self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

        self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(),
                                                   True)

        for (index, agent) in enumerate(self._agents):
            malmoutils.parse_command_line(agent.malmo_agent)
            self._safe_start_mission(
                agent.malmo_agent, self._my_mission, self._client_pool,
                malmoutils.get_default_recording_object(
                    agent.malmo_agent, "saved_data"), index, '')
        self._safe_wait_for_start(
            [agent.malmo_agent for agent in self._agents])

        threads = []
        for agent in self._agents:
            thr = threading.Thread(target=self._jason_env.run_agent,
                                   args=(agent, ),
                                   kwargs={})
            thr.start()
            threads.append(thr)

        # TODO while mission is running
        while True:
            for agent in self._agents:
                for (belief, value) in agent.beliefs.items():
                    if belief[0] == 'tasks':
                        tasks = []
                        for task in list(value)[0].args[0]:
                            tasks.append(task)
                        self.tasks.handle(agent, tasks)
            time.sleep(0.05)
Example 12
    def StartServer(self, names, ip='127.0.0.1'):
        """
            Starts a server for the given mission XML and list of agent names
        """
        for i, name in enumerate(names):
            n = 10000 + i
            self.clientPool.add(MalmoPython.ClientInfo(ip, n))

            self.agents.append(MultiAgent(name, self.missionXML, i))

        malmoutils.parse_command_line(self.agents[0].host)

        for a in self.agents:
            a.StartMission(self.clientPool)

        self.safeWaitForStart(self.agents)
Example 13
    def start(self):

        self.malmo_client_pool = MalmoPython.ClientPool()
        self.malmo_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))  # 10000 in use - try 10001

        self.malmo_mission = MalmoPython.MissionSpec(self.missionXML, True)
        self.malmo_mission.forceWorldReset()

        self.malmo_mission_record = MalmoPython.MissionRecordSpec()

        self.malmo_mission.requestVideo(800, 500)
        self.malmo_mission.setViewpoint(1)

        # Attempt to start a mission:
        max_retries = 10
        for retry in range(max_retries):
            try:
                self.agent_host.startMission(self.malmo_mission, self.malmo_client_pool,
                                             self.malmo_mission_record, 0, '')
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:",e)
                    exit(1)
                else:
                    time.sleep(2)

        # Loop until mission starts:
        print("Waiting for the mission to start ")
        self.world_state = self.agent_host.getWorldState()

        while not self.world_state.has_mission_begun:
            sys.stdout.write(".")
            time.sleep(0.1)
            self.world_state = self.agent_host.getWorldState()
            for error in self.world_state.errors:
                print("Error:", error.text)

        print(" ")
        print("Mission running ")

        self.number += 1
        self.start_time = time.time()
        self.end_time = None
Example 14
    def init_malmo(self):
        """
        Initialize new Malmo mission.
        """

        # Load the XML file and create mission spec & record.
        mission_file = './mission.xml'
        with open(mission_file, 'r') as f:
            print("Loading mission from %s" % mission_file)
            mission_xml = f.read()
            my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission_record = MalmoPython.MissionRecordSpec()
            my_mission.requestVideo(800, 500)
            my_mission.setViewpoint(1)

        # Attempt to start Malmo.
        max_retries = 3
        my_clients = MalmoPython.ClientPool()
        my_clients.add(MalmoPython.ClientInfo(
            '127.0.0.1', 10000))  # add Minecraft machines here as available
        for retry in range(max_retries):
            try:
                self.agent_host.startMission(my_mission, my_clients,
                                             my_mission_record, 0, 'Agent')
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission:", e)
                    exit(1)
                else:
                    time.sleep(2)

        # Start the world.
        world_state = self.agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = self.agent_host.getWorldState()
            for error in world_state.errors:
                print("\nError:", error.text)
        self.initialize()
        return world_state
Example 15
        agent.sendCommand("attack 1")
        agent.sendCommand("attack 0")

        AGENT_COOLDOWNS[1] = ATTACK_COOLDOWNS[AGENT_WEAPONS[1]]


if __name__ == "__main__":
    # Flush immediately
    print = functools.partial(print, flush=True)

    # Create agent host
    agent_hosts = [Malmo.AgentHost() for _ in range(AGENT_COUNT)]

    # Create client pool
    client_pool = Malmo.ClientPool()
    client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000))
    client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002))

    for a in range(MISSION_COUNT):
        print(f"Running mission #{a}...")
        # Create missions
        mission = Malmo.MissionSpec(get_mission_xml(), True)
        mission_id = str(uuid.uuid4())

        # Start mission
        for agent_index in range(AGENT_COUNT):
            start_mission(agent_hosts[agent_index], mission, client_pool,
                          Malmo.MissionRecordSpec(), agent_index, mission_id)

        wait_for_start(agent_hosts)
Example 16
    <AgentHandlers>
      <DiscreteMovementCommands/>
      <RewardForCollectingItem>
        <Item reward="10" type="dirt"/>
      </RewardForCollectingItem>
      <RewardForDiscardingItem>
        <Item reward="100" type="dirt"/>
      </RewardForDiscardingItem>
    </AgentHandlers>
  </AgentSection>
  
</Mission>'''
my_mission = MalmoPython.MissionSpec(xml,True)

client_pool = MalmoPython.ClientPool()
client_pool.add( MalmoPython.ClientInfo('127.0.0.1',10000) )
client_pool.add( MalmoPython.ClientInfo('127.0.0.1',10001) )

MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

def safeStartMission(agent_host, mission, client_pool, recording, role, experimentId):
    used_attempts = 0
    max_attempts = 5
    print("Calling startMission for role", role)
    while True:
        try:
            agent_host.startMission(mission, client_pool, recording, role, experimentId)
            break
        except MalmoPython.MissionException as e:
            errorCode = e.details.errorCode
            if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
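                # (The example is truncated here; the continuation below is a
                # plausible sketch in the style of Malmo's multi-agent samples,
                # not the original author's code.)
                print("Server not quite ready yet - waiting...")
                used_attempts += 1
                time.sleep(2)
            else:
                print("Other error:", e.message)
                print("Waiting will not help here - bailing immediately.")
                exit(1)
        if used_attempts == max_attempts:
            print("All attempts to start the mission failed - bailing now.")
            exit(1)
    print("startMission called okay.")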
Example 17
        </AgentHandlers>
      </AgentSection>'''

    xml += '</Mission>'
    return xml

# Set up a client pool.
# IMPORTANT: If ANY of the clients will be on a different machine, then you MUST
# make sure that any client which can be the server has an IP address that is
# reachable from other machines - ie DO NOT SIMPLY USE 127.0.0.1!!!!
# The IP address used in the client pool will be broadcast to other agents who
# are attempting to find the server - so this will fail for any agents on a
# different machine.
client_pool = MalmoPython.ClientPool()
for x in range(10000, 10000 + NUM_AGENTS + 1):
    client_pool.add( MalmoPython.ClientInfo('127.0.0.1', x) )
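# (Illustrative sketch, not from the original example: if any client runs on
# another machine, build the pool with LAN-reachable addresses instead of the
# loopback, per the warning above. The addresses below are placeholders.)
# multi_machine_pool = MalmoPython.ClientPool()
# for ip in ["192.168.0.11", "192.168.0.12"]:
#     multi_machine_pool.add(MalmoPython.ClientInfo(ip, 10000))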

# Keep score of how our robots are doing:
survival_scores = [0 for x in range(NUM_AGENTS)]    # Lasted to the end of the mission without dying.
apple_scores = [0 for x in range(NUM_AGENTS)]       # Collecting apples is good.
zombie_kill_scores = [0 for x in range(NUM_AGENTS)] # Good! Help rescue humanity from zombie-kind.
player_kill_scores = [0 for x in range(NUM_AGENTS)] # Bad! Don't kill the other players!

num_missions = 5 if INTEGRATION_TEST_MODE else 30000
for mission_no in range(1, num_missions+1):
    print("Running mission #" + str(mission_no))
    # Create mission xml - use forcereset if this is the first mission.
    my_mission = MalmoPython.MissionSpec(getXML("true" if mission_no == 1 else "false"), True)

    # Generate an experiment ID for this mission.
    # This is used to make sure the right clients join the right servers -
Example 18
def run(argv=['']):
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds( 10 )
    my_mission.requestVideo( 320, 240 )
    my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:",e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:",error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand( "move 1" )
        agent_host.sendCommand( "turn " + str(random.random()*2-1) )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",world_state.number_of_video_frames_since_last_state,world_state.number_of_observations_since_last_state,world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
           world_state.number_of_observations_since_last_state > 0 or
           world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:",reward.getValue())
        for error in world_state.errors:
            print("Error:",error.text)
        for frame in world_state.video_frames:
            print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.
    Args:
        sess: Tensorflow Session object
        env: OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sampel when initializing
          the reply memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes
    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")

    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    # Populate the replay memory with initial experience

    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    stepNum = 0
    while world_state.is_mission_running:

        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])

        world_state = agent_host.peekWorldState()

        num_frames_seen = world_state.number_of_video_frames_since_last_state

        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:],
                                   np.expand_dims(next_state, 2),
                                   axis=2)
            state = next_state
        # time.sleep(1)

    return None
Example 20
    def __init__(self,
                 world_def,
                 video_dim=(32, 32),
                 num_parallel=1,
                 time_limit=20,
                 reset=True,
                 discrete_actions=False,
                 vision_observation=False,
                 depth=False,
                 num_frames=1,
                 grayscale=True):
        self.video_width, self.video_height = video_dim
        self.image_width, self.image_height = video_dim
        self.discrete_actions = discrete_actions
        self.vision_observation = vision_observation
        self.depth = depth
        self.num_parallel = num_parallel

        self.world_def = world_def
        self.mission = self.world_def.generate_mission(reset=reset)
        #self.XGoalPos, self.YGoalPos = self.world_def.goal_pos[0], self.world_def.goal_pos[2]

        self.mission.requestVideo(self.video_height, self.video_width)
        self.mission.observeRecentCommands()
        self.mission.allowAllContinuousMovementCommands()
        self.mission.timeLimitInSeconds(time_limit)

        if self.num_parallel > 1:
            self.client_pool = MalmoPython.ClientPool()
            for i in range(num_parallel):
                port = 10000 + i
                self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

        self.agent_host = MalmoPython.AgentHost()
        self.agent_host.setObservationsPolicy(
            MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
        # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)

        #self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
        self.agent_host.setVideoPolicy(
            MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

        self.mission_record_spec = MalmoPython.MissionRecordSpec()

        if discrete_actions:
            self._action_set = {
                0: "move 1",
                1: "turn 0.5",
                2: "turn -0.5",
                3: None
            }
            self.action_space = Discrete(n=len(self._action_set))
        else:
            self._action_set = [
                ("move", (-1, 1)),
                ("turn", (-1, 1)),
                ("pitch", (-1, 1)),
                ("use", (0, 1)),
                ("jump", (0, 1)),
            ]
            # self._action_set = [("move", (0, 1)),
            #                     ("move", (-1, 0)),
            #                     ("turn", (0, 1)),
            #                     ("turn", (-1, 0))]

            lower_bound = np.asarray([x[1][0] for x in self._action_set])
            upper_bound = np.asarray([x[1][1] for x in self._action_set])
            self.action_space = Box(lower_bound, upper_bound)

        self.num_frames = num_frames
        self.grayscale = grayscale
        if self.grayscale:
            self.num_frame_channels = 1
            high = 1
        else:
            self.num_frame_channels = 3
            high = 255

        # Obs keys and bounds
        x_bounds = self.world_def.x_bounds
        z_bounds = self.world_def.z_bounds
        self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
        self.minDistanceFromGoal = None
        if self.vision_observation:
            self.observation_space = Box(
                low=0,
                high=high,
                shape=(self.image_height, self.image_width,
                       self.num_frames * self.num_frame_channels))
        else:

            self.obs_keys = [
                (u'XPos', x_bounds),
                (u'YPos', (200, 300)),
                (u'ZPos', z_bounds),
                (u'yaw', (0, 360)),
                (u'pitch', (0, 180)),
                #(u'XGoalPos', x_bounds),
                #(u'YGoalPos', z_bounds),
                (u'DistanceTravelled', (0, 30)),
                (u'distanceFromGoal', (0, self.max_dist))
            ]
            l_bounds = [key[1][0] for key in self.obs_keys]
            u_bounds = [key[1][1] for key in self.obs_keys]
            self.observation_space = Box(np.array(l_bounds),
                                         np.array(u_bounds))

        self.last_obs = None
        self.cum_reward = 0
        self.distance_travelled = 0
        self.terminal = False
        self.jump = 0
Example 21
    def init(self,
             client_pool=None,
             start_minecraft=None,
             continuous_discrete=True,
             add_noop_command=None,
             max_retries=90,
             retry_sleep=10,
             step_sleep=0.001,
             skip_steps=0,
             videoResolution=None,
             videoWithDepth=None,
             observeRecentCommands=None,
             observeHotBar=None,
             observeFullInventory=None,
             observeGrid=None,
             observeDistance=None,
             observeChat=None,
             allowContinuousMovement=None,
             allowDiscreteMovement=None,
             allowAbsoluteMovement=None,
             recordDestination=None,
             recordObservations=None,
             recordRewards=None,
             recordCommands=None,
             recordMP4=None,
             gameMode=None,
             forceWorldReset=None):

        self.max_retries = max_retries
        self.retry_sleep = retry_sleep
        self.step_sleep = step_sleep
        self.skip_steps = skip_steps
        self.forceWorldReset = forceWorldReset
        self.continuous_discrete = continuous_discrete
        self.add_noop_command = add_noop_command

        if videoResolution:
            if videoWithDepth:
                self.mission_spec.requestVideoWithDepth(*videoResolution)
            else:
                self.mission_spec.requestVideo(*videoResolution)

        if observeRecentCommands:
            self.mission_spec.observeRecentCommands()
        if observeHotBar:
            self.mission_spec.observeHotBar()
        if observeFullInventory:
            self.mission_spec.observeFullInventory()
        if observeGrid:
            self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
        if observeDistance:
            self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
        if observeChat:
            self.mission_spec.observeChat()

        if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
            # if there are any parameters, remove current command handlers first
            self.mission_spec.removeAllCommandHandlers()

            if allowContinuousMovement is True:
                self.mission_spec.allowAllContinuousMovementCommands()
            elif isinstance(allowContinuousMovement, list):
                for cmd in allowContinuousMovement:
                    self.mission_spec.allowContinuousMovementCommand(cmd)

            if allowDiscreteMovement is True:
                self.mission_spec.allowAllDiscreteMovementCommands()
            elif isinstance(allowDiscreteMovement, list):
                for cmd in allowDiscreteMovement:
                    self.mission_spec.allowDiscreteMovementCommand(cmd)

            if allowAbsoluteMovement is True:
                self.mission_spec.allowAllAbsoluteMovementCommands()
            elif isinstance(allowAbsoluteMovement, list):
                for cmd in allowAbsoluteMovement:
                    self.mission_spec.allowAbsoluteMovementCommand(cmd)

        if start_minecraft:
            # start Minecraft process assigning port dynamically
            self.mc_process, port = minecraft_py.start()
            logger.info(
                "Started Minecraft on port %d, overriding client_pool.", port)
            client_pool = [('127.0.0.1', port)]

        if client_pool:
            if not isinstance(client_pool, list):
                raise ValueError(
                    "client_pool must be list of tuples of (IP-address, port)")
            self.client_pool = MalmoPython.ClientPool()
            for client in client_pool:
                self.client_pool.add(MalmoPython.ClientInfo(*client))

        # TODO: produce observation space dynamically based on requested features

        self.video_height = self.mission_spec.getVideoHeight(0)
        self.video_width = self.mission_spec.getVideoWidth(0)
        self.video_depth = self.mission_spec.getVideoChannels(0)
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.video_height,
                                                   self.video_width,
                                                   self.video_depth))
        # dummy image just for the first observation
        self.last_image = np.zeros(
            (self.video_height, self.video_width, self.video_depth),
            dtype=np.uint8)

        self._create_action_space()

        # mission recording
        self.mission_record_spec = MalmoPython.MissionRecordSpec(
        )  # record nothing
        if recordDestination:
            self.mission_record_spec.setDestination(recordDestination)
        if recordRewards:
            self.mission_record_spec.recordRewards()
        if recordCommands:
            self.mission_record_spec.recordCommands()
        if recordMP4:
            self.mission_record_spec.recordMP4(*recordMP4)

        if gameMode:
            if gameMode == "spectator":
                self.mission_spec.setModeToSpectator()
            elif gameMode == "creative":
                self.mission_spec.setModeToCreative()
            elif gameMode == "survival":
                logger.warn(
                    "Cannot force survival mode, assuming it is the default.")
            else:
                assert False, "Unknown game mode: " + gameMode
Example 22
def main():

    #Hardcode number of agents to play song
    num_agents = 4

    #Obtain song csv and get solutions
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08,
                                    .03)  #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.number_converter(freq_list)
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)
    #print(solutions)

    #Get Mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)

    #Create musician for each agent and pass teleport positions.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))
    '''
    MALMO
    '''
    print('Starting...', flush=True)

    #Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())

    malmoutils.parse_command_line(agent_hosts[0])

    #Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()

    #Add client for each agent needed.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

    #Start mission for each agent
    for i in range(num_agents):
        startMission(
            agent_hosts[i], my_mission, my_client_pool,
            malmoutils.get_default_recording_object(
                agent_hosts[0], "agent_" + str(i + 1) + "_viewpoint_discrete"),
            i, '')

    #Wait for all missions to begin.
    waitForStart(agent_hosts)

    #Pause for simulation to begin.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''

    for i in range(len(solutions[0])):

        #teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j], solutions[j][i])

        # play each note.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")

        time.sleep(0.001)

        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
            musicians[k].can_play = False

        #modifies the timing between each note hit.
        time.sleep(0.2)
Example 23
        if actions[step] == 4: nextx = x - 1
        if table[nextx][nextz] == 1: return False
        # if abs(actions[step] - actions[step - 1]) == 2:
        #     return False
    # if actions[step] == 1: z = z + 1
    # if actions[step] == 2: x = x + 1
    # if actions[step] == 3: z = z - 1
    # if actions[step] == 4: x = x - 1
    # table[x][z] = 1
    return True


my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = MalmoPython.MissionRecordSpec()
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1",
                                          10001))  #10000 in use - try 10001

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        #agent_host.startMission(my_mission, my_mission_record)
        agent_host.startMission(my_mission, my_client_pool, my_mission_record,
                                0, "experimentID2")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:", e)
            exit(1)
        else:
            time.sleep(2)
Example 24
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.
    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost used in place of an OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sample when initializing
          the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
          target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
          Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes
    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'



    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(
        q_estimator,
        len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                "./save_%s-rep" % (expID))


    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" %(expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # world_state = agent_host.peekWorldState()
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()

    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")

    while world_state.number_of_observations_since_last_state <= 0:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = gridProcess(world_state) #MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" %i)

        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps-1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        agent_host.sendCommand(actionSet[action])
        #checking if the mission is done
        world_state = agent_host.peekWorldState()
        #Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s "%(reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0
        #getting the next state
        while world_state.number_of_observations_since_last_state <=0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state) # Malmo GetworldState? / env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

    print("Finished populating memory")

    # Record videos
    # Use the gym env Monitor wrapper
    # env = Monitor(env,
    #               directory=monitor_path,
    #               resume=True,
    #               video_callable=lambda count: count % record_video_every ==0)

    # NEED TO RECORD THE VIDEO AND SAVE TO THE SPECIFIED DIRECTORY

    for i_episode in range(num_episodes):
        print("%s-th episode"%i_episode)
        if i_episode != 0:
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            # my_mission.requestVideo(320, 240)
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '

            my_mission_record = malmoutils.get_default_recording_object(agent_host,
                                                                        "./save_%s-rep%d" % (expID, i_episode))

            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i_episode))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)
        # world_state = agent_host.getWorldState()
        # Reset the environment
        # world_state = agent_host.peekWorldState()
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            # print("Sleeping!!!")
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)  #MalmoGetWorldState?
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps-1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                    t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            # next_state, reward, done, _ = env.step(actionSet[action]) # Malmo AgentHost send command?
            # print("Sending command: ", actionSet[action])
            agent_host.sendCommand(actionSet[action])

            world_state = agent_host.peekWorldState()

            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                # print("Sleeping!!!")
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # if not world_state.is_mission_running:
            #     print("Breaking")
            #     break
            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)
            # if done:
            #     print("Breaking before updating last reward")
            #     break

            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:,:,1:], np.expand_dims(next_state, 2), axis=2)

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)
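            # A sketch of an alternative, not taken from this source: a
            # collections.deque(maxlen=replay_memory_size) evicts the oldest
            # transition in O(1), whereas list.pop(0) shifts every element (O(n)).
            # Note that random.sample below requires len(replay_memory) >= batch_size,
            # so the buffer is presumably pre-populated before this loop starts.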

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]
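            # Double DQN spelled out: the online network selects the action and the
            # target network evaluates it:
            #   y = r + gamma * (1 - done) * Q_target(s', argmax_a Q_online(s', a))
            # np.invert(done_batch) zeroes the bootstrap term on terminal transitions.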

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode+1],
            episode_rewards=stats.episode_rewards[:i_episode+1])

    # env.monitor.close()
    return stats
Example no. 25
validate = True
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(agent_host.getUsage())
    exit(1)
if agent_host.receivedArgument("help"):
    print(agent_host.getUsage())
    exit(0)

# Create a pool of Minecraft Mod clients:
my_client_pool = MalmoPython.ClientPool()
# Add the default client - port 10000 on the local machine:
my_client = MalmoPython.ClientInfo("127.0.0.1", 10000)
my_client_pool.add(my_client)
# Add extra clients here:
# eg my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001)) etc

# Create a unique identifier - different each time this script is run.
# In multi-agent missions, all agents must pass the same experimentID to prevent them from joining the wrong experiment.
experimentID = uuid.uuid4()

# Create a folder for our recordings - the platform will not create missing folders itself; it will simply throw an exception.
recordingsDirectory = "QuiltRecordings"
try:
    os.makedirs(recordingsDirectory)
except OSError as exception:
    if exception.errno != errno.EEXIST:  # ignore the error if the directory already exists
        raise
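
# On Python 3, the whole try/except above collapses to a single call:
#     os.makedirs(recordingsDirectory, exist_ok=True)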
Example no. 26
    def __init__(self,
                 maze_def,
                 reset,
                 video_dim=(32, 32),
                 num_parallel=1,
                 time_limit=30,
                 discrete_actions=False,
                 vision_observation=True,
                 depth=False,
                 num_frames=1,
                 grayscale=True):
        self.video_width, self.video_height = video_dim
        self.image_width, self.image_height = video_dim
        self.discrete_actions = discrete_actions
        self.vision_observation = vision_observation
        self.depth = depth
        self.num_parallel = num_parallel

        maze = create_maze(maze_def)
        self.mission_gen = MissionGen()
        self.mission = self.mission_gen.generate_mission(
            maze.create_maze_array(), reset=reset)
        self.XGoalPos = self.mission_gen.goal_pos[0]
        self.YGoalPos = self.mission_gen.goal_pos[2]

        # with open(mission_file, 'r') as f:
        #     print("Loading mission from %s" % mission_file)
        #     mission_xml = f.read()
        #     self.mission = MalmoPython.MissionSpec(mission_xml, True)
        self.mission.requestVideo(self.video_width, self.video_height)  # requestVideo expects (width, height)
        self.mission.observeRecentCommands()
        self.mission.allowAllContinuousMovementCommands()
        # self.mission.timeLimitInSeconds(time_limit)

        if self.num_parallel > 1:
            self.client_pool = MalmoPython.ClientPool()
            for i in range(num_parallel):
                port = 10000 + i
                self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

        self.agent_host = MalmoPython.AgentHost()
        self.agent_host.setObservationsPolicy(
            MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
        # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
        self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
        # self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

        self.mission_record_spec = MalmoPython.MissionRecordSpec()

        if discrete_actions:
            self._action_set = {0: "move 1", 1: "turn 1", 2: "turn -1"}
            self.action_space = Discrete(n=len(self._action_set))
        else:
            # self._action_set = ["move", "turn", "pitch"]
            # self.action_space = Box(np.array([0, -.5, -.5]), np.array([1, .5, .5]))
            self._action_set = [("move", (-1, 1)), ("turn", (-0.5, 0.5))]
            #("jump", (-1, 1))]
            lower_bound = np.asarray([x[1][0] for x in self._action_set])
            upper_bound = np.asarray([x[1][1] for x in self._action_set])
            self.action_space = Box(lower_bound, upper_bound)
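            # A note on the continuous case (an assumption; the command-sending code
            # is not shown here): an action vector [m, t] is presumably dispatched as
            # "move m" and "turn t", so these Box bounds mirror what Malmo's
            # continuous movement commands accept.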

        self.num_frames = num_frames
        self.grayscale = grayscale
        if self.grayscale:
            self.num_frame_channels = 1
            high = 1
        else:
            self.num_frame_channels = 3
            high = 255

        # Obs keys and bounds
        x_bounds = self.mission_gen.x_bounds
        z_bounds = self.mission_gen.z_bounds
        self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
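        # max_dist is effectively the arena diagonal (assuming bounds start at zero);
        # it caps the distanceFromGoal entry in obs_keys below.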
        self.minDistanceFromGoal = None
        if self.vision_observation:
            self.observation_space = Box(
                low=0,
                high=high,
                shape=(self.num_frames * self.num_frame_channels,
                       self.image_height, self.image_width))
        else:

            self.obs_keys = [(u'XPos', x_bounds), (u'ZPos', z_bounds),
                             (u'yaw', (0, 360)), (u'XGoalPos', x_bounds),
                             (u'YGoalPos', z_bounds),
                             (u'DistanceTravelled', (0, 30)),
                             (u'distanceFromGoal', (0, self.max_dist))]
            l_bounds = [key[1][0] for key in self.obs_keys]
            u_bounds = [key[1][1] for key in self.obs_keys]
            self.observation_space = Box(np.array(l_bounds),
                                         np.array(u_bounds))
        # self._horizon = env.spec.timestep_limit
        self.last_obs = None
        self.cum_reward = 0
        self.distance_travelled = 0
        self.terminal = False
        self.jump = 0
Example no. 27
                </RewardForDamagingEntity>
                <ObservationFromNearbyEntities>
                    <Range name="entities" xrange="''' + str(
                              ARENA_WIDTH) + '''" yrange="2" zrange="''' + str(
                                  ARENA_BREADTH) + '''" />
                </ObservationFromNearbyEntities>
                <ObservationFromFullStats/>''' + video_requirements + '''
            </AgentHandlers>
        </AgentSection>

    </Mission>'''


validate = True
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))

episode_reward = 0
if agent_host.receivedArgument("test"):
    num_reps = 1
else:
    num_reps = 10000
fout = open('results.csv', 'w')
# Set up the agent
agent = agentMC.agentMC(agent_host, MAX_ZOMBIES, MAX_DISTANCE, 20)
for i in range(num_reps):
    print('episode:', i)
    for iRepeat in range(1, MAX_ZOMBIES):
        #########################################
        #       Set up the environment          #
        #########################################
Example no. 28
# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording, our video XML should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(
    agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''


def clientInfos(cp):
    return [(c.ip_address, c.control_port, c.command_port) for c in cp.clients]


# Test adding some client infos to a client pool.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0
c1 = ("localhost", 10000, 0)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
assert clientInfos(clientPool) == [c1]
c2 = ("127.0.0.1", 10001, 20001)
client2 = MalmoPython.ClientInfo(*c2)
clientPool.add(client2)
assert clientInfos(clientPool) == [c1, c2]
Example no. 29
		'''
		return missionXML.format(src=seedfile, limit=timelimit, xcoord=random.randint(0,300), zcoord=random.randint(100, 350), tlimit=eptime)
agent_id = 10001
counter = 9019
while counter < numphotos:
    agent_host = MalmoPython.AgentHost()

    try:
        missionXML = generateXMLbySeed()
        my_mission = MalmoPython.MissionSpec(missionXML, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
    except Exception as e:
        print("open mission ERROR: ", e)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000)) # add Minecraft machines here as available
    agent_id += 1
    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "IMGCOLLECTOR")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
Example no. 30
                turn_speed = self.angvel(yaw_to_next_point, yaw, 16.0)
                # Quadratic falloff: forward speed drops as the turn sharpens,
                # so the agent does not overshoot waypoints while cornering.
                move_speed = (1.0 - abs(turn_speed)) ** 2
                self.agent_host.sendCommand("turn " + str(turn_speed))
                self.agent_host.sendCommand("move " + str(move_speed))
            time.sleep(0.001)


sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately (Python 2 only; text streams cannot be unbuffered in Python 3)

# Create a pool of Minecraft Mod clients.
# By default, mods will choose consecutive mission control ports, starting at 10000,
# so running six mods locally should produce the following pool by default (assuming
# nothing else is using these ports):
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10004))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10005))

# Create one agent host for parsing:
parser = MalmoPython.AgentHost()
options = [("nn", "n", RouteGenerators.NearestNeighbour, True),
           ("gen-al", "g", RouteGenerators.Genetic, False),
           ("div-and-conq", "d", RouteGenerators.DivideAndConquer, False),
           ("mst", "m", RouteGenerators.MinSpanTree, True),
           ("conv-hull", "c", RouteGenerators.Spiral, False),
           ("sa", "s", RouteGenerators.Annealing, True)]