def __init__(self, missionXML, serverIp='127.0.0.1'):
    self.missionDesc = None
    self.mission = None
    self.mission_record = None
    self.setMissionXML(missionXML)

    nAgents = len(missionXML.agentSections)
    self.agent_hosts = [MalmoPython.AgentHost() for n in range(nAgents)]
    self.agent_hosts[0].parse(sys.argv)

    if self.agent_hosts[0].receivedArgument('recording_dir'):
        recordingsDirectory = malmoutils.get_recordings_directory(self.agent_hosts[0])
        # The record spec must be created before it can be configured.
        self.mission_record = MalmoPython.MissionRecordSpec()
        self.mission_record.recordRewards()
        self.mission_record.recordObservations()
        self.mission_record.recordCommands()
        self.mission_record.setDestination(recordingsDirectory + "/" + "lastRecording.tgz")
        if self.agent_hosts[0].receivedArgument("record_video"):
            self.mission_record.recordMP4(24, 2000000)

    self.client_pool = MalmoPython.ClientPool()
    for x in range(10000, 10000 + nAgents):
        self.client_pool.add(MalmoPython.ClientInfo(serverIp, x))

    self.worldStates = [None] * nAgents
    self.observe = [None] * nAgents
    self.isAlive = [True] * nAgents
    self.frames = [None] * nAgents
    self.segmentation_frames = [None] * nAgents
def get_client_pool(self):
    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 20000))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
    return my_client_pool
def __init__(self, mission_xml, num_agents):
    self.mission = MalmoPython.MissionSpec(mission_xml, True)
    self.mission_record = MalmoPython.MissionRecordSpec()
    self.num_agents = num_agents
    self.experiment_ID = str(uuid.uuid4())

    # One client slot per agent, plus one extra.
    self.client_pool = MalmoPython.ClientPool()
    for x in range(10000, 10000 + self.num_agents + 1):
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', x))

    # Create one agent host for parsing.
    self.agent_hosts = [MalmoPython.AgentHost()]
    try:
        self.agent_hosts[0].parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_hosts[0].getUsage())
        exit(1)
    if self.agent_hosts[0].receivedArgument("help"):
        print(self.agent_hosts[0].getUsage())
        exit(0)

    # Create the rest of the agent hosts.
    if self.num_agents > 1:
        self.agent_hosts += [MalmoPython.AgentHost() for x in range(self.num_agents - 1)]
def run_mission(rambo_steve, episode):
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument('help'):
        print(agent_host.getUsage())
        exit(0)

    my_mission = MalmoPython.MissionSpec(world.getMissionXML(), True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    # To record the mission and video, use a recording object and request a video stream instead:
    # my_mission_record = malmoutils.get_default_recording_object(agent_host, "Mission")
    # my_mission.requestVideo(1280, 720)
    # my_mission_record.recordMP4(30, 2000000)

    # Set up clients to connect to:
    my_clients = MalmoPython.ClientPool()
    for i in range(5):
        my_clients.add(MalmoPython.ClientInfo('127.0.0.1', c.MISSION_CONTROL_PORT + i))

    # Attempt to start a mission:
    print('Attempting to start mission...')
    max_retries = 5
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "RamboSteve")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print('Error starting mission:', e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print('Waiting for the mission to start ', end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print('.', end='')
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print('Error:', error.text)
    print()
    print('Mission running ', end=' ')

    rambo_steve.run(agent_host, episode)

    print()
    print('Mission ended')
    time.sleep(2)
def __init__(self, _):
    # Graphing the returns
    self.log_frequency = 1
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []

    # DISCRETE ACTION SPACE [0, 5]:
    # - action 0 = attack
    # - action 1 = switch to sword
    # - action 2 = switch to axe
    # - action 3 = use gapple
    # - action 4 = use shield (1 second)
    # - action 5 = idle
    self.action_space = Discrete(6)

    # CONTINUOUS OBSERVATION SPACE:
    # - enemy in range: true=1, false=0
    # - my health normalized: [0, 1]
    # - enemy health normalized: [0, 1]
    # - enemy weapon: axe=1, sword=0.75, gapple=0.25, shield=0 (offensive to defensive scale)
    # - distance between the two agents
    self.observation_space = Box(0, 1, shape=(5,), dtype=np.float32)

    ###################################
    # Malmo parameters
    self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]

    # Create client pool
    self.client_pool = Malmo.ClientPool()
    self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000))
    self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10001))

    ###################################
    # Custom parameters
    self.mission_index = 0

    ###################################
    # Self-play parameters
    # self.opponent_policy = load_trained_agent(CURRENT_CHECKPOINT)
    self.use_self_play = False
    self.first_reset = True
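# A minimal sketch (not part of the environment above) of how the discrete
# action indices documented in that __init__ might be translated into Malmo
# commands. The helper name and the hotbar slot assignments are assumptions
# for illustration; only sendCommand and the command strings ("attack",
# "hotbar.N", "use") come from the standard Malmo command set.
import time

def send_discrete_action(agent_host, action):
    if action == 0:        # attack
        agent_host.sendCommand("attack 1")
        agent_host.sendCommand("attack 0")
    elif action == 1:      # switch to sword (assumed hotbar slot 1)
        agent_host.sendCommand("hotbar.1 1")
        agent_host.sendCommand("hotbar.1 0")
    elif action == 2:      # switch to axe (assumed hotbar slot 2)
        agent_host.sendCommand("hotbar.2 1")
        agent_host.sendCommand("hotbar.2 0")
    elif action == 3:      # use gapple (assumed to be the held item)
        agent_host.sendCommand("use 1")
        agent_host.sendCommand("use 0")
    elif action == 4:      # raise shield for roughly one second
        agent_host.sendCommand("use 1")
        time.sleep(1)
        agent_host.sendCommand("use 0")
    # action 5 = idle: send nothing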
def get_client_pool(self):
    """
    Malmo-specific function: create the client pool used to connect
    to the Minecraft server.
    """
    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 20000))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
    # my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
    return my_client_pool
def initalizeMinecraftMap(xml):
    agent_host = MalmoPython.AgentHost()
    my_mission = MalmoPython.MissionSpec(xml, True)

    recordedFileName = recordPath.format("final_take0_bad.tgz")
    # To skip video capture, use the bare record spec below and comment out
    # the recordedFileName spec, requestVideo and recordMP4 lines.
    # my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission_record = MalmoPython.MissionRecordSpec(recordedFileName)
    my_mission.requestVideo(1200, 720)
    my_mission_record.recordMP4(30, 2000000)

    my_mission.setViewpoint(1)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    return (my_mission, agent_host, my_clients, my_mission_record)
def __init__(self, port=None, existing=False):
    self.existing = existing
    if not existing:
        if not port:
            port = InstanceManager._get_valid_port()
        cmd = InstanceManager.MC_COMMAND
        if InstanceManager.headless:
            cmd += " -headless "
        cmd += " -port " + str(port)
        logger.info("Starting Minecraft process: " + cmd)
        args = shlex.split(cmd)
        proc = subprocess.Popen(
            args,
            cwd=InstanceManager.MINECRAFT_DIR,
            # pipe entire output
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            # use a process group, see http://stackoverflow.com/a/4791612/18576
            preexec_fn=os.setsid)
        # Wait until the Minecraft process has output "CLIENT enter state: DORMANT".
        while True:
            line = proc.stdout.readline()
            logger.debug(line)
            if not line:
                raise EOFError("Minecraft process finished unexpectedly")
            if b"CLIENT enter state: DORMANT" in line:
                break
        logger.info("Minecraft process ready")
        # Suppress the remaining output, otherwise the subprocess will block.
        # NB: there will still be logs under Malmo/Minecraft/run/logs
        # FNULL = open(os.devnull, 'w')
        FMINE = open('./minecraft.log', 'w')
        proc.stdout = FMINE
        self.proc = proc
    else:
        assert port is not None, "No existing port specified."

    self.ip = InstanceManager.DEFAULT_IP
    self.port = port
    self.existing = existing
    self.locked = False

    # Create the client pool.
    logger.info("Creating client pool for {}".format(self))
    self.client_pool = MalmoPython.ClientPool()
    self.client_pool.add(MalmoPython.ClientInfo(self.ip, self.port))
def __init__(self, _):
    # Graphing the returns
    self.step_rewards = []

    # DISCRETE ACTION SPACE [0, 5]:
    # - action 0 = attack
    # - action 1 = switch to sword
    # - action 2 = switch to axe
    # - action 3 = use gapple
    # - action 4 = use shield (1 second)
    # - action 5 = idle
    self.action_space = Discrete(6)

    # CONTINUOUS OBSERVATION SPACE:
    # - enemy in range: true=1, false=0
    # - my health normalized: [0, 1]
    # - enemy health normalized: [0, 1]
    # - enemy weapon: axe=1, sword=0.75, gapple=0.25, shield=0 (offensive to defensive scale)
    self.observation_space = Box(0, 1, shape=(4,), dtype=np.float32)

    ###################################
    # Malmo parameters
    self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]

    # Create client pool
    self.client_pool = Malmo.ClientPool()
    self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10001))
    self.client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002))

    self.mission_index = 0
    self.old_checkpoint = -1

    ###################################
    # Self-play parameters
    self.opponent_policy = load_trained_agent(get_current_checkpoint())
    self.use_self_play = False
    self.last_load = 0
    self.first_reset = True
def create_malmo_components():
    # Set up the client pool.
    client_pool = MalmoPython.ClientPool()
    for port in map(int, opts.malmo_ports.split(",")):
        print("adding client with port %d" % port, file=sys.stderr)
        client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
    # Set up the agent host.
    malmo = MalmoPython.AgentHost()
    # Can't do this without more complex caching of world-state video frames:
    # malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    # Load the mission spec.
    mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms), True)
    mission_record = MalmoPython.MissionRecordSpec()
    # Return everything.
    return client_pool, malmo, mission, mission_record
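# A hedged usage sketch for the factory above. The role argument (0) and the
# experiment id string are assumptions following the usual Malmo startMission
# pattern; retry/error handling is elided.
client_pool, malmo, mission, mission_record = create_malmo_components()
malmo.startMission(mission, client_pool, mission_record, 0, "classroom-experiment")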
def run(self):
    """Runs the game with the registered agents.

    Raises:
        :class:`jason_malmo.exceptions.NoAgentsException`:
            There are no registered agents in the game.
            Register an agent before running the game::

                game.register('/path/to/file.asl')
                game.run()
    """
    self._client_pool = MalmoPython.ClientPool()

    if not len(self._agents):
        raise NoAgentsException

    for port in range(10000, 10000 + len(self._agents) + 1):
        self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

    self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(), True)

    for (index, agent) in enumerate(self._agents):
        malmoutils.parse_command_line(agent.malmo_agent)
        self._safe_start_mission(
            agent.malmo_agent, self._my_mission, self._client_pool,
            malmoutils.get_default_recording_object(agent.malmo_agent, "saved_data"),
            index, '')

    self._safe_wait_for_start([agent.malmo_agent for agent in self._agents])

    threads = []
    for agent in self._agents:
        thr = threading.Thread(target=self._jason_env.run_agent, args=(agent,), kwargs={})
        thr.start()
        threads.append(thr)

    # TODO: loop only while the mission is running
    while True:
        for agent in self._agents:
            for (belief, value) in agent.beliefs.items():
                if belief[0] == 'tasks':
                    tasks = []
                    for task in list(value)[0].args[0]:
                        tasks.append(task)
                    self.tasks.handle(agent, tasks)
        time.sleep(0.05)
def StartServer(self, names, ip='127.0.0.1'):
    """
    Initiates a server given a mission XML and a list of names of the agents.
    """
    for i, name in enumerate(names):
        n = 10000 + i
        self.clientPool.add(MalmoPython.ClientInfo(ip, n))
        self.agents.append(MultiAgent(name, self.missionXML, i))

    malmoutils.parse_command_line(self.agents[0].host)

    for a in self.agents:
        a.StartMission(self.clientPool)

    self.safeWaitForStart(self.agents)
def start(self):
    self.malmo_client_pool = MalmoPython.ClientPool()
    self.malmo_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))  # 10000 in use - try 10001

    self.malmo_mission = MalmoPython.MissionSpec(self.missionXML, True)
    self.malmo_mission.forceWorldReset()
    self.malmo_mission_record = MalmoPython.MissionRecordSpec()
    self.malmo_mission.requestVideo(800, 500)
    self.malmo_mission.setViewpoint(1)

    # Attempt to start a mission, passing the client pool so port 10001 is actually used:
    max_retries = 10
    for retry in range(max_retries):
        try:
            self.agent_host.startMission(self.malmo_mission, self.malmo_client_pool,
                                         self.malmo_mission_record, 0, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print("Waiting for the mission to start ")
    self.world_state = self.agent_host.getWorldState()
    while not self.world_state.has_mission_begun:
        sys.stdout.write(".")
        time.sleep(0.1)
        self.world_state = self.agent_host.getWorldState()
        for error in self.world_state.errors:
            print("Error:", error.text)
    print(" ")
    print("Mission running ")

    self.number += 1
    self.start_time = time.time()
    self.end_time = None
def init_malmo(self):
    """ Initialize new Malmo mission. """
    # Load the XML file and create mission spec & record.
    mission_file = './mission.xml'
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.requestVideo(800, 500)
    my_mission.setViewpoint(1)

    # Attempt to start Malmo.
    max_retries = 3
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    for retry in range(max_retries):
        try:
            self.agent_host.startMission(my_mission, my_clients, my_mission_record, 0, 'Agent')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Start the world.
    world_state = self.agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
        world_state = self.agent_host.getWorldState()
        for error in world_state.errors:
            print("\nError:", error.text)

    self.initialize()
    return world_state
agent.sendCommand("attack 1") agent.sendCommand("attack 0") AGENT_COOLDOWNS[1] = ATTACK_COOLDOWNS[AGENT_WEAPONS[1]] if __name__ == "__main__": # Flush immediately print = functools.partial(print, flush=True) # Create agent host agent_hosts = [Malmo.AgentHost() for _ in range(AGENT_COUNT)] # Create client pool client_pool = Malmo.ClientPool() client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000)) client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002)) for a in range(MISSION_COUNT): print(f"Running mission #{a}...") # Create missions mission = Malmo.MissionSpec(get_mission_xml(), True) mission_id = str(uuid.uuid4()) # Start mission for a in range(AGENT_COUNT): start_mission(agent_hosts[a], mission, client_pool, Malmo.MissionRecordSpec(), a, mission_id) wait_for_start(agent_hosts)
<AgentHandlers> <DiscreteMovementCommands/> <RewardForCollectingItem> <Item reward="10" type="dirt"/> </RewardForCollectingItem> <RewardForDiscardingItem> <Item reward="100" type="dirt"/> </RewardForDiscardingItem> </AgentHandlers> </AgentSection> </Mission>''' my_mission = MalmoPython.MissionSpec(xml,True) client_pool = MalmoPython.ClientPool() client_pool.add( MalmoPython.ClientInfo('127.0.0.1',10000) ) client_pool.add( MalmoPython.ClientInfo('127.0.0.1',10001) ) MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF) def safeStartMission(agent_host, mission, client_pool, recording, role, experimentId): used_attempts = 0 max_attempts = 5 print("Calling startMission for role", role) while True: try: agent_host.startMission(mission, client_pool, recording, role, experimentId) break except MalmoPython.MissionException as e: errorCode = e.details.errorCode if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
</AgentHandlers> </AgentSection>''' xml += '</Mission>' return xml # Set up a client pool. # IMPORTANT: If ANY of the clients will be on a different machine, then you MUST # make sure that any client which can be the server has an IP address that is # reachable from other machines - ie DO NOT SIMPLY USE 127.0.0.1!!!! # The IP address used in the client pool will be broadcast to other agents who # are attempting to find the server - so this will fail for any agents on a # different machine. client_pool = MalmoPython.ClientPool() for x in range(10000, 10000 + NUM_AGENTS + 1): client_pool.add( MalmoPython.ClientInfo('127.0.0.1', x) ) # Keep score of how our robots are doing: survival_scores = [0 for x in range(NUM_AGENTS)] # Lasted to the end of the mission without dying. apple_scores = [0 for x in range(NUM_AGENTS)] # Collecting apples is good. zombie_kill_scores = [0 for x in range(NUM_AGENTS)] # Good! Help rescue humanity from zombie-kind. player_kill_scores = [0 for x in range(NUM_AGENTS)] # Bad! Don't kill the other players! num_missions = 5 if INTEGRATION_TEST_MODE else 30000 for mission_no in range(1, num_missions+1): print("Running mission #" + str(mission_no)) # Create mission xml - use forcereset if this is the first mission. my_mission = MalmoPython.MissionSpec(getXML("true" if mission_no == 1 else "false"), True) # Generate an experiment ID for this mission. # This is used to make sure the right clients join the right servers -
def run(argv=['']):
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        agent_host.sendCommand("turn " + str(random.random() * 2 - 1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",
              world_state.number_of_video_frames_since_last_state,
              world_state.number_of_observations_since_last_state,
              world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':', frame.channels, 'channels')
            # image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels))  # to convert to a PIL image

    print("Mission has stopped.")
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """
    Runs a single mission episode with random actions, using the same Malmo
    setup as deep_q_learning below.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost, used in place of an OpenAI environment
        q_estimator: Estimator object used for the q values
        state_processor: A StateProcessor object
        experiment_dir: Directory to save Tensorflow summaries in
        epsilon_start: Chance to sample a random action when taking an action.
            Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "save_%s-rep" % (expID))

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")

    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()

    # Populate the replay memory with initial experience
    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = gridProcess(world_state)  # Malmo grid observation, used in place of env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    stepNum = 0
    while world_state.is_mission_running:
        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action])  # gym equivalent
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])

        world_state = agent_host.peekWorldState()
        num_frames_seen = world_state.number_of_video_frames_since_last_state
        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            # Wait for the observation that follows the action
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
            state = next_state
            # time.sleep(1)
    return None
def __init__(self,
             world_def,
             video_dim=(32, 32),
             num_parallel=1,
             time_limit=20,
             reset=True,
             discrete_actions=False,
             vision_observation=False,
             depth=False,
             num_frames=1,
             grayscale=True):
    self.video_width, self.video_height = video_dim
    self.image_width, self.image_height = video_dim
    self.discrete_actions = discrete_actions
    self.vision_observation = vision_observation
    self.depth = depth
    self.num_parallel = num_parallel
    self.world_def = world_def

    self.mission = self.world_def.generate_mission(reset=reset)
    # self.XGoalPos, self.YGoalPos = self.world_def.goal_pos[0], self.world_def.goal_pos[2]
    self.mission.requestVideo(self.video_height, self.video_width)
    self.mission.observeRecentCommands()
    self.mission.allowAllContinuousMovementCommands()
    self.mission.timeLimitInSeconds(time_limit)

    if self.num_parallel > 1:
        self.client_pool = MalmoPython.ClientPool()
        for i in range(num_parallel):
            port = 10000 + i
            self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

    self.agent_host = MalmoPython.AgentHost()
    self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
    # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    # self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
    self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    self.mission_record_spec = MalmoPython.MissionRecordSpec()

    if discrete_actions:
        self._action_set = {0: "move 1", 1: "turn 0.5", 2: "turn -0.5", 3: None}
        self.action_space = Discrete(n=len(self._action_set))
    else:
        self._action_set = [
            ("move", (-1, 1)),
            ("turn", (-1, 1)),
            ("pitch", (-1, 1)),
            ("use", (0, 1)),
            ("jump", (0, 1)),
        ]
        # self._action_set = [("move", (0, 1)),
        #                     ("move", (-1, 0)),
        #                     ("turn", (0, 1)),
        #                     ("turn", (-1, 0))]
        lower_bound = np.asarray([x[1][0] for x in self._action_set])
        upper_bound = np.asarray([x[1][1] for x in self._action_set])
        self.action_space = Box(lower_bound, upper_bound)

    self.num_frames = num_frames
    self.grayscale = grayscale
    if self.grayscale:
        self.num_frame_channels = 1
        high = 1
    else:
        self.num_frame_channels = 3
        high = 255

    # Obs keys and bounds
    x_bounds = self.world_def.x_bounds
    z_bounds = self.world_def.z_bounds
    self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
    self.minDistanceFromGoal = None

    if self.vision_observation:
        self.observation_space = Box(
            low=0,
            high=high,
            shape=(self.image_height, self.image_width, self.num_frames * self.num_frame_channels))
    else:
        self.obs_keys = [
            (u'XPos', x_bounds),
            (u'YPos', (200, 300)),
            (u'ZPos', z_bounds),
            (u'yaw', (0, 360)),
            (u'pitch', (0, 180)),
            # (u'XGoalPos', x_bounds),
            # (u'YGoalPos', z_bounds),
            (u'DistanceTravelled', (0, 30)),
            (u'distanceFromGoal', (0, self.max_dist)),
        ]
        l_bounds = [key[1][0] for key in self.obs_keys]
        u_bounds = [key[1][1] for key in self.obs_keys]
        self.observation_space = Box(np.array(l_bounds), np.array(u_bounds))

    self.last_obs = None
    self.cum_reward = 0
    self.distance_travelled = 0
    self.terminal = False
    self.jump = 0
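# A minimal sketch (names assumed, not part of the class above) of how the
# continuous _action_set tuples - ("move", (-1, 1)), ("turn", (-1, 1)), etc. -
# are typically turned into Malmo commands: each component of the Box action
# is clipped to its bound and sent as "<command> <value>".
import numpy as np

def send_continuous_action(agent_host, action_set, action):
    for (command, (low, high)), value in zip(action_set, action):
        value = float(np.clip(value, low, high))  # respect the per-command bounds
        agent_host.sendCommand("%s %f" % (command, value))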
def init(self,
         client_pool=None,
         start_minecraft=None,
         continuous_discrete=True,
         add_noop_command=None,
         max_retries=90,
         retry_sleep=10,
         step_sleep=0.001,
         skip_steps=0,
         videoResolution=None,
         videoWithDepth=None,
         observeRecentCommands=None,
         observeHotBar=None,
         observeFullInventory=None,
         observeGrid=None,
         observeDistance=None,
         observeChat=None,
         allowContinuousMovement=None,
         allowDiscreteMovement=None,
         allowAbsoluteMovement=None,
         recordDestination=None,
         recordObservations=None,
         recordRewards=None,
         recordCommands=None,
         recordMP4=None,
         gameMode=None,
         forceWorldReset=None):
    self.max_retries = max_retries
    self.retry_sleep = retry_sleep
    self.step_sleep = step_sleep
    self.skip_steps = skip_steps
    self.forceWorldReset = forceWorldReset
    self.continuous_discrete = continuous_discrete
    self.add_noop_command = add_noop_command

    if videoResolution:
        if videoWithDepth:
            self.mission_spec.requestVideoWithDepth(*videoResolution)
        else:
            self.mission_spec.requestVideo(*videoResolution)

    if observeRecentCommands:
        self.mission_spec.observeRecentCommands()
    if observeHotBar:
        self.mission_spec.observeHotBar()
    if observeFullInventory:
        self.mission_spec.observeFullInventory()
    if observeGrid:
        self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
    if observeDistance:
        self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
    if observeChat:
        self.mission_spec.observeChat()

    if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
        # if there are any parameters, remove current command handlers first
        self.mission_spec.removeAllCommandHandlers()

        if allowContinuousMovement is True:
            self.mission_spec.allowAllContinuousMovementCommands()
        elif isinstance(allowContinuousMovement, list):
            for cmd in allowContinuousMovement:
                self.mission_spec.allowContinuousMovementCommand(cmd)

        if allowDiscreteMovement is True:
            self.mission_spec.allowAllDiscreteMovementCommands()
        elif isinstance(allowDiscreteMovement, list):
            for cmd in allowDiscreteMovement:
                self.mission_spec.allowDiscreteMovementCommand(cmd)

        if allowAbsoluteMovement is True:
            self.mission_spec.allowAllAbsoluteMovementCommands()
        elif isinstance(allowAbsoluteMovement, list):
            for cmd in allowAbsoluteMovement:
                self.mission_spec.allowAbsoluteMovementCommand(cmd)

    if start_minecraft:
        # start Minecraft process assigning port dynamically
        self.mc_process, port = minecraft_py.start()
        logger.info("Started Minecraft on port %d, overriding client_pool.", port)
        client_pool = [('127.0.0.1', port)]

    if client_pool:
        if not isinstance(client_pool, list):
            raise ValueError("client_pool must be list of tuples of (IP-address, port)")
        self.client_pool = MalmoPython.ClientPool()
        for client in client_pool:
            self.client_pool.add(MalmoPython.ClientInfo(*client))

    # TODO: produce observation space dynamically based on requested features
    self.video_height = self.mission_spec.getVideoHeight(0)
    self.video_width = self.mission_spec.getVideoWidth(0)
    self.video_depth = self.mission_spec.getVideoChannels(0)
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(self.video_height, self.video_width, self.video_depth))
    # dummy image just for the first observation
    self.last_image = np.zeros((self.video_height, self.video_width, self.video_depth), dtype=np.uint8)
    self._create_action_space()

    # mission recording
    self.mission_record_spec = MalmoPython.MissionRecordSpec()  # record nothing by default
    if recordDestination:
        self.mission_record_spec.setDestination(recordDestination)
    if recordObservations:
        self.mission_record_spec.recordObservations()
    if recordRewards:
        self.mission_record_spec.recordRewards()
    if recordCommands:
        self.mission_record_spec.recordCommands()
    if recordMP4:
        self.mission_record_spec.recordMP4(*recordMP4)

    if gameMode:
        if gameMode == "spectator":
            self.mission_spec.setModeToSpectator()
        elif gameMode == "creative":
            self.mission_spec.setModeToCreative()
        elif gameMode == "survival":
            logger.warn("Cannot force survival mode, assuming it is the default.")
        else:
            assert False, "Unknown game mode: " + gameMode
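# A hedged usage sketch for the gym-style init() above. The class name
# MinecraftEnv is an assumption (the snippet only shows the method); the
# keyword arguments are the ones the signature actually accepts.
env = MinecraftEnv()  # hypothetical constructor for the class this init() belongs to
env.init(client_pool=[('127.0.0.1', 10000), ('127.0.0.1', 10001)],
         videoResolution=[320, 240],
         allowDiscreteMovement=["move", "turn"],
         recordDestination="data.tgz",
         recordRewards=True,
         recordMP4=[30, 400000])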
def main():
    # Hardcode the number of agents to play the song
    num_agents = 4

    # Obtain the song csv and get solutions
    # freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv", 120, 7000, -.08)  # 1 Agent
    # freq_list = mt.create_note_list("Chopsticks.csv", 120, 4000, -.15, .03)              # 2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08, .03)                 # 2 Agents
    # freq_list = mt.create_note_list("Grenade_120BPM.csv", 120, 1500, -.08, .03)          # 4 Agents
    freq_list = mt.number_converter(freq_list)
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)

    # Get mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)

    # Create a musician for each agent and pass teleport positions.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))

    ''' MALMO '''
    print('Starting...', flush=True)

    # Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())
    malmoutils.parse_command_line(agent_hosts[0])

    # Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()

    # Add a client for each agent needed.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)

    # Start the mission for each agent
    for i in range(num_agents):
        startMission(agent_hosts[i], my_mission, my_client_pool,
                     malmoutils.get_default_recording_object(
                         agent_hosts[0], "agent_" + str(i + 1) + "_viewpoint_discrete"),
                     i, '')

    # Wait for all missions to begin.
    waitForStart(agent_hosts)

    # Pause for the simulation to begin.
    time.sleep(1)

    ''' SIMULATION BEGINS HERE '''
    for i in range(len(solutions[0])):
        # Teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j], solutions[j][i])
        # Play each note.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")
        time.sleep(0.001)
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
                musicians[k].can_play = False
        # Controls the timing between each note hit.
        time.sleep(0.2)
    if actions[step] == 4:
        nextx = x - 1
        if table[nextx][nextz] == 1:
            return False
    # if abs(actions[step] - actions[step - 1]) == 2:
    #     return False
    # if actions[step] == 1: z = z + 1
    # if actions[step] == 2: x = x + 1
    # if actions[step] == 3: z = z - 1
    # if actions[step] == 4: x = x - 1
    # table[x][z] = 1
    return True

my_mission = MalmoPython.MissionSpec(missionXML, True)
my_mission_record = MalmoPython.MissionRecordSpec()

my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))  # 10000 in use - try 10001

# Attempt to start a mission:
max_retries = 3
for retry in range(max_retries):
    try:
        # agent_host.startMission(my_mission, my_mission_record)
        agent_host.startMission(my_mission, my_client_pool, my_mission_record, 0, "experimentID2")
        break
    except RuntimeError as e:
        if retry == max_retries - 1:
            print("Error starting mission:", e)
            exit(1)
        else:
            time.sleep(2)
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost, used in place of an OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sample when
            initializing the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
            target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
            Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes

    Returns:
        An EpisodeStats object with two numpy arrays for episode_lengths and episode_rewards.
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)

    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-rep" % (expID))

    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # world_state = agent_host.peekWorldState()
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")
    while world_state.number_of_observations_since_last_state <= 0:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()

    state = gridProcess(world_state)  # Malmo grid observation, used in place of env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)

    for i in range(replay_memory_init_size):
        print("%s th replay memory" % i)
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps - 1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # next_state, reward, done, _ = env.step(actionSet[action])  # gym equivalent
        # print("Sending command: ", actionSet[action])
        agent_host.sendCommand(actionSet[action])

        # Checking if the mission is done
        world_state = agent_host.peekWorldState()

        # Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0

        # Getting the next state
        while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()

        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            # The mission ended while populating the memory - restart it.
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)
            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state)  # Malmo world state, used in place of env.reset()
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)

    print("Finished populating memory")

    # Record videos.
    # With a gym environment this would use the Monitor wrapper:
    # env = Monitor(env,
    #               directory=monitor_path,
    #               resume=True,
    #               video_callable=lambda count: count % record_video_every == 0)
    # Here the video needs to be recorded and saved to the specified directory instead.

    for i_episode in range(num_episodes):
        print("%s-th episode" % i_episode)

        if i_episode != 0:
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            # my_mission.requestVideo(320, 240)
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available
            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '
            my_mission_record = malmoutils.get_default_recording_object(
                agent_host, "./save_%s-rep%d" % (expID, i_episode))
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record,
                                            agentID, "%s-%d" % (expID, i_episode))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

            world_state = agent_host.getWorldState()
            print("Waiting for the mission to start", end=' ')
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)

        # world_state = agent_host.getWorldState()
        # Reset the environment
        # world_state = agent_host.peekWorldState()
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            # print("Sleeping!!!")
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():

            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps - 1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            # next_state, reward, done, _ = env.step(actionSet[action])  # gym equivalent
            # print("Sending command: ", actionSet[action])
            agent_host.sendCommand(actionSet[action])
            world_state = agent_host.peekWorldState()

            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0

            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                # print("Sleeping!!!")
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            # if not world_state.is_mission_running:
            #     print("Breaking")
            #     break

            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)
            # if done:
            #     print("Breaking before updating last reward")
            #     break

            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break

            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode],
                                  node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode],
                                  node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode + 1],
            episode_rewards=stats.episode_rewards[:i_episode + 1])

    # env.monitor.close()
    return stats
validate = True
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(agent_host.getUsage())
    exit(1)
if agent_host.receivedArgument("help"):
    print(agent_host.getUsage())
    exit(0)

# Create a pool of Minecraft Mod clients:
my_client_pool = MalmoPython.ClientPool()
# Add the default client - port 10000 on the local machine:
my_client = MalmoPython.ClientInfo("127.0.0.1", 10000)
my_client_pool.add(my_client)
# Add extra clients here:
# eg my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001)) etc

# Create a unique identifier - different each time this script is run.
# In multi-agent missions, all agents must pass the same experimentID, in order
# to prevent agents from joining the wrong experiments.
experimentID = uuid.uuid4()

# Create a folder to put our recordings in - the platform will not create
# missing folders itself, it will simply throw an exception.
recordingsDirectory = "QuiltRecordings"
try:
    os.makedirs(recordingsDirectory)
except OSError as exception:
    if exception.errno != errno.EEXIST:  # ignore the error if the folder already existed
        raise
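# A minimal sketch of the experiment-ID point made above: in a multi-agent
# mission every startMission call passes the same experimentID (only the role
# differs), so clients cannot join the wrong experiment. The agent_hosts list,
# mission and record objects are assumed to be set up as in the surrounding
# snippets; retry handling is elided.
for role, host in enumerate(agent_hosts):
    host.startMission(my_mission, my_client_pool, MalmoPython.MissionRecordSpec(),
                      role, str(experimentID))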
def __init__(self,
             maze_def,
             reset,
             video_dim=(32, 32),
             num_parallel=1,
             time_limit=30,
             discrete_actions=False,
             vision_observation=True,
             depth=False,
             num_frames=1,
             grayscale=True):
    self.video_width, self.video_height = video_dim
    self.image_width, self.image_height = video_dim
    self.discrete_actions = discrete_actions
    self.vision_observation = vision_observation
    self.depth = depth
    self.num_parallel = num_parallel

    maze = create_maze(maze_def)
    self.mission_gen = MissionGen()
    self.mission = self.mission_gen.generate_mission(maze.create_maze_array(), reset=reset)
    self.XGoalPos, self.YGoalPos = self.mission_gen.goal_pos[0], self.mission_gen.goal_pos[2]

    # with open(mission_file, 'r') as f:
    #     print("Loading mission from %s" % mission_file)
    #     mission_xml = f.read()
    #     self.mission = MalmoPython.MissionSpec(mission_xml, True)

    self.mission.requestVideo(self.video_height, self.video_width)
    self.mission.observeRecentCommands()
    self.mission.allowAllContinuousMovementCommands()
    # self.mission.timeLimitInSeconds(time_limit)

    if self.num_parallel > 1:
        self.client_pool = MalmoPython.ClientPool()
        for i in range(num_parallel):
            port = 10000 + i
            self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))

    self.agent_host = MalmoPython.AgentHost()
    self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
    # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
    # self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    self.mission_record_spec = MalmoPython.MissionRecordSpec()

    if discrete_actions:
        self._action_set = {0: "move 1", 1: "turn 1", 2: "turn -1"}
        self.action_space = Discrete(n=len(self._action_set))
    else:
        # self._action_set = ["move", "turn", "pitch"]
        # self.action_space = Box(np.array([0, -.5, -.5]), np.array([1, .5, .5]))
        self._action_set = [("move", (-1, 1)),
                            ("turn", (-0.5, 0.5))]  # ("jump", (-1, 1))
        lower_bound = np.asarray([x[1][0] for x in self._action_set])
        upper_bound = np.asarray([x[1][1] for x in self._action_set])
        self.action_space = Box(lower_bound, upper_bound)

    self.num_frames = num_frames
    self.grayscale = grayscale
    if self.grayscale:
        self.num_frame_channels = 1
        high = 1
    else:
        self.num_frame_channels = 3
        high = 255

    # Obs keys and bounds
    x_bounds = self.mission_gen.x_bounds
    z_bounds = self.mission_gen.z_bounds
    self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
    self.minDistanceFromGoal = None

    if self.vision_observation:
        self.observation_space = Box(
            low=0,
            high=high,
            shape=(self.num_frames * self.num_frame_channels, self.image_height, self.image_width))
    else:
        self.obs_keys = [(u'XPos', x_bounds),
                         (u'ZPos', z_bounds),
                         (u'yaw', (0, 360)),
                         (u'XGoalPos', x_bounds),
                         (u'YGoalPos', z_bounds),
                         (u'DistanceTravelled', (0, 30)),
                         (u'distanceFromGoal', (0, self.max_dist))]
        l_bounds = [key[1][0] for key in self.obs_keys]
        u_bounds = [key[1][1] for key in self.obs_keys]
        self.observation_space = Box(np.array(l_bounds), np.array(u_bounds))

    # self._horizon = env.spec.timestep_limit
    self.last_obs = None
    self.cum_reward = 0
    self.distance_travelled = 0
    self.terminal = False
    self.jump = 0
</RewardForDamagingEntity> <ObservationFromNearbyEntities> <Range name="entities" xrange="''' + str( ARENA_WIDTH) + '''" yrange="2" zrange="''' + str( ARENA_BREADTH) + '''" /> </ObservationFromNearbyEntities> <ObservationFromFullStats/>''' + video_requirements + ''' </AgentHandlers> </AgentSection> </Mission>''' validate = True my_client_pool = MalmoPython.ClientPool() my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000)) episode_reward = 0 if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 10000 fout = open('results.csv', 'w') # Set up the agent agent = agentMC.agentMC(agent_host, MAX_ZOMBIES, MAX_DISTANCE, 20) for i in range(num_reps): print('episode:', i) for iRepeat in range(1, MAX_ZOMBIES): ######################################### # Set up the enviornment # #########################################
# See if we can parse our extended command line.
malmoutils.parse_command_line(agentHost)

# As we are not recording, our video xml should be an empty string.
assert malmoutils.get_video_xml(agentHost) == ''

# Test that we can get a default recording spec.
assert type(malmoutils.get_default_recording_object(agentHost, "test")) == MalmoPython.MissionRecordSpec

# Default recordings directory is ''.
assert malmoutils.get_recordings_directory(agentHost) == ''

def clientInfos(cp):
    return [(c.ip_address, c.control_port, c.command_port) for c in cp.clients]

# Test adding some client infos to a client pool.
clientPool = MalmoPython.ClientPool()
assert len(clientPool.clients) == 0

c1 = ("localhost", 10000, 0)
client1 = MalmoPython.ClientInfo(*c1)
clientPool.add(client1)
assert clientInfos(clientPool) == [c1]

c2 = ("127.0.0.1", 10001, 20001)
client2 = MalmoPython.ClientInfo(*c2)
clientPool.add(client2)
assert clientInfos(clientPool) == [c1, c2]
    '''
    return missionXML.format(src=seedfile,
                             limit=timelimit,
                             xcoord=random.randint(0, 300),
                             zcoord=random.randint(100, 350),
                             tlimit=eptime)

agent_id = 10001
counter = 9019
while counter < numphotos:
    agent_host = MalmoPython.AgentHost()
    try:
        missionXML = generateXMLbySeed()
        my_mission = MalmoPython.MissionSpec(missionXML, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
    except Exception as e:
        print("open mission ERROR: ", e)

    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available

    agent_id += 1

    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "IMGCOLLECTOR")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
        turn_speed = self.angvel(yaw_to_next_point, yaw, 16.0)
        move_speed = (1.0 - abs(turn_speed)) * (1.0 - abs(turn_speed))
        self.agent_host.sendCommand("turn " + str(turn_speed))
        self.agent_host.sendCommand("move " + str(move_speed))
        time.sleep(0.001)

sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately

# Create a pool of Minecraft Mod clients.
# By default, mods will choose consecutive mission control ports, starting at 10000,
# so running six mods locally should produce the following pool by default (assuming
# nothing else is using these ports):
my_client_pool = MalmoPython.ClientPool()
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10002))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10003))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10004))
my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10005))

# Create one agent host for parsing:
parser = MalmoPython.AgentHost()

options = [("nn", "n", RouteGenerators.NearestNeighbour, True),
           ("gen-al", "g", RouteGenerators.Genetic, False),
           ("div-and-conq", "d", RouteGenerators.DivideAndConquer, False),
           ("mst", "m", RouteGenerators.MinSpanTree, True),
           ("conv-hull", "c", RouteGenerators.Spiral, False),
           ("sa", "s", RouteGenerators.Annealing, True)]