def init_malmo(self):
    """
    Request a fresh ZombieKiller mission on the local Minecraft client.

    The mission is built from ``self.GetMissionXML()`` with a forced world
    reset.  ``self.num_entities`` is restored from ``self.num_entities_copy``
    before the mission is requested and ``self.cobblestone_wall`` is cleared
    afterwards.

    Returns:
        ``self.agent_host`` once ``startMission`` has been accepted.  If all
        three attempts raise ``RuntimeError`` the error is printed and the
        process exits with status 1.
    """
    mission = MalmoPython.MissionSpec(self.GetMissionXML(), True)
    mission.forceWorldReset()  # so it doesn't collect any past objects
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(800, 500)
    mission.setViewpoint(1)

    self.num_entities = self.num_entities_copy

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    for attempt in range(1, attempt_limit + 1):
        try:
            self.agent_host.startMission(mission, pool, recording, 0,
                                         "ZombieKiller")
        except RuntimeError as err:
            if attempt < attempt_limit:
                # Give the client a moment before asking again.
                time.sleep(2)
                continue
            print("Error starting mission:", err)
            exit(1)
        else:
            break

    self.cobblestone_wall = 0
    return self.agent_host
def load_solo_mission(self, mission, agent):
    """
    Quit whatever mission ``agent`` is running and start ``mission`` on it.

    The mission is started as role 0 against the client at 127.0.0.1:10000,
    retrying up to 25 times (2 s apart) when ``startMission`` raises
    ``RuntimeError``; after the last failure the process exits with status 1.
    The call then blocks, polling every 0.1 s and printing any reported
    errors, until the world state says the mission has begun.
    """
    recording = MalmoPython.MissionRecordSpec()
    mission.setViewpoint(0)

    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    attempt_limit = 25

    # Quit out of the currently running mission
    agent.sendCommand("quit")

    # Attempt to start the mission
    for attempt in range(1, attempt_limit + 1):
        try:
            agent.startMission(mission, pool, recording, 0, "")
        except RuntimeError:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Failed to start mission.")
            exit(1)
        else:
            break

    # Loop until the mission starts
    print("Waiting for the mission to start ")
    state = agent.getWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agent.getWorldState()
        for problem in state.errors:
            print("Error:", problem.text)

    print()
    print("Mission running.")
def create_mission(ind, agent_host, start, dropoff, pickup):
    """
    Build and start one mission with marked pick-up and drop-off blocks.

    Args:
        ind: Integer used to build the experiment id ``Herobrine-<ind>``.
        agent_host: Agent host on which the mission is started (role 0).
        start: Forwarded unchanged to ``GetMissionXML``.
        dropoff: Indexable pair; elements 0 and 1 are cast to ``int`` and used
            as the x/z of a ``diamond_block`` drawn at y=1.
        pickup: Indexable pair; elements 0 and 1 are cast to ``int`` and used
            as the x/z of a ``redstone_block`` drawn at y=1.

    Returns:
        ``(mission_spec, world_state)`` where ``world_state`` is the peeked
        state in which the mission was first seen to have begun.  The process
        exits with status 1 if all three start attempts fail.
    """
    mission = MalmoPython.MissionSpec(GetMissionXML(start), True)
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(800, 500)
    mission.setViewpoint(1)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    pickup_x, pickup_z = int(pickup[0]), int(pickup[1])
    mission.drawBlock(pickup_x, 1, pickup_z, "redstone_block")
    dropoff_x, dropoff_z = int(dropoff[0]), int(dropoff[1])
    mission.drawBlock(dropoff_x, 1, dropoff_z, "diamond_block")

    # Attempt to start the mission
    for attempt in range(1, attempt_limit + 1):
        try:
            agent_host.startMission(mission, pool, recording, 0,
                                    "%s-%d" % ('Herobrine', ind))
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission", ":", err)
            exit(1)
        else:
            break

    # Peek (rather than get) so queued observations are left untouched.
    state = agent_host.peekWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agent_host.peekWorldState()
        for problem in state.errors:
            print("Error:", problem.text)
    time.sleep(1)

    return mission, state
def init_malmo(agent_host):
    """
    Request a new "MineGuyz" mission on ``agent_host``.

    The mission XML comes from ``GetMissionXML`` parameterised by
    ``Hyperparameters.SIZE``, ``OBS_SIZE`` and ``MAX_EPISODE_STEPS``.  Two
    local clients (ports 10000 and 10001) are offered to Malmo.

    Returns:
        ``agent_host`` once ``startMission`` has been accepted.  After three
        failed attempts the error is printed and the process exits with
        status 1.
    """
    mission_xml = GetMissionXML(Hyperparameters.SIZE,
                                Hyperparameters.OBS_SIZE,
                                Hyperparameters.MAX_EPISODE_STEPS)
    mission = MalmoPython.MissionSpec(mission_xml, True)
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(800, 500)
    mission.setViewpoint(1)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    for attempt in range(1, attempt_limit + 1):
        try:
            agent_host.startMission(mission, pool, recording, 0, "MineGuyz")
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission:", err)
            exit(1)
        else:
            break

    return agent_host
def init_malmo(agent_host):
    """
    Request a new "Dropper" mission on ``agent_host``.

    The mission is built from ``GetMissionXML()``, asks for 1200x700 video
    and uses viewpoint 0.  Only the local client on port 10000 is offered.

    Returns:
        ``agent_host`` once ``startMission`` has been accepted.  After three
        failed attempts the error is printed and the process exits with
        status 1.
    """
    mission = MalmoPython.MissionSpec(GetMissionXML(), True)
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(1200, 700)
    mission.setViewpoint(0)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    for attempt in range(1, attempt_limit + 1):
        try:
            agent_host.startMission(mission, pool, recording, 0, "Dropper")
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission:", err)
            exit(1)
        else:
            break

    return agent_host
def init_malmo(self):
    """
    Start a new "DiamondCollector" mission and wait until it is running.

    The mission is built from ``self.get_mission_xml()`` and started on
    ``self.agent_host`` against the local client on port 10000.

    Returns:
        The world state in which the mission was first seen to have begun.
        After three failed start attempts the error is printed and the
        process exits with status 1.
    """
    mission = MalmoPython.MissionSpec(self.get_mission_xml(), True)
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(800, 500)
    mission.setViewpoint(1)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    for attempt in range(1, attempt_limit + 1):
        try:
            self.agent_host.startMission(mission, pool, recording, 0,
                                         'DiamondCollector')
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission:", err)
            exit(1)
        else:
            break

    # Poll every 0.1 s until Malmo reports that the mission has begun.
    state = self.agent_host.getWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = self.agent_host.getWorldState()
        for problem in state.errors:
            print("\nError:", problem.text)

    return state
def _config_clients( clients: List[Tuple[str, int]] = None) -> Union[None, MalmoPython.ClientPool]: if clients is None: return None client_pool = MalmoPython.ClientPool() for client in clients: client_pool.add(MalmoPython.ClientInfo(client[0], client[1])) return client_pool
def load_duo_mission(self, mission, agents):
    """
    Quit any running mission on both agents and start ``mission`` on them.

    ``agents[0]`` is started as role 0 and ``agents[1]`` as role 1, both
    against the local clients on ports 10001 and 10002.  Each start is
    retried up to 25 times (2 s apart); every ``RuntimeError`` is printed and
    the process exits with status 1 after the last one.

    The call then polls ``agents[0]`` every 0.1 s until the mission has
    begun.  Once five polls have reported errors the whole procedure is
    restarted by calling this method again and returning its result.
    """
    recording = MalmoPython.MissionRecordSpec()
    mission.setViewpoint(0)

    attempt_limit = 25

    # Quit out of the currently running mission
    for role in (0, 1):
        agents[role].sendCommand("quit")

    pool = MalmoPython.ClientPool()
    for port in (10001, 10002):
        pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

    # Attempt to start the mission, first agent first
    for role in (0, 1):
        for attempt in range(1, attempt_limit + 1):
            try:
                agents[role].startMission(mission, pool, recording, role, "")
            except RuntimeError as err:
                print("Error starting mission", err)
                if attempt == attempt_limit:
                    exit(1)
                else:
                    time.sleep(2)
            else:
                break

    # Loop until the mission starts
    print("Waiting for the mission to start ")
    state = agents[0].getWorldState()
    polls_with_errors = 0
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agents[0].getWorldState()
        for problem in state.errors:
            print("Error:", problem.text)
        if len(state.errors) > 0:
            polls_with_errors += 1
            if polls_with_errors >= 5:
                # Too many failing polls: start over from scratch.
                return self.load_duo_mission(mission, agents)

    print()
    print("Mission running.")
def init_malmo(agent_host, recordingsDirectory, video_width, video_height):
    """
    Request a new "MineGuyz" mission, optionally with recording enabled.

    Args:
        agent_host: Agent host to configure and start the mission on.  Its
            observation policy is set to latest-observation-only and its
            video policy to latest-frame-only.
        recordingsDirectory: When truthy, rewards, observations and commands
            are recorded (plus MP4 video if the ``record_video`` argument was
            received) into ``<dir>//Mission_<test + 1>.tgz``.
        video_width: Forwarded to ``GetMissionXML``.
        video_height: Forwarded to ``GetMissionXML``.

    Returns:
        ``agent_host`` once ``startMission`` has been accepted.  After three
        failed attempts the error is printed and the process exits with
        status 1.
    """
    mission_xml = GetMissionXML(Hyperparameters.SIZE,
                                Hyperparameters.OBS_SIZE,
                                Hyperparameters.MAX_EPISODE_STEPS,
                                video_width, video_height)
    mission = MalmoPython.MissionSpec(mission_xml, True)
    mission.setViewpoint(0)

    agent_host.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    recording = MalmoPython.MissionRecordSpec()
    if recordingsDirectory:
        recording.recordRewards()
        recording.recordObservations()
        recording.recordCommands()
        if agent_host.receivedArgument("record_video"):
            recording.recordMP4(24, 2000000)
        # NOTE(review): `test` is not defined in this function; presumably a
        # module-level episode counter - confirm it exists at call time.
        archive_name = "Mission_" + str(test + 1) + ".tgz"
        recording.setDestination(recordingsDirectory + "//" + archive_name)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    for attempt in range(1, attempt_limit + 1):
        try:
            agent_host.startMission(mission, pool, recording, 0, "MineGuyz")
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission:", err)
            exit(1)
        else:
            break

    return agent_host
def __init__(
    self, missionXML, validate, setup_mission=None, ip="127.0.0.1", port=10000
):
    """
    Create the agent host, client pool and mission objects for one agent.

    Args:
        missionXML: Mission definition passed to ``MalmoPython.MissionSpec``.
        validate: Validation flag passed to ``MalmoPython.MissionSpec``.
        setup_mission: Optional callable invoked with the new mission spec so
            the caller can customise it.
        ip: Address of the Minecraft client added to the pool.
        port: Port of the Minecraft client added to the pool.

    Command-line arguments in ``sys.argv`` are parsed by the agent host; a
    parse error prints the usage text and exits with status 1, and the
    ``help`` argument prints the usage text and exits with status 0.
    """
    # NOTE(review): this only builds a super proxy and discards it; no base
    # initialiser is run. Confirm whether `super().__init__()` was intended.
    super()

    host = MalmoPython.AgentHost()
    self.agent_host = host

    pool = MalmoPython.ClientPool()
    self.clientPool = pool
    pool.add(MalmoPython.ClientInfo(ip, port))

    try:
        host.parse(sys.argv)
    except RuntimeError as err:
        print("ERROR:", err)
        print(host.getUsage())
        exit(1)

    wants_help = host.receivedArgument("help")
    if wants_help:
        print(host.getUsage())
        exit(0)

    self.mission = MalmoPython.MissionSpec(missionXML, validate)
    self.mission_record = MalmoPython.MissionRecordSpec()
    if setup_mission is None:
        return
    setup_mission(self.mission)
def create_mission(ind, agent_host):
    """
    Build and start one mission whose legal positions are drawn as blocks.

    Every entry of the module-level ``legalPosList`` is drawn at y=1: the
    entry at index ``itemPosId`` as ``emerald_block``, the entry at index
    ``destPosId`` as ``diamond_block`` and all others as ``stone``.

    Args:
        ind: Integer used to build the experiment id ``Herobrine-<ind>``.
        agent_host: Agent host on which the mission is started (role 0).

    Returns:
        The mission spec, after the mission has been seen to begin and a
        further one-second pause.  The process exits with status 1 if all
        three start attempts fail.
    """
    mission = MalmoPython.MissionSpec(GetMissionXML(), True)
    recording = MalmoPython.MissionRecordSpec()
    mission.requestVideo(800, 500)
    mission.setViewpoint(1)

    attempt_limit = 3
    pool = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    for idx, position in enumerate(legalPosList):
        # The item marker wins if both ids point at the same position.
        if idx == itemPosId:
            material = "emerald_block"
        elif idx == destPosId:
            material = "diamond_block"
        else:
            material = "stone"
        mission.drawBlock(position[0], 1, position[1], material)

    # Attempt to start the mission
    for attempt in range(1, attempt_limit + 1):
        try:
            agent_host.startMission(mission, pool, recording, 0,
                                    "%s-%d" % ('Herobrine', ind))
        except RuntimeError as err:
            if attempt < attempt_limit:
                time.sleep(2)
                continue
            print("Error starting mission", ":", err)
            exit(1)
        else:
            break

    # Peek (rather than get) so queued observations are left untouched.
    state = agent_host.peekWorldState()
    while not state.has_mission_begun:
        time.sleep(0.1)
        state = agent_host.peekWorldState()
        for problem in state.errors:
            print("Error:", problem.text)
    time.sleep(1)

    return mission
def init_malmo(self, recordings_directory=DEFAULT_RECORDINGS_DIR):
    """
    Launch Minecraft and create the Malmo objects, at most once per instance.

    Does nothing when ``self.is_malmo_initialized`` is already true.
    Otherwise Minecraft is launched in the background for ports 10000 and
    10001, two agent hosts (``agent_host_bot`` and ``agent_host_camera``) and
    a matching client pool are stored on ``self``, and the bot's agent host
    parses a synthetic command line enabling video recording into
    ``recordings_directory``.
    """
    if self.is_malmo_initialized:
        return

    client_ports = (10000, 10001)
    launch_minecraft_in_background('/app/MalmoPlatform/Minecraft',
                                   ports=list(client_ports))

    # Set up two agent hosts
    self.agent_host_bot = MalmoPython.AgentHost()
    self.agent_host_camera = MalmoPython.AgentHost()

    # Create list of Minecraft clients to attach to. The agents must have
    # been launched before calling record_malmo_video using init_malmo()
    self.client_pool = MalmoPython.ClientPool()
    for port in client_ports:
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

    # Use the bot's agent host to hold the command-line options
    recording_args = ['--record_video', '--recording_dir',
                      recordings_directory]
    malmoutils.parse_command_line(self.agent_host_bot, recording_args)

    self.is_malmo_initialized = True
def init(self,
         client_pool=None,
         start_minecraft=None,
         continuous_discrete=True,
         add_noop_command=None,
         max_retries=90,
         retry_sleep=10,
         step_sleep=0.001,
         skip_steps=0,
         videoResolution=None,
         videoWithDepth=None,
         observeRecentCommands=None,
         observeHotBar=None,
         observeFullInventory=None,
         observeGrid=None,
         observeDistance=None,
         observeChat=None,
         allowContinuousMovement=None,
         allowDiscreteMovement=None,
         allowAbsoluteMovement=None,
         recordDestination=None,
         recordObservations=None,
         recordRewards=None,
         recordCommands=None,
         recordMP4=None,
         gameMode=None,
         forceWorldReset=None):
    """
    Configure the mission spec, client pool, spaces and recording.

    Args:
        client_pool: List of ``(ip, port)`` tuples, or ``None``.  Converted
            into a ``MalmoPython.ClientPool`` stored as ``self.client_pool``.
        start_minecraft: When truthy, a Minecraft process is started through
            ``minecraft_py.start()`` and its port replaces ``client_pool``.
        continuous_discrete, add_noop_command, max_retries, retry_sleep,
        step_sleep, skip_steps, forceWorldReset: Stored unchanged on ``self``.
        videoResolution: Arguments for ``requestVideo`` (or
            ``requestVideoWithDepth`` when ``videoWithDepth`` is truthy).
        observeRecentCommands, observeHotBar, observeFullInventory,
        observeChat: Truthy values enable the matching observation.
        observeGrid, observeDistance: Lists of arguments; the names ``"grid"``
            and ``"dist"`` respectively are appended before the call.
        allowDiscreteMovement: ``True`` enables all discrete movement
            commands, a list enables just those; either way the existing
            command handlers are removed first.
        allowContinuousMovement, allowAbsoluteMovement: Accepted but not used
            by this implementation.
            NOTE(review): confirm this restriction is intentional.
        recordDestination, recordObservations, recordRewards, recordCommands:
            Truthy values configure the mission record spec accordingly.
        recordMP4: Arguments for ``recordMP4``.
        gameMode: ``"spectator"``, ``"creative"`` or ``"survival"``.

    Raises:
        ValueError: ``client_pool`` is truthy but not a list.
        AssertionError: ``gameMode`` is not one of the known modes.
    """
    self.max_retries = max_retries
    self.retry_sleep = retry_sleep
    self.step_sleep = step_sleep
    self.skip_steps = skip_steps
    self.forceWorldReset = forceWorldReset
    self.continuous_discrete = continuous_discrete
    self.add_noop_command = add_noop_command
    self.client_pool = client_pool

    if videoResolution:
        if videoWithDepth:
            self.mission_spec.requestVideoWithDepth(*videoResolution)
        else:
            self.mission_spec.requestVideo(*videoResolution)

    if observeRecentCommands:
        self.mission_spec.observeRecentCommands()
    if observeHotBar:
        self.mission_spec.observeHotBar()
    if observeFullInventory:
        self.mission_spec.observeFullInventory()
    if observeGrid:
        self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
    if observeDistance:
        self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
    if observeChat:
        self.mission_spec.observeChat()

    if allowDiscreteMovement:
        # if there are any parameters, remove current command handlers first
        self.mission_spec.removeAllCommandHandlers()
        if allowDiscreteMovement is True:
            self.mission_spec.allowAllDiscreteMovementCommands()
        elif isinstance(allowDiscreteMovement, list):
            for cmd in allowDiscreteMovement:
                self.mission_spec.allowDiscreteMovementCommand(cmd)

    if start_minecraft:
        # start Minecraft process assigning port dynamically
        self.mc_process, port = minecraft_py.start()
        logger.info(
            "Started Minecraft on port %d, overriding client_pool.", port)
        client_pool = [('127.0.0.1', port)]

    # Make client_pool usable for Malmo: convert the list of tuples into a
    # MalmoPython.ClientPool.
    if client_pool:
        if not isinstance(client_pool, list):
            raise ValueError(
                "client_pool must be list of tuples of (IP-address, port)")
        self.client_pool = MalmoPython.ClientPool()
        for client in client_pool:
            self.client_pool.add(MalmoPython.ClientInfo(*client))

    # Video parameters used for video processing.
    self.video_height = self.mission_spec.getVideoHeight(0)
    self.video_width = self.mission_spec.getVideoWidth(0)
    self.video_depth = self.mission_spec.getVideoChannels(0)
    frame_shape = (self.video_height, self.video_width, self.video_depth)
    self.observation_space = spaces.Box(low=0, high=255, shape=frame_shape)

    # Dummy images just for the first observation.
    self.last_image1 = np.zeros(frame_shape, dtype=np.float32)
    self.last_image2 = np.zeros(frame_shape, dtype=np.float32)
    self.create_action_space()

    # Mission recording: nothing is recorded unless requested below.
    self.mission_record_spec = MalmoPython.MissionRecordSpec()
    if recordDestination:
        self.mission_record_spec.setDestination(recordDestination)
    if recordObservations:
        # Previously this flag was accepted but silently ignored.
        self.mission_record_spec.recordObservations()
    if recordRewards:
        self.mission_record_spec.recordRewards()
    if recordCommands:
        self.mission_record_spec.recordCommands()
    if recordMP4:
        self.mission_record_spec.recordMP4(*recordMP4)

    # Game mode.
    if gameMode:
        if gameMode == "spectator":
            self.mission_spec.setModeToSpectator()
        elif gameMode == "creative":
            self.mission_spec.setModeToCreative()
        elif gameMode == "survival":
            logger.warning(
                "Cannot force survival mode, assuming it is the default.")
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("Unknown game mode: " + gameMode)
# logger.info("MALMO_XSD_PATH not set? Check environment.") # exit(1) # mission_file = os.path.abspath(schema_dir) # if not os.path.exists(mission_file): # logger.info("Could not find Maze.xml under MALMO_XSD_PATH") # exit(1) # add some args agent_host.addOptionalStringArgument('mission_file', 'Path/to/file from which to load the mission.', mission_file) agent_host.addOptionalFlag('load_model', 'Load initial model from model_file.') agent_host.addOptionalStringArgument('model_file', 'Path to the initial model file', '') agent_host.addOptionalFlag('debug', 'Turn on debugging.') agent_host.setRewardsPolicy(MalmoPython.RewardsPolicy.LATEST_REWARD_ONLY) agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY) malmoutils.parse_command_line(agent_host) my_clients = MalmoPython.ClientPool() my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000)) agentID = 0 ################################################ ############################### prepare training action_list = ["movenorth 1", "movesouth 1", "movewest 1", "moveeast 1"] ckpt_dir = os.path.abspath("./checkpoints") ckpt_save_rate = 50 if os.path.exists(ckpt_dir): shutil.rmtree(ckpt_dir) os.makedirs(ckpt_dir) bs = 2000 update_rate=10 dqn = DQN(batch_size=bs, update_rate=update_rate, lr=2e-4) memory = []
def _read_action_history(path):
    """Poll *path* until it exists and is non-empty, then return its lines."""
    while True:
        try:
            # `with` closes the handle on every attempt, including empty reads.
            with open(path, "r") as history_file:
                lines = history_file.readlines()
        except OSError:
            # The training process has not created the file yet; try again.
            continue
        if lines:
            return lines


def main():
    """
    Run the maze experiment: 50 missions, each trained and then replayed.

    For every repetition a maze mission is generated and started, the grid
    is converted into the maze representations used by the planner, and a
    separate process (``missionTrainingStart``) is launched.  While that
    process runs, the actions it appends to
    ``action_history_<n>_.txt`` are replayed on the agent one at a time.
    Afterwards the agent walks to the goal and the elapsed training and
    travel times are appended to ``Eval_Stats.txt``.

    The process exits with status 1 when the command line cannot be parsed
    or a mission cannot be started after three attempts, and with status 0
    when ``help`` is requested.  Both report files are closed in every case.
    """
    global agent_host, matrix2dOriginal, maze_map, actionHistCounter
    global agent_current_position_xy_in_maze
    global agent_current_position_index_in_grid

    # Create default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # How many times the agent repeats the experiment.
    num_repeats = 50

    # Reset both report files to a single blank line, then append to them.
    with open("Eval_Stats.txt", "w+") as fresh_file:
        fresh_file.write("\n")
    with open("training_result.txt", "w+") as fresh_file:
        fresh_file.write("\n")
    esFile = open("Eval_Stats.txt", "a")
    trFile = open("training_result.txt", "a")

    try:
        for i in range(num_repeats):
            esFile.write("Run #" + str(i + 1) + "\n")
            actionHistCounter = i + 1
            print("Size of maze:", size_of_maze)
            randomDif = random.uniform(-0.2, 0.2)
            print("Parameters of the mission:", str(i), "next:",
                  0.4 + randomDif, "size:", size_of_maze)
            my_mission = MalmoPython.MissionSpec(
                get_mission_xml(str(i), 0.4 + randomDif, size_of_maze, 0),
                True)
            my_mission_record = MalmoPython.MissionRecordSpec()
            my_mission.requestVideo(800, 500)
            my_mission.setViewpoint(1)

            # Attempt to start a mission:
            max_retries = 3
            my_clients = MalmoPython.ClientPool()
            # add Minecraft machines here as available
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients,
                                            my_mission_record, 0,
                                            "%s-%d" % ('Moshe', i))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission", (i + 1), ":", e)
                        exit(1)
                    else:
                        time.sleep(2)

            # Loop until mission starts:
            print("Waiting for the mission", (i + 1), "to start ")
            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
                for error in world_state.errors:
                    print("Error:", error.text)
            print()
            print("Mission", (i + 1), "running.")

            grid = load_grid(world_state, agent_host)
            print("Size of actual map:", len(grid))
            maze_map = get_maze_map(grid)
            print("maze map:", len(maze_map))

            # The maze construction
            matrix2dOriginal = maze_to_2dMatrix(maze_map, size_of_maze)
            matrix2d = maze_to_2dMatrix_reversed(maze_map, size_of_maze)
            print("the matrix 2d: ", matrix2d)
            matrixArray = matrix2d.flatten()
            start_and_end_positions_in_actual_map = find_start_end(grid)
            print("size of maze map:", len(maze_map))
            print("first position in actual map:",
                  first_block_index_in_actual_map)
            print("last position in actual map:",
                  last_block_index_in_actual_map)

            agent_current_position_xy_in_maze = \
                get_xy_position_of_maze_map_by_position_of_actual_map(
                    start_and_end_positions_in_actual_map[0], grid)
            print("Started: agent current position(xy in maze):",
                  agent_current_position_xy_in_maze)
            agent_current_position_index_in_grid = \
                get_position_of_actual_map_by_xy_position_of_maze_map(
                    agent_current_position_xy_in_maze, grid)
            print("Started: agent current position(index in grid):",
                  agent_current_position_index_in_grid,
                  "compared with real position:",
                  start_and_end_positions_in_actual_map[0])

            # Read the starting yaw back out of the mission XML: the digits
            # are taken from the (at most three) characters after `yaw="`.
            mission_xml = my_mission.getAsXML(True)
            index_of_yaw = mission_xml.index("yaw")
            yaw_of_agent = int(
                re.match(r"(\d+)",
                         mission_xml[index_of_yaw + 5:index_of_yaw + 8]
                         ).group(1))
            sync_agent_direction_with_yaw(yaw_of_agent)
            print("Started: agent current yaw(face to where):", yaw_of_agent)

            print("Started: How many walkable blocks in front of agent's direction:",
                  agent_current_direction, "is walk able? Answer:",
                  get_num_of_walkable_blocks_in_front_of_agent(
                      agent_current_position_xy_in_maze, size_of_maze, grid))

            positionTransition(grid, matrixArray, yaw_of_agent, size_of_maze)

            trainingStart = time.time()
            trainingProcess = Process(target=missionTrainingStart,
                                      args=(actionHistCounter, ))
            trainingProcess.start()

            # Replay the actions the training process writes, as they appear.
            history_path = ("action_history_" + str(actionHistCounter) +
                            "_.txt")
            stringList = []
            is_complete_action_history = False
            curr_action_counter = 0
            while True:
                if not is_complete_action_history:
                    stringList = _read_action_history(history_path)
                    print("Reading action history file, get string: ",
                          stringList)
                    curr_action_list = stringList[0].split(' ')
                print("Here is the list length:", len(curr_action_list),
                      curr_action_counter + 1)
                try:
                    if curr_action_counter < len(curr_action_list):
                        action = curr_action_list[curr_action_counter]
                        convertAction = directionConvert(int(action[0]))
                        test_moving(agent_host, [convertAction], grid)
                        curr_action_counter += 1
                except ValueError:
                    # The last index of action is a newline character
                    break
                if stringList[-1] == "END":
                    is_complete_action_history = True
                if (is_complete_action_history
                        and len(curr_action_list) == curr_action_counter - 1):
                    break

            trainingProcess.join()
            trainingEnd = time.time()
            trainingElapsed = trainingEnd - trainingStart
            esFile.write("Training Time: " + str(trainingElapsed) + " ")
            print(
                "Training complete. Training result can be found in training_result.txt."
            )

            travelStart = time.time()
            go_to_goal_and_finish_mission(
                grid, agent_current_position_index_in_grid,
                start_and_end_positions_in_actual_map[1], world_state,
                agent_host, i)
            travelEnd = time.time()
            travelElapsed = travelEnd - travelStart
            esFile.write("Agent Travel Time: " + str(travelElapsed) + "\n\n")
            print("Aiku did it!")
    finally:
        # Runs on normal completion, on exit(1) and on any exception.
        trFile.close()
        esFile.close()
</VideoProducer> </AgentHandlers> </AgentSection> </Mission>''' return missionXML # Set up a client pool. # IMPORTANT: If ANY of the clients will be on a different machine, then you MUST # make sure that any client which can be the server has an IP address that is # reachable from other machines - ie DO NOT SIMPLY USE 127.0.0.1!!!! # The IP address used in the client pool will be broadcast to other agents who # are attempting to find the server - so this will fail for any agents on a # different machine. client_pool = MalmoPython.ClientPool() for x in range(10000, 10000 + NUM_AGENTS + 1): client_pool.add(MalmoPython.ClientInfo('127.0.0.1', x)) num_missions = 5 if INTEGRATION_TEST_MODE else 30000 for mission_no in range(1, num_missions + 1): print("Running mission #" + str(mission_no)) # Create mission xml - use forcereset if this is the first mission. my_mission = MalmoPython.MissionSpec( getXML("true" if mission_no == 1 else "false"), True) # Generate an experiment ID for this mission. # This is used to make sure the right clients join the right servers - # if the experiment IDs don't match, the startMission request will be rejected. # In practice, if the client pool is only being used by one researcher, there # should be little danger of clients joining the wrong experiments, so a static
def main(model=None, mode='train', start_episode=0):
    """
    Train or evaluate the DQN agent on the "Hill Descent" mission.

    Args:
        model: Path of a saved model to load, or ``None`` to start fresh (in
            which case both result CSV files are cleared first).
        mode: ``'train'`` or ``'test'``.  In ``'test'`` mode with a loaded
            model the agent's epsilon is forced to 0.0.
        start_episode: Episodes run from ``start_episode + 1`` up to the
            module-level ``episodes``.

    For every step the agent picks one of four directions; when the block in
    that direction is reported as 0 in the grid state the jump variant of
    the command is sent.  Transitions are stored with ``agent.remember`` and
    replayed once the episode number exceeds the batch size.  Per-move and
    per-episode results are written to ``./data/moves.csv`` and
    ``./data/results.csv`` when training or when no model was given.
    """
    my_xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <About>
    <Summary>Hill Descent.</Summary>
  </About>
  <ModSettings>
    <MsPerTick>20</MsPerTick>
  </ModSettings>
  <ServerSection>
    <ServerInitialConditions>
      <Time><StartTime>1</StartTime></Time>
    </ServerInitialConditions>
    <ServerHandlers>
      <DefaultWorldGenerator seed="-999595225643433963" forceReset="false" destroyAfterUse="false" />
      <ServerQuitFromTimeUp timeLimitMs="100000000"/>
      <ServerQuitWhenAnyAgentFinishes/>
    </ServerHandlers>
  </ServerSection>
  <AgentSection mode="Survival">
    <Name>Bob</Name>
    <AgentStart>
      <Placement x="28.5" y="87" z="330.5" pitch="-90" yaw="0"/>
    </AgentStart>
    <AgentHandlers>
      <DiscreteMovementCommands/>
      <MissionQuitCommands quitDescription="done"/>
      <ChatCommands/>
      <ObservationFromFullStats/>
      <ObservationFromGrid>
        <Grid name="sight">
          <min x="{}" y="{}" z="{}"/>
          <max x="{}" y="{}" z="{}"/>
        </Grid>
        <Grid name="feet">
          <min x="0" y="-1" z="0"/>
          <max x="0" y="-1" z="0"/>
        </Grid>
      </ObservationFromGrid>
      <AgentQuitFromTouchingBlockType>
        <Block type="cobblestone" />
      </AgentQuitFromTouchingBlockType>
    </AgentHandlers>
  </AgentSection>
</Mission>
'''.format(-(grid_width - 1) // 2, -grid_height, -(grid_width - 1) // 2,
           (grid_width - 1) // 2, grid_height, (grid_width - 1) // 2)

    # Indices of the grid state that are fed to the network.
    useful_indices = (2, 6, 7, 8, 10, 11, 13, 14, 16, 17, 18, 22)

    batch_size = 100
    agent = DQNAgent(state_size, action_size, learning_rate, discount_rate,
                     epsilon, epsilon_min, epsilon_decay)
    if model is not None:
        agent.load(model)
        if mode == 'test':
            agent.epsilon = 0.0
        print('loaded model: {}'.format(model))
    else:
        clear_csv('./data/results.csv')
        clear_csv('./data/moves.csv')

    # Results are logged and checkpoints saved unless a model is being tested.
    logging_results = mode == 'train' or model is None

    # NOTE(review): this pool is built but never passed to startMission, so
    # Malmo's default client is used - confirm which port is intended.
    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
    agent_host = MalmoPython.AgentHost()

    for e in range(start_episode + 1, episodes + 1):
        my_mission = MalmoPython.MissionSpec(my_xml, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(2)

        print("Waiting for the mission to start", end=' ')
        agent_host.startMission(my_mission, my_mission_record)
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()

        # Remove animals from the world before the episode starts.
        agent_host.sendCommand('chat /kill @e[type=Chicken]')
        agent_host.sendCommand('chat /kill @e[type=Pig]')
        agent_host.sendCommand('chat /kill @e[type=Cow]')

        moves = 0
        episode_reward = 0
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            if world_state.number_of_observations_since_last_state <= 0:
                continue

            try:
                obvsText = world_state.observations[-1].text
                data = json.loads(obvsText)
            except Exception:
                print("Error when getting state")
                continue

            state = get_state(data)
            prev_x = data.get(u'XPos', 0)
            prev_y = data.get(u'YPos', 0)
            prev_z = data.get(u'ZPos', 0)
            useful_state = [state[k] for k in useful_indices]
            action = agent.act(useful_state)

            # Jump when the neighbouring cell in the chosen direction is 0.
            needs_jump = (
                (action == 0 and state[grid_center - grid_width] == 0)
                or (action == 1 and state[grid_center + 1] == 0)
                or (action == 2 and state[grid_center + grid_width] == 0)
                or (action == 3 and state[grid_center - 1] == 0))
            command = (jump_directions[action] if needs_jump
                       else directions[action])
            agent_host.sendCommand(command)
            time.sleep(0.25)

            try:
                world_state = wait_world_state(agent_host, world_state)
                obvsText = world_state.observations[-1].text
                data = json.loads(obvsText)
            except Exception:
                print("Error when getting state")
                continue

            current_x = data.get(u'XPos', 0)
            current_y = data.get(u'YPos', 0)
            current_z = data.get(u'ZPos', 0)
            damage_taken = calculate_damage(prev_y, current_y)
            next_state = get_state(data)
            # Built from the NEW observation; it used to copy the old state.
            useful_next_state = [next_state[k] for k in useful_indices]

            moved = (prev_x != current_x or prev_y != current_y
                     or prev_z != current_z)
            if moved:
                reward = 2 * (prev_y - current_y) - 50 * damage_taken - 1
            else:
                # Standing still is heavily penalised.
                reward = -1000
            episode_reward += reward

            done = bool(current_y <= goal_height
                        or not world_state.is_mission_running
                        or data['Life'] <= 0)
            agent.remember(useful_state, action, reward, useful_next_state,
                           done)

            print(
                'episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                .format(e, episodes, command, reward, agent.epsilon, moves,
                        done))

            moves += 1
            if logging_results:
                write_to_csv('./data/moves.csv',
                             [e, current_x, current_y, current_z, reward])
            if e > batch_size:
                agent.replay(batch_size)
            if done or moves > max_moves:
                agent_host.sendCommand("quit")

        if logging_results and (e in checkpoints
                                or agent.epsilon <= epsilon_min):
            print('saving model at episode {}'.format(e))
            agent.save('./models/model_{}'.format(e))
            if agent.epsilon <= epsilon_min:
                break

        time.sleep(1)
        if logging_results:
            write_to_csv('./data/results.csv',
                         [e, episode_reward, moves, int(episode_reward > 0)])
def _add_default_client(self):
    """Create ``self.my_client_pool`` holding only the client 127.0.0.1:10000."""
    self.my_client_pool = MalmoPython.ClientPool()
    local_client = MalmoPython.ClientInfo('127.0.0.1', 10000)
    self.my_client_pool.add(local_client)
def init(self,
         client_pool=None,
         role=0,
         continuous_discrete=True,
         add_noop_command=None,
         max_retries=30,
         retry_sleep=3,
         step_sleep=0.001,
         skip_steps=0,
         videoResolution=None,
         videoWithDepth=None,
         observeRecentCommands=None,
         observeHotBar=None,
         observeFullInventory=None,
         observeGrid=None,
         observeDistance=None,
         observeChat=None,
         allowContinuousMovement=None,
         allowDiscreteMovement=None,
         allowAbsoluteMovement=None,
         recordDestination=None,
         recordObservations=None,
         recordRewards=None,
         recordCommands=None,
         recordMP4=None,
         gameMode=None,
         forceWorldReset=None,
         turn_based=False,
         experiment_id="experimentid"):
    """
    Configure the mission spec, client pool, spaces and recording.

    Args:
        client_pool: List of ``(ip, port)`` tuples, or a falsy value to leave
            ``self.client_pool`` untouched.
        role, continuous_discrete, add_noop_command, max_retries,
        retry_sleep, step_sleep, skip_steps, forceWorldReset, experiment_id:
            Stored unchanged on ``self``.
        videoResolution: Arguments for ``requestVideo`` (or
            ``requestVideoWithDepth`` when ``videoWithDepth`` is truthy).
        observeRecentCommands, observeHotBar, observeFullInventory,
        observeChat: Truthy values enable the matching observation.
        observeGrid, observeDistance: Lists of arguments; the names ``"grid"``
            and ``"dist"`` respectively are appended before the call.
        allowContinuousMovement, allowDiscreteMovement,
        allowAbsoluteMovement: ``True`` enables all commands of that family,
            a list enables just those.  If any of the three is truthy the
            existing command handlers are removed first.
        recordDestination, recordObservations, recordRewards, recordCommands:
            Truthy values configure the mission record spec accordingly.
        recordMP4: Arguments for ``recordMP4``.
        gameMode: ``"spectator"``, ``"creative"`` or ``"survival"``.
        turn_based: When truthy a ``TurnState`` is stored as ``self._turn``.

    Raises:
        ValueError: ``client_pool`` is truthy but not a list.
        AssertionError: ``gameMode`` is not one of the known modes.
    """
    self.role = role
    self.max_retries = max_retries
    self.retry_sleep = retry_sleep
    self.step_sleep = step_sleep
    self.skip_steps = skip_steps
    self.forceWorldReset = forceWorldReset
    self.continuous_discrete = continuous_discrete
    self.add_noop_command = add_noop_command
    self.experiment_id = experiment_id

    if turn_based:
        self._turn = TurnState()

    if videoResolution:
        if videoWithDepth:
            self.mission_spec.requestVideoWithDepth(*videoResolution)
        else:
            self.mission_spec.requestVideo(*videoResolution)

    if observeRecentCommands:
        self.mission_spec.observeRecentCommands()
    if observeHotBar:
        self.mission_spec.observeHotBar()
    if observeFullInventory:
        self.mission_spec.observeFullInventory()
    if observeGrid:
        self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
    if observeDistance:
        self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
    if observeChat:
        self.mission_spec.observeChat()

    if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
        # if there are any parameters, remove current command handlers first
        self.mission_spec.removeAllCommandHandlers()

        if allowContinuousMovement is True:
            self.mission_spec.allowAllContinuousMovementCommands()
        elif isinstance(allowContinuousMovement, list):
            for cmd in allowContinuousMovement:
                self.mission_spec.allowContinuousMovementCommand(cmd)

        if allowDiscreteMovement is True:
            self.mission_spec.allowAllDiscreteMovementCommands()
        elif isinstance(allowDiscreteMovement, list):
            for cmd in allowDiscreteMovement:
                self.mission_spec.allowDiscreteMovementCommand(cmd)

        if allowAbsoluteMovement is True:
            self.mission_spec.allowAllAbsoluteMovementCommands()
        elif isinstance(allowAbsoluteMovement, list):
            for cmd in allowAbsoluteMovement:
                self.mission_spec.allowAbsoluteMovementCommand(cmd)

    if client_pool:
        if not isinstance(client_pool, list):
            raise ValueError(
                "client_pool must be list of tuples of (IP-address, port)")
        self.client_pool = MalmoPython.ClientPool()
        for client in client_pool:
            self.client_pool.add(MalmoPython.ClientInfo(*client))

    # TODO: produce observation space dynamically based on requested features
    self.video_height = self.mission_spec.getVideoHeight(0)
    self.video_width = self.mission_spec.getVideoWidth(0)
    self.video_depth = self.mission_spec.getVideoChannels(0)
    self.observation_space = spaces.Box(low=0,
                                        high=255,
                                        shape=(self.video_height,
                                               self.video_width,
                                               self.video_depth),
                                        dtype=np.uint8)

    # Dummy image just for the first observation; stored flat, with one
    # element per pixel channel.
    self.last_image = np.zeros(
        (self.video_height * self.video_width * self.video_depth),
        dtype=np.uint8)
    self._create_action_space()

    # Mission recording: nothing is recorded unless requested below.
    self.mission_record_spec = MalmoPython.MissionRecordSpec()
    if recordDestination:
        self.mission_record_spec.setDestination(recordDestination)
    if recordObservations:
        # Previously this flag was accepted but silently ignored.
        self.mission_record_spec.recordObservations()
    if recordRewards:
        self.mission_record_spec.recordRewards()
    if recordCommands:
        self.mission_record_spec.recordCommands()
    if recordMP4:
        self.mission_record_spec.recordMP4(*recordMP4)

    if gameMode:
        if gameMode == "spectator":
            self.mission_spec.setModeToSpectator()
        elif gameMode == "creative":
            self.mission_spec.setModeToCreative()
        elif gameMode == "survival":
            logger.warning(
                "Cannot force survival mode, assuming it is the default.")
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError("Unknown game mode: " + gameMode)