def __init__(self, mission_xml, num_agents):
    """Build a multi-agent Malmo mission from raw XML.

    Args:
        mission_xml: mission specification XML string (validated by Malmo).
        num_agents: number of agents participating in the mission.
    """
    self.mission = MalmoPython.MissionSpec(mission_xml, True)
    self.mission_record = MalmoPython.MissionRecordSpec()
    self.num_agents = num_agents
    self.experiment_ID = str(uuid.uuid4())
    # One client per agent plus one spare slot (ports 10000..10000+num_agents).
    # BUG FIX: the original read the global NUM_AGENTS here and below,
    # silently ignoring the num_agents constructor argument.
    self.client_pool = MalmoPython.ClientPool()
    for port in range(10000, 10000 + self.num_agents + 1):
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))
    # Create one agent host for parsing command-line options.
    self.agent_hosts = [MalmoPython.AgentHost()]
    try:
        self.agent_hosts[0].parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_hosts[0].getUsage())
        exit(1)
    if self.agent_hosts[0].receivedArgument("help"):
        print(self.agent_hosts[0].getUsage())
        exit(0)
    # Create the rest of the agent hosts.
    if self.num_agents > 1:
        self.agent_hosts += [MalmoPython.AgentHost()
                             for _ in range(self.num_agents - 1)]
def get_client_pool(self):
    """Return a ClientPool pointing at the single local Minecraft client."""
    pool = MalmoPython.ClientPool()
    pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
    # Extra clients for multi-instance runs could be registered here,
    # e.g. further ports on 127.0.0.1.
    return pool
def __init__(self, missionXML, serverIp='127.0.0.1'):
    """Set up agent hosts, optional recording, and the client pool.

    Args:
        missionXML: mission description object exposing ``agentSections``.
        serverIp: IP address of the machine running the Minecraft clients.
    """
    self.missionDesc = None
    self.mission = None
    self.mission_record = None
    self.setMissionXML(missionXML)
    nAgents = len(missionXML.agentSections)
    self.agent_hosts = []
    self.agent_hosts += [MalmoPython.AgentHost() for n in range(nAgents)]
    self.agent_hosts[0].parse(sys.argv)
    # BUG FIX: the original called self.receivedArgument (no such method on
    # this class — it belongs to AgentHost) and then invoked record*() on a
    # mission_record that could still be None, raising AttributeError.
    if self.agent_hosts[0].receivedArgument('recording_dir'):
        recordingsDirectory = malmoutils.get_recordings_directory(
            self.agent_hosts[0])
        if self.mission_record is None:
            # NOTE(review): assumes setMissionXML did not already build a
            # record spec — confirm against that method.
            self.mission_record = MalmoPython.MissionRecordSpec()
        self.mission_record.recordRewards()
        self.mission_record.recordObservations()
        self.mission_record.recordCommands()
        self.mission_record.setDestination(recordingsDirectory + "//" +
                                           "lastRecording.tgz")
        if self.agent_hosts[0].receivedArgument("record_video"):
            self.mission_record.recordMP4(24, 2000000)
    # One client per agent on consecutive ports starting at 10000.
    self.client_pool = MalmoPython.ClientPool()
    for x in range(10000, 10000 + nAgents):
        self.client_pool.add(MalmoPython.ClientInfo(serverIp, x))
    # Per-agent bookkeeping, indexed by agent role.
    self.worldStates = [None] * nAgents
    self.observe = [None] * nAgents
    self.isAlive = [True] * nAgents
    self.frames = [None] * nAgents
    self.segmentation_frames = [None] * nAgents
def run_mission(rambo_steve, episode):
    """Start a Malmo mission and hand control to the rambo_steve agent.

    Args:
        rambo_steve: agent object exposing run(agent_host, episode).
        episode: episode index forwarded to the agent.
    """
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument('help'):
        print(agent_host.getUsage())
        # BUG FIX: the original printed the usage text but then continued to
        # start the mission anyway; --help should stop here.
        exit(0)

    my_mission = MalmoPython.MissionSpec(world.getMissionXML(), True)
    # adding the recordedFileName into MissionRecordSpec
    my_mission_record = MalmoPython.MissionRecordSpec()
    # my_mission = malmoutils.get_default_recording_object(agent_host, "Mission")
    # adding the spec for adding the recording of the video
    # my_mission.requestVideo(1280, 720)
    # my_mission_record.recordMP4(30, 2000000)

    # set up client to connect:
    my_clients = MalmoPython.ClientPool()
    for i in range(5):
        my_clients.add(
            MalmoPython.ClientInfo('127.0.0.1', c.MISSION_CONTROL_PORT + i))

    # Attempt to start a mission, retrying while clients come up:
    print('Attempting to start mission...')
    max_retries = 5
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record,
                                    0, "RamboSteve")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print('Error starting mission:', e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    print('Waiting for the mission to start ', end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print('.', end='')
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print('Error:', error.text)
    print()
    print('Mission running ', end=' ')
    rambo_steve.run(agent_host, episode)
    print()
    print('Mission ended')
    time.sleep(2)
def get_client_pool(self):
    """
    Malmo specific function: build the client pool used to reach the
    Minecraft server (a single local client on port 10000).
    """
    pool = MalmoPython.ClientPool()
    pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000))
    # Further ClientInfo entries would go here when running several
    # Minecraft instances at once.
    return pool
def initalizeMinecraftMap(xml):
    """Create the mission, agent host, client pool and record spec for *xml*.

    Note: function name (with its typo) is kept for caller compatibility.

    Returns:
        Tuple of (mission, agent_host, client_pool, mission_record).
    """
    host = MalmoPython.AgentHost()
    mission = MalmoPython.MissionSpec(xml, True)
    recordedFileName = recordPath.format("final_take0_bad.tgz")
    # Video capture is enabled; swap in MalmoPython.MissionRecordSpec()
    # with no destination (and drop requestVideo/recordMP4) to disable it.
    record = MalmoPython.MissionRecordSpec(recordedFileName)
    mission.requestVideo(1200, 720)
    record.recordMP4(30, 2000000)
    mission.setViewpoint(1)
    clients = MalmoPython.ClientPool()
    # add Minecraft machines here as available
    clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    return (mission, host, clients, record)
def __init__(self, port=None, existing=False):
    """Launch a Minecraft/Malmo instance (or attach to a running one) and
    build a single-client pool for it.

    Args:
        port: Malmo client port. Chosen automatically when None and a new
            process is being started; required when existing=True.
        existing: attach to an already-running instance instead of spawning
            a new Minecraft process.

    Raises:
        EOFError: the spawned Minecraft process exited before reporting ready.
    """
    self.existing = existing
    if not existing:
        if not port:
            port = InstanceManager._get_valid_port()
        cmd = InstanceManager.MC_COMMAND
        if InstanceManager.headless:
            cmd += " -headless "
        cmd += " -port " + str(port)
        logger.info("Starting Minecraft process: " + cmd)
        args = shlex.split(cmd)
        proc = subprocess.Popen(
            args,
            cwd=InstanceManager.MINECRAFT_DIR,
            # pipe entire output
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            # use process group, see http://stackoverflow.com/a/4791612/18576
            preexec_fn=os.setsid)
        # wait until Minecraft process has outputed "CLIENT enter state: DORMANT"
        while True:
            line = proc.stdout.readline()
            logger.debug(line)
            if not line:
                raise EOFError("Minecraft process finished unexpectedly")
            if b"CLIENT enter state: DORMANT" in line:
                break
        logger.info("Minecraft process ready")
        # supress entire output, otherwise the subprocess will block
        # NB! there will be still logs under Malmo/Minecraft/run/logs
        # FNULL = open(os.devnull, 'w')
        FMINE = open('./minecraft.log', 'w')
        proc.stdout = FMINE
        self.proc = proc
    else:
        assert port is not None, "No existing port specified."
    self.ip = InstanceManager.DEFAULT_IP
    self.port = port
    # (re-assignment of self.existing from the top of the method; harmless)
    self.existing = existing
    self.locked = False
    # Creating client pool.
    logger.info("Creating client pool for {}".format(self))
    self.client_pool = MalmoPython.ClientPool()
    self.client_pool.add(MalmoPython.ClientInfo(self.ip, self.port))
def create_malmo_components():
    """Build and return (client_pool, agent_host, mission, mission_record).

    Reads the comma-separated port list from the module-level ``opts``.
    """
    # setup client pool
    client_pool = MalmoPython.ClientPool()
    for port in map(int, opts.malmo_ports.split(",")):
        # BUG FIX: the original used Python 2 `print >>sys.stderr, ...`
        # statement syntax, which is a SyntaxError under Python 3 (the
        # version the rest of this file targets).
        print("adding client with port %d" % port, file=sys.stderr)
        client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
    # setup agent host
    malmo = MalmoPython.AgentHost()
    # can't do this without more complex caching of world state vid frames
    #malmo.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    # load mission spec
    mission = MalmoPython.MissionSpec(specs.classroom(opts, overclock_tick_ms),
                                      True)
    mission_record = MalmoPython.MissionRecordSpec()
    # return all
    return client_pool, malmo, mission, mission_record
def run(self):
    """Runs the game with the registered agents

    Raises:
        :class:`jason_malmo.exceptions.NoAgentsException`: There are not registered agents in the game.\n
        Register an agent before running the game::

            game.register('/path/to/file.asl')
            game.run()
    """
    self._client_pool = MalmoPython.ClientPool()
    if not len(self._agents):
        raise NoAgentsException
    # NOTE(review): the range is inclusive of len(self._agents), so one more
    # client than there are agents is registered — presumably a spare slot;
    # confirm against how the Minecraft instances are launched.
    for port in range(10000, 10000 + len(self._agents) + 1):
        self._client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))
    self._my_mission = MalmoPython.MissionSpec(self._get_mission_xml(), True)
    # Start one mission role per agent; role index doubles as the agent index.
    for (index, agent) in enumerate(self._agents):
        malmoutils.parse_command_line(agent.malmo_agent)
        self._safe_start_mission(
            agent.malmo_agent, self._my_mission, self._client_pool,
            malmoutils.get_default_recording_object(agent.malmo_agent,
                                                    "saved_data"), index, '')
    self._safe_wait_for_start([agent.malmo_agent for agent in self._agents])
    # Each agent's Jason reasoning loop runs on its own thread.
    threads = []
    for agent in self._agents:
        thr = threading.Thread(target=self._jason_env.run_agent,
                               args=(agent, ),
                               kwargs={})
        thr.start()
        threads.append(thr)
    # TODO while mission is running
    # Main loop: poll each agent's belief base and dispatch any 'tasks'
    # beliefs to the task handler. Runs until the process is killed.
    while True:
        for agent in self._agents:
            for (belief, value) in agent.beliefs.items():
                if belief[0] == 'tasks':
                    tasks = []
                    for task in list(value)[0].args[0]:
                        tasks.append(task)
                    self.tasks.handle(agent, tasks)
        time.sleep(0.05)
def __init__(self, _):
    """Two-agent PvP Malmo environment: action/observation spaces, agent
    hosts, client pool and self-play flags."""
    # Bookkeeping for graphing the per-episode returns.
    self.log_frequency = 1
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []

    # DISCRETE ACTION SPACE [0, 5]:
    #   0 attack | 1 switch to sword | 2 switch to axe
    #   3 use gapple | 4 use shield (1 second) | 5 idle
    self.action_space = Discrete(6)

    # CONTINUOUS OBSERVATION SPACE (all in [0, 1]):
    #   enemy in range (0/1), my health, enemy health,
    #   enemy weapon on an offensive->defensive scale
    #   (axe=1, sword=0.75, gapple=0.25, shield=0),
    #   distance between the two agents
    self.observation_space = Box(0, 1, shape=(5, ), dtype=np.float32)

    ###################################
    # Malmo parameters: one host per combatant, one client per host.
    self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
    self.client_pool = Malmo.ClientPool()
    for port in (10000, 10001):
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", port))

    ###################################
    # Custom parameters
    self.mission_index = 0

    ###################################
    # self-play parameters (opponent policy loading currently disabled)
    #self.opponent_policy = load_trained_agent(CURRENT_CHECKPOINT)
    self.use_self_play = False
    self.first_reset = True
def start(self):
    """Start the Malmo mission on the pooled client and block until it begins."""
    self.malmo_client_pool = MalmoPython.ClientPool()
    self.malmo_client_pool.add(
        MalmoPython.ClientInfo("127.0.0.1", 10001))  # 10000 in use - try 10001
    self.malmo_mission = MalmoPython.MissionSpec(self.missionXML, True)
    self.malmo_mission.forceWorldReset()
    self.malmo_mission_record = MalmoPython.MissionRecordSpec()
    self.malmo_mission.requestVideo(800, 500)
    self.malmo_mission.setViewpoint(1)
    # Attempt to start a mission:
    max_retries = 10
    for retry in range(max_retries):
        try:
            # BUG FIX: the client pool above was built but never passed, so
            # the two-argument startMission overload fell back to the default
            # client on port 10000 — the very port the comment says is busy.
            self.agent_host.startMission(self.malmo_mission,
                                         self.malmo_client_pool,
                                         self.malmo_mission_record,
                                         0, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    # Loop until mission starts:
    print("Waiting for the mission to start ")
    self.world_state = self.agent_host.getWorldState()
    while not self.world_state.has_mission_begun:
        sys.stdout.write(".")
        time.sleep(0.1)
        self.world_state = self.agent_host.getWorldState()
        for error in self.world_state.errors:
            print("Error:", error.text)
    print(" ")
    print("Mission running ")
    self.number += 1
    self.start_time = time.time()
    self.end_time = None
def startGame():
    """Start one recorded Malmo game and prompt the player via the canvas."""
    # Find the first unused saved_data<N>.tar.gz name in the recordings dir.
    filenum = 0
    while True:
        fileRecording = recordingsDirectory + '/saved_data' + str(
            filenum) + '.tar.gz'
        if not os.path.isfile(fileRecording):
            break
        filenum += 1

    my_mission_record = MalmoPython.MissionRecordSpec(fileRecording)
    my_mission_record.recordCommands()
    my_mission_record.recordMP4(20, 400000)
    my_mission_record.recordRewards()
    my_mission_record.recordObservations()

    try:
        # Role 1 uses Malmo's display; 0 when we render our own GUI instead.
        display_gui = 0 if want_own_display else 1
        agent_host.startMission(my_mission, MalmoPython.ClientPool(),
                                my_mission_record, display_gui, rom_file)
    except RuntimeError as e:
        print("Error starting mission:", e)
        exit(1)

    # Poll until the mission reports it has begun.
    print("Waiting for the mission to start", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    gamestats = "Go " + str(gameNum + 1) + " out of " + str(iterations) + "\n"
    canvas.delete("all")
    # The window needs keyboard focus or no way to control game.
    canvas.create_text(
        80, 105, text=gamestats + "Click to begin!\nEscape to end")
def init_malmo(self):
    """
    Initialize new Malmo mission.

    Loads ./mission.xml, starts the mission on the local client (with
    retries), waits for it to begin, then returns the initial world state.
    """
    mission_file = './mission.xml'
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.requestVideo(800, 500)
    my_mission.setViewpoint(1)

    # add Minecraft machines here as available
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))

    # Start the mission, retrying while the client warms up.
    max_retries = 3
    attempt = 0
    while True:
        try:
            self.agent_host.startMission(my_mission, my_clients,
                                         my_mission_record, 0, 'Agent')
            break
        except RuntimeError as e:
            attempt += 1
            if attempt >= max_retries:
                print("Error starting mission:", e)
                exit(1)
            time.sleep(2)

    # Block until the world reports the mission has begun.
    world_state = self.agent_host.getWorldState()
    while not world_state.has_mission_begun:
        time.sleep(0.1)
        world_state = self.agent_host.getWorldState()
        for error in world_state.errors:
            print("\nError:", error.text)

    self.initialize()
    return world_state
def __init__(self, _):
    """Two-agent PvP environment with a 4-feature observation and
    checkpoint-based self-play plumbing."""
    # Per-step reward log for graphing.
    self.step_rewards = []

    # DISCRETE ACTION SPACE [0, 5]:
    #   0 attack | 1 switch to sword | 2 switch to axe
    #   3 use gapple | 4 use shield (1 second) | 5 idle
    self.action_space = Discrete(6)

    # CONTINUOUS OBSERVATION SPACE (all in [0, 1]):
    #   enemy in range (0/1), my health, enemy health,
    #   enemy weapon on an offensive->defensive scale
    #   (axe=1, sword=0.75, gapple=0.25, shield=0)
    self.observation_space = Box(0, 1, shape=(4, ), dtype=np.float32)

    ###################################
    # Malmo parameters: one host per combatant, one client per host.
    self.agent_hosts = [Malmo.AgentHost() for _ in range(2)]
    self.client_pool = Malmo.ClientPool()
    for port in (10001, 10002):
        self.client_pool.add(Malmo.ClientInfo("127.0.0.1", port))

    self.mission_index = 0
    self.old_checkpoint = -1

    ###################################
    # self-play parameters
    self.opponent_policy = load_trained_agent(get_current_checkpoint())
    self.use_self_play = False
    self.last_load = 0
    self.first_reset = True
def run(argv=None):
    """Run a short demo mission (wander randomly for 10 seconds), restarting
    Minecraft if it stops responding.

    Args:
        argv: command-line arguments for Malmo option parsing. Defaults to
            [''] when omitted.
    """
    # BUG FIX: the original used a mutable default argument (argv=['']);
    # replaced with the None-sentinel idiom. Behavior is unchanged.
    if argv is None:
        argv = ['']
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        agent_host.sendCommand("turn " + str(random.random() * 2 - 1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",
              world_state.number_of_video_frames_since_last_state,
              world_state.number_of_observations_since_last_state,
              world_state.number_of_rewards_since_last_state)
        # Watchdog: restart Minecraft if nothing new arrives for too long.
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':',
                  frame.channels, 'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def run(size, algo1, algo2):
    """Run one head-to-head "break the floor" match between two
    algorithm-driven agents on a size x size snow arena above lava.

    Args:
        size: side length of the square arena.
        algo1: algorithm name for the Agent (key into `algorithms`).
        algo2: algorithm name for the Enemy (key into `algorithms`).

    Returns:
        0 if the Enemy won, 1 if the Agent won, 2 if the mission ended
        without either agent touching lava.
    """
    #algorithms = {"reflex": reflex.reflex, "hiddenMarkov": hiddenMarkov.hiddenMarkov, "minimax":minimax.minimax, "expectimax": expectimax.expectimax}
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }
    #assert len(sys.argv) == 4, "Wrong number of arguments, the form is: mapSize, agent algorithm, enemy alogrithm"
    malmoutils.fix_print()
    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    #map_size = str(sys.argv[1])
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]
    #agentAlgo = algorithms[sys.argv[2]]
    #enemyAlgo = algorithms[sys.argv[3]]
    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)

    # Two-agent mission: a flat snow layer over lava; each agent carries a
    # shovel and loses when it falls into the lava.
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <About>
            <Summary>Hello world!</Summary>
        </About>
        <ServerSection>
            <ServerInitialConditions>
                <Time>
                    <StartTime>12000</StartTime>
                    <AllowPassageOfTime>false</AllowPassageOfTime>
                </Time>
            </ServerInitialConditions>
            <ServerHandlers>
                <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
                <DrawingDecorator>
                    <!-- coordinates for cuboid are inclusive -->
                    <DrawCuboid x1="0" y1="45" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="300" z2=''' + '"' + map_minus + '"' + ''' type="air" /> <!-- limits of our arena -->
                    <DrawCuboid x1="0" y1="40" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="44" z2=''' + '"' + map_minus + '"' + ''' type="lava" /> <!-- lava floor -->
                    <DrawCuboid x1="0" y1="46" z1="0" x2=''' + '"' + map_minus + '"' + ''' y2="46" z2=''' + '"' + map_minus + '"' + ''' type="snow" />
                </DrawingDecorator>
                <ServerQuitFromTimeUp timeLimitMs="30000"/>
            </ServerHandlers>
        </ServerSection>
        <AgentSection mode="Survival">
            <Name>Agent</Name>
            <AgentStart>
                <Inventory>
                    <InventoryItem slot="0" type="diamond_shovel"/>
                </Inventory>
                <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
            </AgentStart>
            <AgentHandlers>
                <ObservationFromFullStats/>
                <ObservationFromGrid>
                    <Grid name="floor3x3W">
                        <min x="-1" y="0" z="-1"/>
                        <max x="1" y="0" z="1"/>
                    </Grid>
                    <Grid name="floor3x3F">
                        <min x="-1" y="-1" z="-1"/>
                        <max x="1" y="-1" z="1"/>
                    </Grid>
                </ObservationFromGrid>
                <DiscreteMovementCommands/>
            </AgentHandlers>
        </AgentSection>
        <AgentSection mode="Survival">
            <Name>Enemy</Name>
            <AgentStart>
                <Inventory>
                    <InventoryItem slot="0" type="diamond_shovel"/>
                </Inventory>
                <Placement x=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' y="47.0" z=''' + '"' + str(
        float(map_size) - 0.5) + '"' + ''' pitch="50" yaw="180"/>
            </AgentStart>
            <AgentHandlers>
                <ObservationFromFullStats/>
                <DiscreteMovementCommands/>
                <ObservationFromGrid>
                    <Grid name="floor3x3W">
                        <min x="-1" y="0" z="-1"/>
                        <max x="1" y="0" z="1"/>
                    </Grid>
                    <Grid name="floor3x3F">
                        <min x="-1" y="-1" z="-1"/>
                        <max x="1" y="-1" z="1"/>
                    </Grid>
                </ObservationFromGrid>
                <RewardForTouchingBlockType>
                    <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
                </RewardForTouchingBlockType>
                <AgentQuitFromTouchingBlockType>
                    <Block type="lava" />
                </AgentQuitFromTouchingBlockType>
            </AgentHandlers>
        </AgentSection>
    </Mission>'''

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role,
                         experimentId):
        # Retry startMission until the Minecraft instances are ready,
        # distinguishing recoverable errors from fatal ones.
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role,
                                        experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
                if used_attempts == max_attempts:
                    print("All chances used up - bailing now.")
                    exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        # Poll all hosts until every mission has begun (or time out).
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record,
                     0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record,
                     1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        # Issue one discrete move command and return the expected new (x, z).
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    def attack(ah, index, pos, map, enemy=False):
        #We are going to make it so the agent can only break the blocks immediately around them.
        #So a location will be one of the 8 locations around it
        #Enemy starts facing north (1), Agent starts facing south (3)
        # Enemy: 0 1 0  Agent: 0 3 0
        #        4 X 2         2 X 4
        #        0 3 0         0 1 0
        x, y = math.floor(pos[0]), math.floor(pos[1])
        #print("Player position: {},{} Direction: {}".format(x,y, index))
        did_Break = False
        if enemy:
            if index == "north":
                # print("Index 1")
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
            if index == "east":
                # print("Index 2")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "west":
                # print("Index 4")
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "south":
                # print("Index 3")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y += 1
                did_Break = True
        else:
            # Agent: 0 3 0
            #        2 X 4
            #        0 1 0
            if index == "south":
                # print("Index 3")
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                y += 1
                did_Break = True
            if index == "west":
                # print("Index 4")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                x -= 1
                did_Break = True
            if index == "east":
                # print("Index 2")
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                x += 1
                did_Break = True
            if index == "north":
                # print("Index 3")
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("turn 1")
                time.sleep(0.1)
                ah.sendCommand("attack 1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                ah.sendCommand("turn -1")
                time.sleep(0.1)
                y -= 1
                did_Break = True
        if did_Break:
            # Record the broken floor block in the shared map.
            map[x][y] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}
    '''
    agent_score = 0
    #count = 0
    agent_ob = None
    enemy_ob = None
    # map[x][y] is True while the snow block at (x, y) is intact.
    map = [[True for i in range(0, int(map_size))]
           for j in range(0, int(map_size))]
    # for i in map:
    #     print(i)
    # Main game loop: alternate break/move turns until someone hits lava or
    # the mission's 30-second server time limit ends it.
    while True:
        #Scores should decrease with time and get a bonus if they win
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()
        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)
        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        if agent_ob is None or enemy_ob is None:
            continue
        if agent_state.is_mission_running == False:
            break
        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])
        agent_grid = agent_ob.get(u'floor3x3F', 0)
        enemy_grid = enemy_ob.get(u'floor3x3F', 0)
        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            for i in map:
                print(i)
            return 0
            break
        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            for i in map:
                print(i)
            return 1
            break
        agentMoveString, agentBreakIndex = agentAlgo(agent_host1,
                                                     agent_position,
                                                     enemy_position,
                                                     agent_grid, map)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2,
                                                     enemy_position,
                                                     agent_position,
                                                     enemy_grid, map)
        # #Agent Turn to Break
        attack(agent_host1, agentBreakIndex, agent_position, map)
        # #Enemy Turn to Move
        pos = movement(agent_host2, enemyMoveString, enemy_position)
        # #Enemy Turn to Break
        attack(agent_host2, enemyBreakIndex, pos, map, enemy=True)
        # #Agent Turn to Move
        movement(agent_host1, agentMoveString, agent_position)
    for i in map:
        print(i)
    return 2
def __init__(self, world_def, video_dim=(32, 32), num_parallel=1,
             time_limit=20, reset=True, discrete_actions=False,
             vision_observation=False, depth=False, num_frames=1,
             grayscale=True):
    """Gym-style wrapper around a Malmo mission generated by *world_def*.

    Args:
        world_def: world definition object; must provide generate_mission(),
            x_bounds and z_bounds.
        video_dim: (width, height) of the requested video stream.
        num_parallel: number of Minecraft clients; a client pool is only
            built when this is > 1.
        time_limit: mission time limit in seconds.
        reset: forwarded to world_def.generate_mission().
        discrete_actions: use a Discrete action set instead of a Box.
        vision_observation: observe raw frames instead of position features.
        depth: stored only; not used within this constructor.
        num_frames: number of stacked frames in the vision observation.
        grayscale: single-channel frames in [0, 1] vs RGB in [0, 255].
    """
    self.video_width, self.video_height = video_dim
    self.image_width, self.image_height = video_dim
    self.discrete_actions = discrete_actions
    self.vision_observation = vision_observation
    self.depth = depth
    self.num_parallel = num_parallel
    self.world_def = world_def
    self.mission = self.world_def.generate_mission(reset=reset)
    #self.XGoalPos, self.YGoalPos = self.world_def.goal_pos[0], self.world_def.goal_pos[2]
    # NOTE(review): arguments look swapped — requestVideo takes (width,
    # height) but is given (height, width). Harmless while video_dim is
    # square; confirm before changing.
    self.mission.requestVideo(self.video_height, self.video_width)
    self.mission.observeRecentCommands()
    self.mission.allowAllContinuousMovementCommands()
    self.mission.timeLimitInSeconds(time_limit)
    # One client per parallel instance on consecutive ports from 10000.
    if self.num_parallel > 1:
        self.client_pool = MalmoPython.ClientPool()
        for i in range(num_parallel):
            port = 10000 + i
            self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
    self.agent_host = MalmoPython.AgentHost()
    self.agent_host.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
    # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    #self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
    self.agent_host.setVideoPolicy(
        MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)
    self.mission_record_spec = MalmoPython.MissionRecordSpec()
    if discrete_actions:
        # Index -> Malmo command; None acts as a no-op.
        self._action_set = {
            0: "move 1",
            1: "turn 0.5",
            2: "turn -0.5",
            3: None
        }
        self.action_space = Discrete(n=len(self._action_set))
    else:
        # (command, (low, high)) continuous action channels.
        self._action_set = [
            ("move", (-1, 1)),
            ("turn", (-1, 1)),
            ("pitch", (-1, 1)),
            ("use", (0, 1)),
            ("jump", (0, 1)),
        ]
        # self._action_set = [("move", (0, 1)),
        #                     ("move", (-1, 0)),
        #                     ("turn", (0, 1)),
        #                     ("turn", (-1, 0))]
        lower_bound = np.asarray([x[1][0] for x in self._action_set])
        upper_bound = np.asarray([x[1][1] for x in self._action_set])
        self.action_space = Box(lower_bound, upper_bound)
    self.num_frames = num_frames
    self.grayscale = grayscale
    if self.grayscale:
        self.num_frame_channels = 1
        high = 1
    else:
        self.num_frame_channels = 3
        high = 255
    # Obs keys and bounds
    x_bounds = self.world_def.x_bounds
    z_bounds = self.world_def.z_bounds
    # Diagonal of the arena: upper bound on distance-from-goal.
    self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
    self.minDistanceFromGoal = None
    if self.vision_observation:
        self.observation_space = Box(
            low=0,
            high=high,
            shape=(self.image_height, self.image_width,
                   self.num_frames * self.num_frame_channels))
    else:
        # Feature observation: (key, (low, high)) pairs, in order.
        self.obs_keys = [
            (u'XPos', x_bounds),
            (u'YPos', (200, 300)),
            (u'ZPos', z_bounds),
            (u'yaw', (0, 360)),
            (u'pitch', (0, 180)),
            #(u'XGoalPos', x_bounds),
            #(u'YGoalPos', z_bounds),
            (u'DistanceTravelled', (0, 30)),
            (u'distanceFromGoal', (0, self.max_dist))
        ]
        l_bounds = [key[1][0] for key in self.obs_keys]
        u_bounds = [key[1][1] for key in self.obs_keys]
        self.observation_space = Box(np.array(l_bounds), np.array(u_bounds))
    self.last_obs = None
    self.cum_reward = 0
    self.distance_travelled = 0
    self.terminal = False
    self.jump = 0
def init(self,
         client_pool=None,
         start_minecraft=None,
         continuous_discrete=True,
         add_noop_command=None,
         max_retries=90,
         retry_sleep=10,
         step_sleep=0.001,
         skip_steps=0,
         videoResolution=None,
         videoWithDepth=None,
         observeRecentCommands=None,
         observeHotBar=None,
         observeFullInventory=None,
         observeGrid=None,
         observeDistance=None,
         observeChat=None,
         allowContinuousMovement=None,
         allowDiscreteMovement=None,
         allowAbsoluteMovement=None,
         recordDestination=None,
         recordObservations=None,
         recordRewards=None,
         recordCommands=None,
         recordMP4=None,
         gameMode=None,
         forceWorldReset=None):
    """Configure the Malmo mission, clients, spaces and recording options.

    Mutates `self.mission_spec` according to the observe*/allow* flags,
    optionally starts a local Minecraft, builds the client pool, derives
    the video-based observation space, and prepares the mission recording.

    Args:
        client_pool: List of (ip, port) tuples, or None.
        start_minecraft: If truthy, launch Minecraft and override the pool.
        continuous_discrete, add_noop_command: Stored for action handling.
        max_retries, retry_sleep, step_sleep, skip_steps: Stored timing knobs.
        videoResolution: (width, height) to request, with or without depth.
        observe*: Enable the corresponding Malmo observation producers.
        allow*Movement: True for all commands of that kind, or a list of
            individual command names.
        record*: Mission-recording options (destination, observations,
            rewards, commands, MP4 settings).
        gameMode: "spectator", "creative" or "survival".
        forceWorldReset: Stored flag.

    Raises:
        ValueError: If `client_pool` is not a list, or `gameMode` is unknown.
    """
    self.max_retries = max_retries
    self.retry_sleep = retry_sleep
    self.step_sleep = step_sleep
    self.skip_steps = skip_steps
    self.forceWorldReset = forceWorldReset
    self.continuous_discrete = continuous_discrete
    self.add_noop_command = add_noop_command
    if videoResolution:
        if videoWithDepth:
            self.mission_spec.requestVideoWithDepth(*videoResolution)
        else:
            self.mission_spec.requestVideo(*videoResolution)
    if observeRecentCommands:
        self.mission_spec.observeRecentCommands()
    if observeHotBar:
        self.mission_spec.observeHotBar()
    if observeFullInventory:
        self.mission_spec.observeFullInventory()
    if observeGrid:
        self.mission_spec.observeGrid(*(observeGrid + ["grid"]))
    if observeDistance:
        self.mission_spec.observeDistance(*(observeDistance + ["dist"]))
    if observeChat:
        self.mission_spec.observeChat()
    if allowContinuousMovement or allowDiscreteMovement or allowAbsoluteMovement:
        # if there are any parameters, remove current command handlers first
        self.mission_spec.removeAllCommandHandlers()
        if allowContinuousMovement is True:
            self.mission_spec.allowAllContinuousMovementCommands()
        elif isinstance(allowContinuousMovement, list):
            for cmd in allowContinuousMovement:
                self.mission_spec.allowContinuousMovementCommand(cmd)
        if allowDiscreteMovement is True:
            self.mission_spec.allowAllDiscreteMovementCommands()
        elif isinstance(allowDiscreteMovement, list):
            for cmd in allowDiscreteMovement:
                self.mission_spec.allowDiscreteMovementCommand(cmd)
        if allowAbsoluteMovement is True:
            self.mission_spec.allowAllAbsoluteMovementCommands()
        elif isinstance(allowAbsoluteMovement, list):
            for cmd in allowAbsoluteMovement:
                self.mission_spec.allowAbsoluteMovementCommand(cmd)
    if start_minecraft:
        # start Minecraft process assigning port dynamically
        self.mc_process, port = minecraft_py.start()
        logger.info(
            "Started Minecraft on port %d, overriding client_pool.", port)
        client_pool = [('127.0.0.1', port)]
    if client_pool:
        if not isinstance(client_pool, list):
            raise ValueError(
                "client_pool must be list of tuples of (IP-address, port)")
        self.client_pool = MalmoPython.ClientPool()
        for client in client_pool:
            self.client_pool.add(MalmoPython.ClientInfo(*client))
    # TODO: produce observation space dynamically based on requested features
    self.video_height = self.mission_spec.getVideoHeight(0)
    self.video_width = self.mission_spec.getVideoWidth(0)
    self.video_depth = self.mission_spec.getVideoChannels(0)
    self.observation_space = spaces.Box(low=0,
                                        high=255,
                                        shape=(self.video_height,
                                               self.video_width,
                                               self.video_depth))
    # dummy image just for the first observation
    self.last_image = np.zeros(
        (self.video_height, self.video_width, self.video_depth),
        dtype=np.uint8)
    self._create_action_space()
    # mission recording
    self.mission_record_spec = MalmoPython.MissionRecordSpec(
    )  # record nothing
    if recordDestination:
        self.mission_record_spec.setDestination(recordDestination)
    # Fix: recordObservations was accepted but never applied before.
    if recordObservations:
        self.mission_record_spec.recordObservations()
    if recordRewards:
        self.mission_record_spec.recordRewards()
    if recordCommands:
        self.mission_record_spec.recordCommands()
    if recordMP4:
        self.mission_record_spec.recordMP4(*recordMP4)
    if gameMode:
        if gameMode == "spectator":
            self.mission_spec.setModeToSpectator()
        elif gameMode == "creative":
            self.mission_spec.setModeToCreative()
        elif gameMode == "survival":
            # logger.warn is deprecated; warning() is the documented API.
            logger.warning(
                "Cannot force survival mode, assuming it is the default.")
        else:
            # Was `assert False`, which is stripped under `python -O`;
            # raise so bad input always fails loudly.
            raise ValueError("Unknown game mode: " + gameMode)
def main():
    """Play a song in Minecraft with multiple agents striking note blocks.

    Parses a song CSV into note frequencies, assigns notes to agents,
    starts one Malmo mission per agent, then teleports each agent to its
    note block and triggers attack commands in time with the song.
    """
    #Hardcode number of agents to play song
    num_agents = 4
    #Obtain song csv and get solutions
    #freq_list = mt.create_note_list("Twinkle_Twinkle_Little_Star.csv",120,7000,-.08) #1 Agent
    #freq_list = mt.create_note_list("Chopsticks.csv",120,4000,-.15,.03) #2 Agents
    freq_list = mt.create_note_list("Bad_Apple.csv", 120, 3000, -.08,
                                    .03)  #2 Agents
    #freq_list = mt.create_note_list("Grenade_120BPM.csv",120,1500,-.08,.03) #4 Agents
    freq_list = mt.number_converter(freq_list)
    # solutions[j][i] is the note for agent j at time step i
    # (inferred from the simulation loop below — TODO confirm).
    solutions = cs.get_solutions(freq_list, num_agents)
    print(solutions)
    #print(solutions)
    #Get Mission. Needed for teleport positions.
    missionXML = getMissionXML(num_agents)
    #Create musician for each agent and pass teleport positions.
    # NOTE(review): `note_positions` is a module-level global not defined in
    # this function — confirm it is initialized before main() runs.
    musicians = []
    for i in range(num_agents):
        agent_positions = generateAgentTeleportPositions(note_positions, i)
        musicians.append(Musician(agent_positions))
    '''
    MALMO
    '''
    print('Starting...', flush=True)
    #Create agents.
    agent_hosts = []
    for i in range(num_agents):
        agent_hosts.append(MalmoPython.AgentHost())
    malmoutils.parse_command_line(agent_hosts[0])
    #Get mission and allow commands for teleport.
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.allowAllChatCommands()
    #Add client for each agent needed.
    my_client_pool = MalmoPython.ClientPool()
    for i in range(num_agents):
        my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000 + i))
    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    #Start mission for each agent
    for i in range(num_agents):
        startMission(
            agent_hosts[i], my_mission, my_client_pool,
            malmoutils.get_default_recording_object(
                agent_hosts[0],
                "agent_" + str(i + 1) + "_viewpoint_discrete"), i, '')
    #Wait for all missions to begin.
    waitForStart(agent_hosts)
    #Pause for simulation to begin.
    time.sleep(1)
    '''
    SIMULATION BEGINS HERE
    '''
    # One outer iteration per song time step.
    for i in range(len(solutions[0])):
        #teleport each agent to the corresponding note.
        for j in range(len(musicians)):
            musicians[j].teleport_to_noteblock(agent_hosts[j],
                                               solutions[j][i])
        # play each note: press attack for every agent with a note,
        # hold briefly, then release.
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 1")
        time.sleep(0.001)
        for k in range(len(musicians)):
            if musicians[k].can_play:
                agent_hosts[k].sendCommand("attack 0")
                musicians[k].can_play = False
        #modifies the timing between each note hit.
        time.sleep(0.2)
def run(argv=['']):
    """Run a survival-mode Malmo mission with a simple wander-and-jump bot.

    The agent walks forward while oscillating its turn direction, reads a
    3x4x3 block grid observation ("all_the_blocks"), and jumps or turns
    when blocks ahead of its facing direction are not air. Restarts
    Minecraft via `restart_minecraft` when the mission or world state stalls
    beyond `max_response_time`.

    Args:
        argv: Command-line arguments forwarded to malmoutils.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    #forceReset="true"
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
              <About>
                <Summary>Hello world!</Summary>
              </About>
              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>
              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                  <Inventory>
                    <InventoryItem slot="8" type="diamond_pickaxe"/>
                  </Inventory>
                </AgentStart>
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <ObservationFromGrid>
                    <Grid name="all_the_blocks" >
                      <min x="-1" y="-1" z="-1"/>
                      <max x="1" y="2" z="1"/>
                    </Grid>
                  </ObservationFromGrid>
                  <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''
    malmoutils.fix_print()
    #agent_host = MalmoPython.AgentHost()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)
    #my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")
    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)
    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)
    max_retries = 3
    max_response_time = 60  # seconds
    # Retry mission start a few times; Minecraft may not be ready yet.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    last_delta = time.time()
    # main loop:
    #agent_host.sendCommand( "jump 1")
    # TURN drives the sinusoidal turn direction; JUMP/TURN2 are countdown
    # timers for an in-progress jump / forced turn.
    TURN = 0
    TURN2 = 0
    JUMP = 0
    while world_state.is_mission_running:
        print("New Iteration")
        if JUMP > 0:
            JUMP = JUMP - 1
            if JUMP == 0:
                agent_host.sendCommand("jump 0")
                JUMP = JUMP - 1
        agent_host.sendCommand("move 1")
        # Oscillate heading: turn right while sin(TURN) is non-negative,
        # left (slightly harder) otherwise.
        if math.sin(TURN) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(TURN, " ", math.sin(TURN))
        TURN = TURN + 0.3
        #agent_host.sendCommand( "jump 1" )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        # NOTE(review): this indexes observations[-1] without checking the
        # list is non-empty — could raise IndexError if no observation has
        # arrived yet. TODO confirm intended.
        y = json.loads(world_state.observations[-1].text)
        #print(y["all_the_blocks"])
        dir = ""
        if y["Yaw"] + 180 < 90:
            dir = "S"
            print("Facing South")
        elif y["Yaw"] < 180:
            dir = "W"
            print("Facing West")
        elif y["Yaw"] < 270:
            dir = "N"
            print("Facing North")
        else:
            dir = "E"
            print("Facing East")
        # Split the flat 36-element grid into 4 layers of 9 blocks each
        # (y = -1..2 around the agent).
        blocks = [[], [], [], []]
        i = 0
        for x in y["all_the_blocks"]:
            blocks[math.floor(i / 9)].append(x)
            i = i + 1
        # For each facing direction, check the row of foot-level blocks
        # ahead and start a 2-tick jump if any is solid.
        if dir == "S":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j] != "air":
                    willjump = True
                    print(j, blocks[1][j], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "W":
            willjump = False
            for j in range(0, 3):
                if blocks[1][j * 3] != "air":
                    willjump = True
                    print(j * 3, blocks[1][j * 3], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "E":
            willjump = False
            for j in range(1, 4):
                if blocks[1][j * 3 - 1] != "air":
                    willjump = True
                    print(j * 3 - 1, blocks[1][j * 3 - 1], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        elif dir == "N":
            willjump = False
            for j in range(0, 3):
                # NOTE(review): tests blocks[1][j] but prints blocks[1][j+6];
                # the north-facing row is likely indices j+6 — suspected bug,
                # left as-is. TODO confirm.
                if blocks[1][j] != "air":
                    willjump = True
                    print(j, blocks[1][j + 6], willjump)
            if willjump:
                JUMP = 2
                agent_host.sendCommand("jump 1")
        # NOTE(review): the last two clauses duplicate the first two — the
        # condition is redundant as written. Left unchanged.
        if (blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"
                or blocks[1][2] != "air" and blocks[2][2] != "air"
                or blocks[1][4] != "air" and blocks[2][4] != "air"):
            TURN2 = 2
        if TURN2 >= 0:
            agent_host.sendCommand("turn 1")
            TURN2 = TURN2 - 1
        '''if blocks[1][5] != "air" or blocks[1][5] != "grass" or blocks[1][5] != "tallgrass" :
            JUMP = 2
            agent_host.sendCommand( "jump 1" )
            print()
            print(blocks[1][5])'''
        #print(len(blocks))
        #print(blocks)
        # Watchdog: restart Minecraft if nothing new has arrived for too long.
        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
            #print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def __init__(self,
             maze_def,
             reset,
             video_dim=(32, 32),
             num_parallel=1,
             time_limit=30,
             discrete_actions=False,
             vision_observation=True,
             depth=False,
             num_frames=1,
             grayscale=True):
    """Build a maze-navigation Malmo environment.

    Creates a maze from `maze_def`, generates the mission via `MissionGen`,
    records the goal position, and sets up the AgentHost plus gym-style
    action/observation spaces.

    Args:
        maze_def: Maze definition passed to `create_maze`.
        reset: Forwarded to `MissionGen.generate_mission`.
        video_dim: (width, height) of the requested video frames.
        num_parallel: Number of Minecraft clients; a pool is built only if > 1.
        time_limit: Accepted but currently unused (the timeLimitInSeconds
            call below is commented out).
        discrete_actions: If True, 3 discrete move/turn commands; else a
            continuous Box over move/turn.
        vision_observation: If True, frame observations; else scalar stats.
        depth: Stored flag; not otherwise used here.
        num_frames: Number of stacked frames per observation.
        grayscale: If True, 1 channel in [0, 1]; else 3 channels in [0, 255].
    """
    self.video_width, self.video_height = video_dim
    self.image_width, self.image_height = video_dim
    self.discrete_actions = discrete_actions
    self.vision_observation = vision_observation
    self.depth = depth
    self.num_parallel = num_parallel
    maze = create_maze(maze_def)
    self.mission_gen = MissionGen()
    self.mission = self.mission_gen.generate_mission(
        maze.create_maze_array(), reset=reset)
    # Goal X/Z taken from the generator's goal position (indices 0 and 2).
    self.XGoalPos, self.YGoalPos = self.mission_gen.goal_pos[
        0], self.mission_gen.goal_pos[2]
    # with open(mission_file, 'r') as f:
    #     print("Loading mission from %s" % mission_file)
    #     mission_xml = f.read()
    #     self.mission = MalmoPython.MissionSpec(mission_xml, True)
    # NOTE(review): MissionSpec.requestVideo takes (width, height); here
    # (video_height, video_width) is passed — only safe while video_dim is
    # square. TODO confirm.
    self.mission.requestVideo(self.video_height, self.video_width)
    self.mission.observeRecentCommands()
    self.mission.allowAllContinuousMovementCommands()
    # self.mission.timeLimitInSeconds(time_limit)
    # One client per parallel instance on consecutive ports from 10000.
    if self.num_parallel > 1:
        self.client_pool = MalmoPython.ClientPool()
        for i in range(num_parallel):
            port = 10000 + i
            self.client_pool.add(MalmoPython.ClientInfo("127.0.0.1", port))
    self.agent_host = MalmoPython.AgentHost()
    self.agent_host.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
    # self.agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.KEEP_ALL_FRAMES)
    # self.agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)
    self.mission_record_spec = MalmoPython.MissionRecordSpec()
    if discrete_actions:
        self._action_set = {0: "move 1", 1: "turn 1", 2: "turn -1"}
        self.action_space = Discrete(n=len(self._action_set))
    else:
        # self._action_set = ["move", "turn", "pitch"]
        # self.action_space = Box(np.array([0, -.5, -.5]), np.array([1, .5, .5]))
        self._action_set = [("move", (-1, 1)), ("turn", (-0.5, 0.5))]
        #("jump", (-1, 1))]
        lower_bound = np.asarray([x[1][0] for x in self._action_set])
        upper_bound = np.asarray([x[1][1] for x in self._action_set])
        self.action_space = Box(lower_bound, upper_bound)
    self.num_frames = num_frames
    self.grayscale = grayscale
    if self.grayscale:
        self.num_frame_channels = 1
        high = 1
    else:
        self.num_frame_channels = 3
        high = 255
    # Obs keys and bounds
    x_bounds = self.mission_gen.x_bounds
    z_bounds = self.mission_gen.z_bounds
    # Diagonal of the maze footprint — upper bound for distance-from-goal.
    self.max_dist = np.linalg.norm((x_bounds[-1], z_bounds[-1]))
    self.minDistanceFromGoal = None
    if self.vision_observation:
        # Channels-first frame stack (frames*channels, H, W) — note this
        # differs from the channels-last layout used by the sibling
        # world_def environment.
        self.observation_space = Box(
            low=0,
            high=high,
            shape=(self.num_frames * self.num_frame_channels,
                   self.image_height, self.image_width))
    else:
        # Scalar observation vector: (Malmo observation key, (low, high)).
        self.obs_keys = [(u'XPos', x_bounds), (u'ZPos', z_bounds),
                         (u'yaw', (0, 360)), (u'XGoalPos', x_bounds),
                         (u'YGoalPos', z_bounds),
                         (u'DistanceTravelled', (0, 30)),
                         (u'distanceFromGoal', (0, self.max_dist))]
        l_bounds = [key[1][0] for key in self.obs_keys]
        u_bounds = [key[1][1] for key in self.obs_keys]
        self.observation_space = Box(np.array(l_bounds), np.array(u_bounds))
    # self._horizon = env.spec.timestep_limit
    # Per-episode bookkeeping, reset elsewhere between episodes.
    self.last_obs = None
    self.cum_reward = 0
    self.distance_travelled = 0
    self.terminal = False
    self.jump = 0
and AGENT_COOLDOWNS[1] <= 0 and not AGENT_IS_SHIELDING[1]: agent.sendCommand("attack 1") agent.sendCommand("attack 0") AGENT_COOLDOWNS[1] = ATTACK_COOLDOWNS[AGENT_WEAPONS[1]] if __name__ == "__main__": # Flush immediately print = functools.partial(print, flush=True) # Create agent host agent_hosts = [Malmo.AgentHost() for _ in range(AGENT_COUNT)] # Create client pool client_pool = Malmo.ClientPool() client_pool.add(Malmo.ClientInfo("127.0.0.1", 10000)) client_pool.add(Malmo.ClientInfo("127.0.0.1", 10002)) for a in range(MISSION_COUNT): print(f"Running mission #{a}...") # Create missions mission = Malmo.MissionSpec(get_mission_xml(), True) mission_id = str(uuid.uuid4()) # Start mission for a in range(AGENT_COUNT): start_mission(agent_hosts[a], mission, client_pool, Malmo.MissionRecordSpec(), a, mission_id) wait_for_start(agent_hosts)
def deep_q_learning_run(sess,
                        agent_host,
                        q_estimator,
                        state_processor,
                        experiment_dir,
                        epsilon_start=1.0,
                        epsilon_end=0.1,
                        epsilon_decay_steps=8000):
    """Run one Malmo mission with a restored Q-network checkpoint.

    Loads the mission XML named by the agent host's `mission_file` argument,
    restores a TensorFlow checkpoint from `experiment_dir`, starts the
    mission, then steps the agent until the mission ends. Despite the name,
    the action at each step is chosen uniformly at random (`randint(0, 3)`);
    the epsilon schedule and greedy policy are constructed but not used in
    the loop — see NOTE(review) below.

    Args:
        sess: TensorFlow Session object.
        agent_host: Malmo AgentHost (provides `mission_file` argument).
        q_estimator: Estimator object used for the q values.
        state_processor: A StateProcessor object.
        experiment_dir: Directory containing the "checkpoints" subfolder.
        epsilon_start: Initial epsilon for the (unused) decay schedule.
        epsilon_end: Final epsilon for the (unused) decay schedule.
        epsilon_decay_steps: Number of steps for the (unused) schedule.

    Returns:
        None.
    """
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
    my_mission = MalmoPython.MissionSpec(mission_xml, True)
    # Discrete-movement-only mission, third-person viewpoint.
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo(
        '127.0.0.1', 10000))  # add Minecraft machines here as available
    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'
    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    print("Checkpoint dir is:", checkpoint_dir)
    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    # latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    # print("~~~~~~~~~~~~~~", latest_checkpoint)
    # exit(0)
    # NOTE(review): this hard-codes the checkpoint path instead of using
    # tf.train.latest_checkpoint, so the `if` below is always truthy and
    # restore will fail if the file does not exist. TODO confirm intended.
    latest_checkpoint = os.path.join(checkpoint_dir, "model")
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    total_t = sess.run(tf.contrib.framework.get_global_step())
    # The epsilon decay schedule
    # NOTE(review): `epsilons` and `policy` are computed but never used in
    # this function — the step loop acts randomly.
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)
    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))
    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "save_%s-rep" % (expID))
    # Retry mission start; Minecraft may not be ready yet.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients,
                                    my_mission_record, agentID,
                                    "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    # Tilt the camera down to see the grid.
    agent_host.sendCommand("look -1")
    agent_host.sendCommand("look -1")
    # Wait until a non-empty observation arrives.
    while world_state.is_mission_running and all(
            e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    # Populate the replay memory with initial experience
    while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
        # print("Sleeping")
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
    state = gridProcess(
        world_state
    )  # MALMO ENVIRONMENT Grid world NEEDED HERE/ was env.reset()
    state = state_processor.process(sess, state)
    # Stack 4 copies of the first frame to seed the frame-history stack.
    state = np.stack([state] * 4, axis=2)
    stepNum = 0
    while world_state.is_mission_running:
        # Random action index into actionSet (see docstring note).
        action = randint(0, 3)
        print("actions:", action)
        # next_state, reward, done, _ = env.step(actionSet[action])
        # Malmo send command for the action
        # print("Sending command: ", actionSet[action])
        print("Step %s" % stepNum)
        stepNum += 1
        agent_host.sendCommand(actionSet[action])
        world_state = agent_host.peekWorldState()
        # Wait for at least one new video frame after the action.
        num_frames_seen = world_state.number_of_video_frames_since_last_state
        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = agent_host.peekWorldState()
        if world_state.is_mission_running:
            # Getting the reward from taking a step
            while world_state.number_of_observations_since_last_state <= 0:
                time.sleep(0.1)
                world_state = agent_host.peekWorldState()
            # world_state = agent_host.getWorldState()
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            # Slide the frame window: drop oldest, append newest.
            next_state = np.append(state[:, :, 1:],
                                   np.expand_dims(next_state, 2),
                                   axis=2)
            state = next_state
            # time.sleep(1)
    return None
# See if we can parse our extended command line. malmoutils.parse_command_line(agentHost) # As we are not recording our video xml should be an empty string. assert malmoutils.get_video_xml(agentHost) == '' # Test that we can get a default recording spec. assert type(malmoutils.get_default_recording_object( agentHost, "test")) == MalmoPython.MissionRecordSpec # Default recordings directory is ''. assert malmoutils.get_recordings_directory(agentHost) == '' def clientInfos(cp): return [(c.ip_address, c.control_port, c.command_port) for c in cp.clients] # Test adding some client infos to a client pool. clientPool = MalmoPython.ClientPool() assert len(clientPool.clients) == 0 c1 = ("localhost", 10000, 0) client1 = MalmoPython.ClientInfo(*c1) clientPool.add(client1) assert clientInfos(clientPool) == [c1] c2 = ("127.0.0.1", 10001, 20001) client2 = MalmoPython.ClientInfo(*c2) clientPool.add(client2) assert clientInfos(clientPool) == [c1, c2]
def cwc_run_mission(args):
    """Run a collaborative-building Malmo mission (builder/architect/viewers).

    In normal mode, starts an oracle mission plus builder, architect and
    `num_fixed_viewers` fixed-viewpoint missions, polls builder observations
    until the mission ends, and logs the raw observations. In
    create-target-structures mode, runs a single builder mission and writes
    the built structure out as a gold-configuration XML file.

    Args:
        args: Dict with keys used below: builder/architect/fixed_viewer
            IPs and ports, "num_fixed_viewers", "draw_inventory_blocks",
            "existing_is_gold", "create_target_structures", "gold_config",
            "existing_config", "lan", "builder_id", "architect_id".
    """
    print("Calling cwc_run_mission with args:", args, "\n")
    start_time = time.time()
    builder_ip, builder_port = args["builder_ip_addr"], args["builder_port"]
    architect_ip, architect_port = args["architect_ip_addr"], args[
        "architect_port"]
    fixed_viewer_ip, fixed_viewer_port, num_fixed_viewers = args[
        "fixed_viewer_ip_addr"], args["fixed_viewer_port"], args[
            "num_fixed_viewers"]
    draw_inventory_blocks = args["draw_inventory_blocks"]
    existing_is_gold = args["existing_is_gold"]
    create_target_structures = args["create_target_structures"]
    # In create-target mode there is only one agent (the builder) at index 0;
    # otherwise index 0 is the oracle and the builder sits at index 1.
    builder_idx = 0 if create_target_structures else 1
    if create_target_structures and os.path.isfile(args["gold_config"]):
        print(
            "ERROR: attempting to create target structure",
            args["gold_config"],
            "but it already exists! Please update the configs_csv file to include file paths for NEW target structures only."
        )
        sys.exit(0)
    # Create agent hosts:
    agent_hosts = []
    for i in range((3 +
                    num_fixed_viewers) if not create_target_structures else 1):
        agent_hosts.append(MalmoPython.AgentHost())
    # Set observation policy for builder
    agent_hosts[builder_idx].setObservationsPolicy(
        MalmoPython.ObservationsPolicy.KEEP_ALL_OBSERVATIONS)
    # Set up a client pool
    client_pool = MalmoPython.ClientPool()
    if not args["lan"]:
        print("Starting in local mode.")
        # Localhost ports 10000.. for oracle, builder, architect, viewers.
        client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
        if not create_target_structures:
            client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
            client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10002))
            for i in range(num_fixed_viewers):
                client_pool.add(MalmoPython.ClientInfo('127.0.0.1',
                                                       10003 + i))
    else:
        print("Builder IP: " + builder_ip, "\tPort:", builder_port)
        print("Architect IP:", architect_ip, "\tPort:", architect_port)
        print("FixedViewer IP:", fixed_viewer_ip, "\tPort:",
              fixed_viewer_port, "\tNumber of clients:", num_fixed_viewers,
              "\n")
        if not create_target_structures:
            # Oracle runs on the architect machine at architect_port + 1.
            client_pool.add(
                MalmoPython.ClientInfo(architect_ip, architect_port + 1))
            client_pool.add(MalmoPython.ClientInfo(builder_ip, builder_port))
            client_pool.add(
                MalmoPython.ClientInfo(architect_ip, architect_port))
            for i in range(num_fixed_viewers):
                client_pool.add(
                    MalmoPython.ClientInfo(fixed_viewer_ip,
                                           fixed_viewer_port + i))
        else:
            client_pool.add(MalmoPython.ClientInfo(builder_ip, builder_port))
    # experiment ID
    player_ids = "B" + args["builder_id"] + "-A" + args["architect_id"]
    config_id = os.path.basename(args["gold_config"]).replace(".xml", "")
    experiment_time = str(int(round(time.time() * 1000)))
    experiment_id = player_ids + "-" + config_id + "-" + experiment_time
    # obtain xml substrings
    gold_config_xml_substring = io_utils.readXMLSubstringFromFile(
        args["gold_config"], False) if not create_target_structures else ""
    existing_config_xml_substring = io_utils.readXMLSubstringFromFile(
        args["existing_config"], existing_is_gold)
    # construct mission xml
    missionXML = generateMissionXML(experiment_id,
                                    existing_config_xml_substring,
                                    num_fixed_viewers,
                                    draw_inventory_blocks,
                                    create_target_structures)
    missionXML_oracle = generateOracleXML(experiment_id,
                                          gold_config_xml_substring)
    if not create_target_structures:
        # oracle
        my_mission_oracle = MalmoPython.MissionSpec(missionXML_oracle, True)
        mission_utils.safeStartMission(agent_hosts[0], my_mission_oracle,
                                       client_pool,
                                       MalmoPython.MissionRecordSpec(), 0,
                                       "cwc_dummy_mission_oracle")
    # builder, architect
    my_mission = MalmoPython.MissionSpec(missionXML, True)
    mission_utils.safeStartMission(agent_hosts[builder_idx], my_mission,
                                   client_pool,
                                   MalmoPython.MissionRecordSpec(), 0,
                                   "cwc_dummy_mission")
    if not create_target_structures:
        mission_utils.safeStartMission(agent_hosts[2], my_mission,
                                       client_pool,
                                       MalmoPython.MissionRecordSpec(), 1,
                                       "cwc_dummy_mission")
        # fixed viewers
        for i in range(num_fixed_viewers):
            mission_utils.safeStartMission(agent_hosts[3 + i], my_mission,
                                           client_pool,
                                           MalmoPython.MissionRecordSpec(),
                                           2 + i, "cwc_dummy_mission")
    mission_utils.safeWaitForStart(agent_hosts)
    # poll for observations
    timed_out = False
    all_observations = []
    while not timed_out:
        for i in range((
                3 + num_fixed_viewers) if not create_target_structures else 1):
            ah = agent_hosts[i]
            world_state = ah.getWorldState()
            if not world_state.is_mission_running:
                # Any agent's mission ending ends the polling loop.
                timed_out = True
            elif i == builder_idx and world_state.number_of_observations_since_last_state > 0:
                # Only the builder's observations are collected.
                total_elements = 0
                for observation in world_state.observations:
                    total_elements += len(json.loads(observation.text))
                print("Received", len(world_state.observations),
                      "observations. Total number of elements:",
                      total_elements)
                for observation in world_state.observations:
                    print("Processing observation:", )
                    debug_utils.printObservationElements(
                        json.loads(observation.text))
                    all_observations.append(observation)
                print("-----")
        time.sleep(1)
    time_elapsed = time.time() - start_time
    # End the mission for everyone by killing the players via chat.
    agent_hosts[0].sendCommand("chat /kill")
    print("Mission has been quit. All world states:\n")
    all_world_states = []
    for observation in all_observations:
        world_state = json.loads(observation.text)
        world_state["Timestamp"] = observation.timestamp.replace(
            microsecond=0).isoformat(' ')
        debug_utils.prettyPrintObservation(world_state)
        all_world_states.append(world_state)
    raw_observations = {
        "WorldStates": all_world_states,
        "TimeElapsed": time_elapsed,
        "NumFixedViewers": num_fixed_viewers
    }
    if not create_target_structures:
        io_utils.writeJSONtoLog(experiment_id, "raw-observations.json",
                                raw_observations)
    else:
        # Create-target mode: condense the observations into a gold config
        # XML and save it (twice: once at gold_config, once in the models
        # data directory).
        reformatted = reformatObservations(raw_observations.get("WorldStates"))
        merged = mergeObservations(reformatted)
        _ = postprocess(merged, False)
        time_elapsed = raw_observations.get("TimeElapsed")
        m, s = divmod(time_elapsed, 60)
        h, m = divmod(m, 60)
        raw_observations["WorldStates"] = merged
        print(json.dumps(raw_observations, indent=4))
        xml_str = get_gold_config_xml(raw_observations)
        if len(xml_str) > 0:
            with open(args['gold_config'], 'w') as f:
                f.write(xml_str)
            # NOTE(review): hard-coded relative path — breaks if the script
            # is launched from a different working directory. TODO confirm.
            with open(
                    os.path.join('../../../../cwc-minecraft-models/data',
                                 args['gold_config']), 'w') as f:
                f.write(xml_str)
            print("Wrote gold configuration to", args["gold_config"],
                  " (" + str(len(xml_str.split('\n')) - 1) + ' blocks)')
        else:
            print(
                "WARNING: creating target structures: created structure was empty. Configuration",
                args["gold_config"], "not saved.")
    m, s = divmod(time_elapsed, 60)
    h, m = divmod(m, 60)
    print("Done! Mission time elapsed: %d:%02d:%02d (%.2fs)\n" %
          (h, m, s, time_elapsed))
    print("Waiting for mission to end...")
    # Mission should have ended already, but we want to wait until all the various agent hosts
    # have had a chance to respond to their mission ended message.
    hasEnded = False
    while not hasEnded:
        hasEnded = True  # assume all good
        sys.stdout.write('.')
        time.sleep(0.1)
        # Only the builder and architect hosts are checked here.
        for ah in agent_hosts[1:3]:
            world_state = ah.getWorldState()
            if world_state.is_mission_running:
                hasEnded = False  # all not good
    print("Mission ended")
    # Mission has ended.
    time.sleep(2)
<Mob type="Skeleton" reward="1"/> </RewardForDamagingEntity> <ObservationFromNearbyEntities> <Range name="entities" xrange="''' + str( ARENA_WIDTH) + '''" yrange="2" zrange="''' + str( ARENA_BREADTH) + '''" /> </ObservationFromNearbyEntities> <ObservationFromFullStats/>''' + video_requirements + ''' </AgentHandlers> </AgentSection> </Mission>''' validate = True my_client_pool = MalmoPython.ClientPool() my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000)) episode_reward = 0 if agent_host.receivedArgument("test"): num_reps = 1 else: num_reps = 10000 fout = open('results.csv', 'w') # Set up the agent agent = agentMC.agentMC(agent_host, MAX_ZOMBIES, MAX_DISTANCE, 20) for i in range(num_reps): print('episode:', i) for iRepeat in range(1, MAX_ZOMBIES): ######################################### # Set up the enviornment #
def __init__(self, xml): self.timer = 0 self.start_time = 0 self.agents = [] self.clientPool = MalmoPython.ClientPool() self.missionXML = xml
def deep_q_learning(sess,
                    agent_host,
                    q_estimator,
                    target_estimator,
                    state_processor,
                    num_episodes,
                    experiment_dir,
                    replay_memory_size=500000,
                    replay_memory_init_size=50000,
                    update_target_estimator_every=10000,
                    discount_factor=0.99,
                    epsilon_start=1.0,
                    epsilon_end=0.1,
                    epsilon_decay_steps=50000,
                    batch_size=32,
                    record_video_every=100):
    """
    Q-Learning algorithm for off-policy TD control using Function Approximation.
    Finds the optimal greedy policy while following an epsilon-greedy policy.

    Args:
        sess: Tensorflow Session object
        agent_host: Malmo AgentHost standing in for an OpenAI environment
        q_estimator: Estimator object used for the q values
        target_estimator: Estimator object used for the targets
        state_processor: A StateProcessor object
        num_episodes: Number of episodes to run for
        experiment_dir: Directory to save Tensorflow summaries in
        replay_memory_size: Size of the replay memory
        replay_memory_init_size: Number of random experiences to sample when
            initializing the replay memory.
        update_target_estimator_every: Copy parameters from the Q estimator to the
            target estimator every N steps
        discount_factor: Gamma discount factor
        epsilon_start: Chance to sample a random action when taking an action.
            Epsilon is decayed over time and this is the start value
        epsilon_end: The final minimum value of epsilon after decaying is done
        epsilon_decay_steps: Number of steps to decay epsilon over
        batch_size: Size of batches to sample from the replay memory
        record_video_every: Record a video every N episodes

    Yields:
        (total_t, EpisodeStats) after each episode; finally returns the full
        EpisodeStats with numpy arrays for episode_lengths and episode_rewards.
    """
    # --- Mission setup ------------------------------------------------------
    mission_file = agent_host.getStringArgument('mission_file')
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    my_mission.removeAllCommandHandlers()
    my_mission.allowAllDiscreteMovementCommands()
    my_mission.setViewpoint(2)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available
    max_retries = 3
    agentID = 0
    expID = 'Deep_q_learning memory'

    Transition = namedtuple("Transition", ["state", "action", "reward", "next_state", "done"])

    # The replay memory
    replay_memory = []

    # Keeps track of useful statistics
    stats = plotting.EpisodeStats(
        episode_lengths=np.zeros(num_episodes),
        episode_rewards=np.zeros(num_episodes))

    # Create directories for checkpoints and summaries
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    monitor_path = os.path.join(experiment_dir, "monitor")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    if not os.path.exists(monitor_path):
        os.makedirs(monitor_path)

    saver = tf.train.Saver()
    # Load a previous checkpoint if we find one
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint:
        print("Loading model checkpoint {}...\n".format(latest_checkpoint))
        saver.restore(sess, latest_checkpoint)
    total_t = sess.run(tf.contrib.framework.get_global_step())

    # The epsilon decay schedule
    epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

    # The policy we're following
    policy = make_epsilon_greedy_policy(q_estimator, len(actionSet))

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-rep" % (expID))
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2.5)

    # Wait for the mission to begin.
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        print("Sleeping")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Wait until a real (non-empty) observation arrives.
    while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
        print("Sleeping....")
        world_state = agent_host.peekWorldState()
    world_state = agent_host.getWorldState()

    # Populate the replay memory with initial experience
    print("Populating replay memory...")
    while world_state.number_of_observations_since_last_state <= 0:
        time.sleep(0.1)
        world_state = agent_host.peekWorldState()
    state = gridProcess(world_state)  # Malmo grid observation; stands in for env.reset()
    state = state_processor.process(sess, state)
    state = np.stack([state] * 4, axis=2)  # 4-frame state stack

    for i in range(replay_memory_init_size):
        print("%s th replay memory" % i)
        action_probs = policy(sess, state, epsilons[min(total_t, epsilon_decay_steps - 1)])
        action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
        # Malmo send command for the action (replaces env.step)
        agent_host.sendCommand(actionSet[action])
        # checking if the mission is done
        world_state = agent_host.peekWorldState()
        # Getting the reward from taking a step
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[-1].getValue()
            print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
        else:
            print("No reward")
            reward = 0
        # getting the next state
        while world_state.number_of_observations_since_last_state <= 0 and world_state.is_mission_running:
            print("Sleeping")
            time.sleep(0.1)
            world_state = agent_host.peekWorldState()
        if world_state.is_mission_running:
            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)
            done = not world_state.is_mission_running
            replay_memory.append(Transition(state, action, reward, next_state, done))
            state = next_state
        else:
            # Mission ended mid-prefill: restart it and rebuild the state stack.
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s" % (expID))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)
            world_state = agent_host.getWorldState()
            while not world_state.has_mission_begun:
                print(".", end="")
                time.sleep(0.1)
                world_state = agent_host.getWorldState()
            world_state = agent_host.peekWorldState()
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            world_state = agent_host.getWorldState()
            if not world_state.is_mission_running:
                print("Breaking")
                break
            state = gridProcess(world_state)
            state = state_processor.process(sess, state)
            state = np.stack([state] * 4, axis=2)
    print("Finished populating memory")

    for i_episode in range(num_episodes):
        print("%s-th episode" % i_episode)
        if i_episode != 0:
            # Restart a fresh mission for every episode after the first (the
            # first episode reuses the mission started during prefill).
            mission_file = agent_host.getStringArgument('mission_file')
            with open(mission_file, 'r') as f:
                print("Loading mission from %s" % mission_file)
                mission_xml = f.read()
                my_mission = MalmoPython.MissionSpec(mission_xml, True)
            my_mission.removeAllCommandHandlers()
            my_mission.allowAllDiscreteMovementCommands()
            my_mission.forceWorldReset()
            my_mission.setViewpoint(2)
            my_clients = MalmoPython.ClientPool()
            my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available
            max_retries = 3
            agentID = 0
            expID = 'Deep_q_learning '
            # BUGFIX: was "% (expID, i)" — 'i' is the stale index left over from
            # the prefill loop, so every episode recorded to the same file and
            # experiment id. Use the episode index instead.
            my_mission_record = malmoutils.get_default_recording_object(agent_host, "./save_%s-rep%d" % (expID, i_episode))
            for retry in range(max_retries):
                try:
                    agent_host.startMission(my_mission, my_clients, my_mission_record, agentID, "%s-%d" % (expID, i_episode))
                    break
                except RuntimeError as e:
                    if retry == max_retries - 1:
                        print("Error starting mission:", e)
                        exit(1)
                    else:
                        time.sleep(2.5)

        world_state = agent_host.getWorldState()
        print("Waiting for the mission to start", end=' ')
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)

        # Save the current checkpoint
        saver.save(tf.get_default_session(), checkpoint_path)

        # Reset the environment: wait for a real observation, then build the
        # initial 4-frame state stack.
        while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
            world_state = agent_host.peekWorldState()
        world_state = agent_host.getWorldState()
        state = gridProcess(world_state)
        state = state_processor.process(sess, state)
        state = np.stack([state] * 4, axis=2)
        loss = None

        # One step in the environment
        for t in itertools.count():
            # Epsilon for this time step
            epsilon = epsilons[min(total_t, epsilon_decay_steps - 1)]

            # Add epsilon to Tensorboard
            episode_summary = tf.Summary()
            episode_summary.value.add(simple_value=epsilon, tag="epsilon")
            q_estimator.summary_writer.add_summary(episode_summary, total_t)

            # Maybe update the target estimator
            if total_t % update_target_estimator_every == 0:
                copy_model_parameters(sess, q_estimator, target_estimator)
                print("\nCopied model parameters to target network.")

            # Print out which step we're on, useful for debugging.
            print("\rStep {} ({}) @ Episode {}/{}, loss: {}".format(
                t, total_t, i_episode + 1, num_episodes, loss), end="")
            sys.stdout.flush()

            # Take a step
            action_probs = policy(sess, state, epsilon)
            action = np.random.choice(np.arange(len(action_probs)), p=action_probs)
            agent_host.sendCommand(actionSet[action])
            world_state = agent_host.peekWorldState()
            if world_state.number_of_rewards_since_last_state > 0:
                reward = world_state.rewards[-1].getValue()
                print("Just received the reward: %s on action: %s " % (reward, actionSet[action]))
            else:
                print("No reward")
                reward = 0
            while world_state.is_mission_running and all(e.text == '{}' for e in world_state.observations):
                world_state = agent_host.peekWorldState()
            done = not world_state.is_mission_running
            print(" IS MISSION FINISHED? ", done)

            next_state = gridProcess(world_state)
            next_state = state_processor.process(sess, next_state)
            next_state = np.append(state[:, :, 1:], np.expand_dims(next_state, 2), axis=2)

            # If our replay memory is full, pop the first element
            if len(replay_memory) == replay_memory_size:
                replay_memory.pop(0)

            # Save transition to replay memory
            replay_memory.append(Transition(state, action, reward, next_state, done))

            # Update statistics
            stats.episode_rewards[i_episode] += reward
            stats.episode_lengths[i_episode] = t

            # Sample a minibatch from the replay memory
            samples = random.sample(replay_memory, batch_size)
            states_batch, action_batch, reward_batch, next_states_batch, done_batch = map(np.array, zip(*samples))

            # Calculate q values and targets (Double DQN)
            q_values_next = q_estimator.predict(sess, next_states_batch)
            best_actions = np.argmax(q_values_next, axis=1)
            q_values_next_target = target_estimator.predict(sess, next_states_batch)
            targets_batch = reward_batch + np.invert(done_batch).astype(np.float32) * \
                discount_factor * q_values_next_target[np.arange(batch_size), best_actions]

            # Perform gradient descent update
            states_batch = np.array(states_batch)
            loss = q_estimator.update(sess, states_batch, action_batch, targets_batch)

            if done:
                print("End of Episode")
                break
            state = next_state
            total_t += 1

        # Add summaries to tensorboard
        episode_summary = tf.Summary()
        episode_summary.value.add(simple_value=stats.episode_rewards[i_episode], node_name="episode_reward", tag="episode_reward")
        episode_summary.value.add(simple_value=stats.episode_lengths[i_episode], node_name="episode_length", tag="episode_length")
        q_estimator.summary_writer.add_summary(episode_summary, total_t)
        q_estimator.summary_writer.flush()

        yield total_t, plotting.EpisodeStats(
            episode_lengths=stats.episode_lengths[:i_episode + 1],
            episode_rewards=stats.episode_rewards[:i_episode + 1])

    return stats
</Mission> '''
# NOTE(review): the line above closes a mission-XML template string whose opening
# lies before this chunk; the `return` below belongs to the enclosing
# XML-generator function whose `def` is also outside this chunk. Formatting
# reconstructed from a whitespace-mangled source.
    return missionXML.format(src=seedfile, limit=timelimit, xcoord=random.randint(0,300), zcoord=random.randint(100, 350), tlimit=eptime)
agent_id = 10001
# NOTE(review): counter is not incremented within this chunk; presumably
# advanced later in the loop body (beyond this view).
counter = 9019
while counter < numphotos:
    agent_host = MalmoPython.AgentHost()
    # Build a fresh randomized mission for each photo-collection run.
    try:
        missionXML = generateXMLbySeed()
        my_mission = MalmoPython.MissionSpec(missionXML, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
    except Exception as e:
        print("open mission ERROR: ", e)
    my_clients = MalmoPython.ClientPool()
    my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))  # add Minecraft machines here as available
    agent_id += 1
    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_clients, my_mission_record, 0, "IMGCOLLECTOR")
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)
def main(get_agent0_action, get_agent1_action):
    # Run a two-agent cookie-collection mission (Python 2 source).
    # get_agent0_action / get_agent1_action: policies mapping a PickupState to
    # an index into the module-level `actions` table.
    # NOTE(review): formatting reconstructed from a whitespace-mangled source.
    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
    agent_host0 = MalmoPython.AgentHost()
    agent_host0.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    agent_host1 = MalmoPython.AgentHost()
    agent_host1.setObservationsPolicy(
        MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    mission_file = './hw2.xml'
    my_mission = None
    with open(mission_file, 'r') as f:
        print "Loading mission from %s" % mission_file
        mission_xml = f.read()
        my_mission = MalmoPython.MissionSpec(mission_xml, True)
    # Attempt to start a mission:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host0.startMission(my_mission, client_pool, MalmoPython.MissionRecordSpec(), 0, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print "Error starting mission:", e
                exit(1)
            else:
                time.sleep(2)
    # Give the first agent time to connect before starting the second.
    time.sleep(10)
    max_retries = 30
    for retry in range(max_retries):
        try:
            agent_host1.startMission(my_mission, client_pool, MalmoPython.MissionRecordSpec(), 1, '')
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print "Error starting mission:", e
                exit(1)
            else:
                time.sleep(2)
    # Loop until mission starts:
    print "Waiting for the mission to start ",
    world_state0 = agent_host0.peekWorldState()
    while not world_state0.is_mission_running:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state0 = agent_host0.peekWorldState()
        for error in world_state0.errors:
            print "Error:", error.text
    print
    print "Mission running ",
    print
    # cookie_counts[k] = cookies collected so far by agent k.
    cookie_counts = [0, 0]
    while world_state0.is_mission_running:
        # Agent 0's turn.
        time.sleep(2.0)
        world_state0 = agent_host0.getWorldState()
        if world_state0.is_mission_running:
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break
            msg = world_state0.observations[-1].text
            grid, count = extract_observation(msg)
            s = PickupState(grid, 0, cookie_counts)
            action = get_agent0_action(s)
            print 'Agent0 taking action: {0}'.format(action)
            for command in actions[action]:
                agent_host0.sendCommand(command)
                time.sleep(0.1)
            world_state0 = agent_host0.peekWorldState()
            msg = world_state0.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[0] = count
        # Agent 1's turn.
        time.sleep(2.0)
        world_state1 = agent_host1.getWorldState()
        if world_state1.is_mission_running:
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break
            msg = world_state1.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[1] = count
            print cookie_counts
            if cookie_counts[0] + cookie_counts[1] >= num_cookies:
                break
            s = PickupState(grid, 1, cookie_counts)
            action = get_agent1_action(s)
            print 'Agent1 taking action: {0}'.format(action)
            for command in actions[action]:
                agent_host1.sendCommand(command)
                time.sleep(0.1)
            world_state1 = agent_host1.peekWorldState()
            msg = world_state1.observations[-1].text
            grid, count = extract_observation(msg)
            cookie_counts[1] = count
    # Announce the winner.
    if cookie_counts[0] > cookie_counts[1]:
        print "Agent0 wins with a score of {0} - {1}".format(
            cookie_counts[0], cookie_counts[1])
    elif cookie_counts[0] == cookie_counts[1]:
        print "Tie with a score of {0} - {1}".format(cookie_counts[0], cookie_counts[1])
    elif cookie_counts[0] < cookie_counts[1]:
        print "Agent1 wins with a score of {0} - {1}".format(
            cookie_counts[0], cookie_counts[1])