# Module-level setup for a Malmo script: imports, agent-host creation and
# the task constants used by the rest of the file.
from past.utils import old_div
import MalmoPython
import os
import random
import sys
import time
import json
import random  # NOTE(review): second import of `random`; harmless duplicate
import errno
import math
import malmoutils
import numpy as np
import agentMC

malmoutils.fix_print()

# One agent host is created at import time and used to parse the command line.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
# VideoProducer XML fragment (860x480) when the "record_video" argument was
# received, otherwise an empty string.
video_requirements = '<VideoProducer><Width>860</Width><Height>480</Height></VideoProducer>' if agent_host.receivedArgument(
    "record_video") else ''

# Task parameters:
MAX_DISTANCE = 40
MAX_ZOMBIES = 16
####### SPEED OF GAME #######
SPEED = 8
# Both arena dimensions are tied to MAX_DISTANCE.
ARENA_WIDTH = MAX_DISTANCE
ARENA_BREADTH = MAX_DISTANCE
# Module-level setup for a Malmo script: imports, agent-host creation and
# the block palettes used by the rest of the file.
from builtins import range
from past.utils import old_div
import MalmoPython
import os
import random
import sys
import time
import json
import copy
import errno
import xml.etree.ElementTree
from collections import deque
import malmoutils

malmoutils.fix_print()

# One agent host is created at import time and used to parse the command line.
agent_host = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host)
recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

# Set up some palettes:
# Each palette is a list of six block descriptions; some entries carry a
# colour after the block type (e.g. "stained_glass WHITE").
colourful=["stained_glass", "diamond_block", "lapis_block", "gold_block", "redstone_block", "obsidian"]
fiery=["stained_glass WHITE", "stained_glass PINK", "stained_glass ORANGE", "stained_glass RED", "wool BLACK", "glowstone"]
oresome=["gold_ore", "lapis_ore", "iron_ore", "emerald_ore", "redstone_ore", "quartz_ore"]
frilly=["skull", "stained_glass WHITE", "wool PINK", "wool WHITE", "stained_hardened_clay PINK", "stained_hardened_clay WHITE"]
icepalace=["ice", "stained_glass", "stained_glass", "stained_glass", "stained_glass", "snow"]
volatile=["tnt", "stained_glass", "stained_glass", "redstone_block", "stained_glass", "stained_glass"]
oak=["planks", "planks", "planks", "planks", "lapis_block", "lapis_block"]
sponge=["sponge", "glass", "sponge", "glass", "sponge", "glass"]
# All palettes in one list (the identifier's spelling is kept as-is because
# it is a module-level name other code may reference).
palletes = [colourful, fiery, oresome, frilly, icepalace, volatile, oak, sponge]
def run(argv=None):
    """Run a 10-second random-walk mission and print what the client reports.

    The agent is told to move forward and turn by a random amount every half
    second. After each step the number of video frames, observations and
    rewards received is printed, followed by the rewards, errors and frame
    sizes themselves.

    Args:
        argv: Argument list handed to ``malmoutils.parse_command_line``.
            Defaults to ``['']``; a fresh list is created on every call
            instead of sharing one mutable default between calls.

    Returns:
        None. Returns early (after printing a hint) when the
        ``MALMO_XSD_PATH`` environment variable is not set.

    Note:
        ``restart_minecraft`` is defined elsewhere in the project; it is
        called whenever the client stays silent for longer than
        ``max_response_time`` seconds.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    if argv is None:
        argv = ['']

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission a few times; give up on the last failure.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Time of the last world state that carried any new data.
    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        agent_host.sendCommand("turn " + str(random.random()*2-1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",
              world_state.number_of_video_frames_since_last_state,
              world_state.number_of_observations_since_last_state,
              world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':', frame.channels, 'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def run(argv=None):
    """Run a 10-second random-walk mission and print what the client reports.

    The agent is told to move forward and turn by a random amount every half
    second. After each step the number of video frames, observations and
    rewards received is printed, followed by the rewards, errors and frame
    sizes themselves.

    Args:
        argv: Argument list handed to ``malmoutils.parse_command_line``.
            Defaults to ``['']``; a fresh list is created on every call
            instead of sharing one mutable default between calls.

    Returns:
        None. Returns early (after printing a hint) when the
        ``MALMO_XSD_PATH`` environment variable is not set.

    Note:
        ``restart_minecraft`` is defined elsewhere in the project; it is
        called whenever the client stays silent for longer than
        ``max_response_time`` seconds.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    if argv is None:
        argv = ['']

    malmoutils.fix_print()

    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec()
    my_mission.timeLimitInSeconds(10)
    my_mission.requestVideo(320, 240)
    my_mission.rewardForReachingPosition(19.5, 0.0, 19.5, 100.0, 1.1)

    my_mission_record = malmoutils.get_default_recording_object(agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission a few times; give up on the last failure.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0, experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info, "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Time of the last world state that carried any new data.
    last_delta = time.time()
    # main loop:
    while world_state.is_mission_running:
        agent_host.sendCommand("move 1")
        agent_host.sendCommand("turn " + str(random.random()*2-1))
        time.sleep(0.5)
        world_state = agent_host.getWorldState()
        print("video,observations,rewards received:",
              world_state.number_of_video_frames_since_last_state,
              world_state.number_of_observations_since_last_state,
              world_state.number_of_rewards_since_last_state)
        if (world_state.number_of_video_frames_since_last_state > 0 or
                world_state.number_of_observations_since_last_state > 0 or
                world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info, "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print("Frame:", frame.width, 'x', frame.height, ':', frame.channels, 'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def main(agent_host):
    """Train a Q-network on repeated Malmo missions, optionally using vision.

    Each outer-loop pass starts one mission via ``init_malmo`` and runs it as
    one episode: wait for a video frame, process it, send a steering command
    based on ``current_yaw_delta_from_depth``, pick an action with
    ``get_action``, store the transition in the replay buffer and
    periodically call ``learn``. Training stops once
    ``Hyperparameters.MAX_GLOBAL_STEPS`` steps were taken; the collected
    vision labels are then written to ``images/image_labels``.

    Args:
        agent_host: Malmo agent host; it is re-bound to whatever
            ``init_malmo`` returns at the start of every episode.

    Note:
        ``VISION_ENABLED``, ``GET_VISION_DATA``, ``video_width``,
        ``video_height`` and ``current_yaw_delta_from_depth`` are read as
        globals defined elsewhere in the file.
        NOTE(review): ``current_yaw_delta_from_depth`` is presumably updated
        by ``processFrame``/``get_img`` -- confirm.
    """
    device = torch.device("cpu")
    if VISION_ENABLED:
        eyes = Eyes()
    if GET_VISION_DATA:
        clear_images()
    malmoutils.fix_print()
    malmoutils.parse_command_line(agent_host)
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)

    # Online and target networks start from identical weights.
    q_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE),
                         len(Hyperparameters.ACTION_DICT))
    target_network = QNetwork((2, Hyperparameters.OBS_SIZE, Hyperparameters.OBS_SIZE),
                              len(Hyperparameters.ACTION_DICT))
    target_network.load_state_dict(q_network.state_dict())

    optim = torch.optim.Adam(q_network.parameters(), lr=Hyperparameters.LEARNING_RATE)

    replay_buffer = deque(maxlen=Hyperparameters.REPLAY_BUFFER_SIZE)

    global_step = 0
    num_episode = 0
    epsilon = 1
    start_time = time.time()
    returns = []
    steps = []
    loss_array = []

    loop = tqdm(total=Hyperparameters.MAX_GLOBAL_STEPS, position=0, leave=False)

    result_dataset = []

    print("Global Step", Hyperparameters.MAX_GLOBAL_STEPS)
    while global_step < Hyperparameters.MAX_GLOBAL_STEPS:
        episode_step = 0
        episode_return = 0
        episode_loss = 0
        done = False

        #Initialize
        agent_host = init_malmo(agent_host, recordingsDirectory, video_width, video_height)
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            #for error in world_state.errors:
                #print("\nError:",error.text)
        obs = get_observation(world_state, agent_host)

        #Testing
        agent_host.sendCommand("move 1")

        while world_state.is_mission_running:
            #Depth Implementation
            # Block until at least one new frame arrived or the mission ended.
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            if world_state.is_mission_running:
                frame = world_state.video_frames[0].pixels
                processFrame(frame)

                if GET_VISION_DATA:
                    try:
                        result_dataset.append(
                            view_surrounding(video_height, video_width, frame, global_step))
                    except Exception:
                        # Best-effort data collection; unlike a bare
                        # ``except`` this lets Ctrl-C / SystemExit through.
                        print("Error in getting image for training data.")
                elif VISION_ENABLED:
                    input_img_temp = get_img(world_state, frame, agent_host, eyes, device,
                                             video_width, video_height)

                print("Yaw Delta ", current_yaw_delta_from_depth)

                # Steer by the sign of the yaw delta.
                if current_yaw_delta_from_depth > 0:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[1])
                else:
                    agent_host.sendCommand(Hyperparameters.ACTION_DICT[2])

            action_idx = get_action(obs, q_network, epsilon)
            command = Hyperparameters.ACTION_DICT[action_idx]
            agent_host.sendCommand(command)
            #agent_host.sendCommand( "turn " + str(current_yaw_delta_from_depth) )

            #time.sleep(.3)

            # The terminal state is computed by hand: the step budget is
            # used up, or the observed cell is -1 and the agent moved south.
            episode_step += 1
            if episode_step >= Hyperparameters.MAX_EPISODE_STEPS or \
                    (obs[0, int(Hyperparameters.OBS_SIZE/2)+1, int(Hyperparameters.OBS_SIZE/2)] == -1 and \
                    command == 'movesouth 1'):
                done = True
                time.sleep(2)

            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
            next_obs = get_observation(world_state, agent_host)

            reward = 0
            for r in world_state.rewards:
                reward += r.getValue()
            episode_return += reward

            replay_buffer.append((obs, action_idx, next_obs, reward, done))
            obs = next_obs

            global_step += 1
            #print(global_step)
            if global_step == Hyperparameters.MAX_GLOBAL_STEPS:
                break

            if global_step > Hyperparameters.START_TRAINING and global_step % Hyperparameters.LEARN_FREQUENCY == 0:
                batch = prepare_batch(replay_buffer)
                loss = learn(batch, optim, q_network, target_network)
                episode_loss += loss

                if epsilon > Hyperparameters.MIN_EPSILON:
                    epsilon *= Hyperparameters.EPSILON_DECAY

                if global_step % Hyperparameters.TARGET_UPDATE == 0:
                    target_network.load_state_dict(q_network.state_dict())

        num_episode += 1
        returns.append(episode_return)
        loss_array.append(episode_loss)
        steps.append(global_step)
        # Mean return over the last (up to) ten episodes.
        avg_return = sum(returns[-min(len(returns), 10):]) / min(len(returns), 10)
        loop.update(episode_step)
        loop.set_description('Episode: {} Steps: {} Time: {:.2f} Loss: {:.2f} Last Return: {:.2f} Avg Return: {:.2f}'.format(
            num_episode, global_step, (time.time() - start_time) / 60, episode_loss, episode_return, avg_return))

        if num_episode > 0 and num_episode % 10 == 0:
            log_returns(steps, loss_array)
            #print()
    #print(len(result_dataset))
    np.save("images/image_labels", np.array(result_dataset))
# Indices into one nine-cell grid layer that are inspected for each facing
# label. "N" uses 6..8, the cells the original debug print already reported.
_CELLS_AHEAD = {
    "S": (0, 1, 2),
    "W": (0, 3, 6),
    "E": (2, 5, 8),
    "N": (6, 7, 8),
}

_FACING_MESSAGES = {
    "S": "Facing South",
    "W": "Facing West",
    "N": "Facing North",
    "E": "Facing East",
}


def _facing_from_yaw(yaw):
    """Map a yaw reading to the script's one-letter facing label."""
    if yaw + 180 < 90:
        return "S"
    if yaw < 180:
        return "W"
    if yaw < 270:
        return "N"
    return "E"


def run(argv=None):
    """Walk forward in a default world, jumping and turning around obstacles.

    Every half second the agent moves forward with a small left/right wobble,
    reads the latest observation, and:

    * jumps when any of the three grid cells associated with its facing (in
      layer 1 of the ``all_the_blocks`` grid) is not air;
    * turns for a few ticks when cell 2 or cell 4 is blocked in both layer 1
      and layer 2.

    Args:
        argv: Argument list handed to ``malmoutils.parse_command_line``.
            Defaults to ``['']``; a fresh list is created on every call
            instead of sharing one mutable default between calls.

    Returns:
        None. Returns early (after printing a hint) when the
        ``MALMO_XSD_PATH`` environment variable is not set.

    Note:
        ``restart_minecraft`` is defined elsewhere in the project; it is
        called whenever the client stays silent for longer than
        ``max_response_time`` seconds.
    """
    if "MALMO_XSD_PATH" not in os.environ:
        print("Please set the MALMO_XSD_PATH environment variable.")
        return
    if argv is None:
        argv = ['']

    #forceReset="true"
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

              <About>
                <Summary>Hello world!</Summary>
              </About>

              <ServerSection>
                <ServerHandlers>
                  <DefaultWorldGenerator forceReset="true" />
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                  <ServerQuitWhenAnyAgentFinishes/>
                </ServerHandlers>
              </ServerSection>

              <AgentSection mode="Survival">
                <Name>MalmoTutorialBot</Name>
                <AgentStart>
                  <Inventory>
                    <InventoryItem slot="8" type="diamond_pickaxe"/>
                  </Inventory>
                </AgentStart>
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <ObservationFromGrid>
                    <Grid name="all_the_blocks" >
                      <min x="-1" y="-1" z="-1"/>
                      <max x="1" y="2" z="1"/>
                    </Grid>
                  </ObservationFromGrid>
                  <ContinuousMovementCommands turnSpeedDegs="180"/>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''

    malmoutils.fix_print()

    #agent_host = MalmoPython.AgentHost()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host, argv)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission.timeLimitInSeconds(300)
    my_mission.requestVideo(640, 480)

    #my_mission.rewardForReachingPosition( 19.5, 0.0, 19.5, 100.0, 1.1 )

    my_mission_record = malmoutils.get_default_recording_object(
        agent_host, "saved_data")

    # client_info = MalmoPython.ClientInfo('localhost', 10000)
    client_info = MalmoPython.ClientInfo('127.0.0.1', 10000)
    pool = MalmoPython.ClientPool()
    pool.add(client_info)

    experiment_id = str(uuid.uuid1())
    print("experiment id " + experiment_id)

    max_retries = 3
    max_response_time = 60  # seconds

    # Try to start the mission a few times; give up on the last failure.
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, pool, my_mission_record, 0,
                                    experiment_id)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    print("Waiting for the mission to start", end=' ')
    start_time = time.time()
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        if time.time() - start_time > max_response_time:
            print("Max delay exceeded for mission to begin")
            restart_minecraft(world_state, agent_host, client_info,
                              "begin mission")
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()

    # Time of the last world state that carried any new data.
    last_delta = time.time()
    # main loop:
    #agent_host.sendCommand( "jump 1")
    wobble_phase = 0   # argument of the sine that drives the wobble
    turn_ticks = 0     # remaining ticks of obstacle-avoidance turning
    jump_ticks = 0     # remaining ticks before the jump is released
    while world_state.is_mission_running:
        print("New Iteration")

        # Release the jump once its countdown reaches zero; the extra
        # decrement keeps "jump 0" from being re-sent every tick.
        if jump_ticks > 0:
            jump_ticks -= 1
        if jump_ticks == 0:
            agent_host.sendCommand("jump 0")
            jump_ticks -= 1
        agent_host.sendCommand("move 1")
        if math.sin(wobble_phase) / 3 >= 0:
            agent_host.sendCommand("turn 0.15")
        else:
            agent_host.sendCommand("turn -0.2")
        print(wobble_phase, " ", math.sin(wobble_phase))
        wobble_phase = wobble_phase + 0.3
        #agent_host.sendCommand( "jump 1" )
        time.sleep(0.5)
        world_state = agent_host.getWorldState()

        # A tick may deliver no observation at all (e.g. when the mission
        # has just ended); indexing [-1] unguarded would raise IndexError.
        if world_state.observations:
            observation = json.loads(world_state.observations[-1].text)
            #print(observation["all_the_blocks"])
            facing = _facing_from_yaw(observation["Yaw"])
            print(_FACING_MESSAGES[facing])

            # Split the flat grid into layers of nine cells each.
            grid = observation["all_the_blocks"]
            layers = [grid[start:start + 9] for start in range(0, len(grid), 9)]
            layer_one, layer_two = layers[1], layers[2]

            will_jump = False
            for cell in _CELLS_AHEAD[facing]:
                if layer_one[cell] != "air":
                    will_jump = True
                print(cell, layer_one[cell], will_jump)
            if will_jump:
                jump_ticks = 2
                agent_host.sendCommand("jump 1")

            if (layer_one[2] != "air" and layer_two[2] != "air"
                    or layer_one[4] != "air" and layer_two[4] != "air"):
                turn_ticks = 2

        if turn_ticks >= 0:
            agent_host.sendCommand("turn 1")
            turn_ticks -= 1

        if (world_state.number_of_video_frames_since_last_state > 0
                or world_state.number_of_observations_since_last_state > 0
                or world_state.number_of_rewards_since_last_state > 0):
            last_delta = time.time()
        else:
            if time.time() - last_delta > max_response_time:
                print("Max delay exceeded for world state change")
                restart_minecraft(world_state, agent_host, client_info,
                                  "world state change")
        for reward in world_state.rewards:
            print("Summed reward:", reward.getValue())
        for error in world_state.errors:
            print("Error:", error.text)
        for frame in world_state.video_frames:
            print()
            #print("Frame:",frame.width,'x',frame.height,':',frame.channels,'channels')
            #image = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels) ) # to convert to a PIL image
    print("Mission has stopped.")
def run(size, algo1, algo2):
    """Play one two-agent block-breaking match on a square snow arena.

    Two agent hosts ("Agent" and "Enemy") are started on a ``size`` x
    ``size`` snow floor above lava. Each turn both algorithms choose a move
    direction and a block to break; the choices are executed in the order
    agent-break, enemy-move, enemy-break, agent-move. A boolean grid tracks
    which floor cells are still intact and is printed when the match ends.

    Args:
        size: Arena edge length; converted with ``int()``.
        algo1: Key into the algorithm table for the agent ("reflex",
            "random", "smartrandom", "astarreflex" or "minimax").
        algo2: Key into the algorithm table for the enemy.

    Returns:
        0 if lava appears in the agent's ``floor3x3F`` grid (enemy won),
        1 if lava appears in the enemy's grid (agent won),
        2 if the mission stops running first.

    Raises:
        KeyError: If ``algo1`` or ``algo2`` is not a known algorithm name.
    """
    #algorithms = {"reflex": reflex.reflex, "hiddenMarkov": hiddenMarkov.hiddenMarkov, "minimax":minimax.minimax, "expectimax": expectimax.expectimax}
    algorithms = {
        "reflex": reflex.reflex,
        'random': randomagent.randommove,
        'smartrandom': smartrandomagent.randommove,
        'astarreflex': AStarReflex.search,
        "minimax": minimax.minmax
    }
    #assert len(sys.argv) == 4, "Wrong number of arguments, the form is: mapSize, agent algorithm, enemy alogrithm"

    malmoutils.fix_print()

    # -- set up two agent hosts --
    agent_host1 = MalmoPython.AgentHost()
    agent_host2 = MalmoPython.AgentHost()
    #map_size = str(sys.argv[1])
    map_size = int(size)
    map_minus = str(map_size - 1)
    agentAlgo = algorithms[algo1]
    enemyAlgo = algorithms[algo2]
    #agentAlgo = algorithms[sys.argv[2]]
    #enemyAlgo = algorithms[sys.argv[3]]

    # Use agent_host1 for parsing the command-line options.
    # (This is why agent_host1 is passed in to all the subsequent malmoutils calls, even for
    # agent 2's setup.)
    malmoutils.parse_command_line(agent_host1)

    # {far} is the last arena coordinate, {enemy_xz} the enemy's start
    # position in the opposite corner.
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
            <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

              <About>
                <Summary>Hello world!</Summary>
              </About>

              <ServerSection>
                <ServerInitialConditions>
                  <Time>
                    <StartTime>12000</StartTime>
                    <AllowPassageOfTime>false</AllowPassageOfTime>
                  </Time>
                </ServerInitialConditions>
                <ServerHandlers>
                  <FlatWorldGenerator generatorString="3;7,220*1,5*3,2;3;,biome_1"/>
                  <DrawingDecorator>
                    <!-- coordinates for cuboid are inclusive -->
                    <DrawCuboid x1="0" y1="45" z1="0" x2="{far}" y2="300" z2="{far}" type="air" /> <!-- limits of our arena -->
                    <DrawCuboid x1="0" y1="40" z1="0" x2="{far}" y2="44" z2="{far}" type="lava" /> <!-- lava floor -->
                    <DrawCuboid x1="0" y1="46" z1="0" x2="{far}" y2="46" z2="{far}" type="snow" />
                  </DrawingDecorator>
                  <ServerQuitFromTimeUp timeLimitMs="30000"/>
                </ServerHandlers>
              </ServerSection>

              <AgentSection mode="Survival">
                <Name>Agent</Name>
                <AgentStart>
                  <Inventory>
                    <InventoryItem slot="0" type="diamond_shovel"/>
                  </Inventory>
                  <Placement x="0.5" y="47.0" z="0.5" pitch="50" yaw="0"/>
                </AgentStart>
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <ObservationFromGrid>
                    <Grid name="floor3x3W">
                      <min x="-1" y="0" z="-1"/>
                      <max x="1" y="0" z="1"/>
                    </Grid>
                    <Grid name="floor3x3F">
                      <min x="-1" y="-1" z="-1"/>
                      <max x="1" y="-1" z="1"/>
                    </Grid>
                  </ObservationFromGrid>
                  <DiscreteMovementCommands/>
                </AgentHandlers>
              </AgentSection>

              <AgentSection mode="Survival">
                <Name>Enemy</Name>
                <AgentStart>
                  <Inventory>
                    <InventoryItem slot="0" type="diamond_shovel"/>
                  </Inventory>
                  <Placement x="{enemy_xz}" y="47.0" z="{enemy_xz}" pitch="50" yaw="180"/>
                </AgentStart>
                <AgentHandlers>
                  <ObservationFromFullStats/>
                  <DiscreteMovementCommands/>
                  <ObservationFromGrid>
                    <Grid name="floor3x3W">
                      <min x="-1" y="0" z="-1"/>
                      <max x="1" y="0" z="1"/>
                    </Grid>
                    <Grid name="floor3x3F">
                      <min x="-1" y="-1" z="-1"/>
                      <max x="1" y="-1" z="1"/>
                    </Grid>
                  </ObservationFromGrid>
                  <RewardForTouchingBlockType>
                    <Block reward="-100.0" type="lava" behaviour="onceOnly"/>
                  </RewardForTouchingBlockType>
                  <AgentQuitFromTouchingBlockType>
                    <Block type="lava" />
                  </AgentQuitFromTouchingBlockType>
                </AgentHandlers>
              </AgentSection>
            </Mission>'''.format(far=map_minus,
                                 enemy_xz=str(float(map_size) - 0.5))

    # Create default Malmo objects:
    my_mission = MalmoPython.MissionSpec(missionXML, True)

    client_pool = MalmoPython.ClientPool()
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
    client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))

    MalmoPython.setLogging("", MalmoPython.LoggingSeverityLevel.LOG_OFF)
    my_mission_record = MalmoPython.MissionRecordSpec()

    def safeStartMission(agent_host, mission, client_pool, recording, role,
                         experimentId):
        """Call startMission, retrying on the recoverable Malmo errors."""
        used_attempts = 0
        max_attempts = 5
        print("Calling startMission for role", role)
        while True:
            try:
                agent_host.startMission(mission, client_pool, recording, role,
                                        experimentId)
                break
            except MalmoPython.MissionException as e:
                errorCode = e.details.errorCode
                if errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_WARMING_UP:
                    # Does not count against the attempt budget.
                    print("Server not quite ready yet - waiting...")
                    time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_INSUFFICIENT_CLIENTS_AVAILABLE:
                    print("Not enough available Minecraft instances running.")
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait in case they are starting up.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                elif errorCode == MalmoPython.MissionErrorCode.MISSION_SERVER_NOT_FOUND:
                    print(
                        "Server not found - has the mission with role 0 been started yet?"
                    )
                    used_attempts += 1
                    if used_attempts < max_attempts:
                        print("Will wait and retry.",
                              max_attempts - used_attempts, "attempts left.")
                        time.sleep(2)
                else:
                    print("Other error:", e.message)
                    print("Waiting will not help here - bailing immediately.")
                    exit(1)
            if used_attempts == max_attempts:
                print("All chances used up - bailing now.")
                exit(1)
        print("startMission called okay.")

    def safeWaitForStart(agent_hosts):
        """Block until every host reports the mission has begun (max 120 s)."""
        print("Waiting for the mission to start", end=' ')
        start_flags = [False for a in agent_hosts]
        start_time = time.time()
        time_out = 120  # Allow two minutes for mission to start.
        while not all(start_flags) and time.time() - start_time < time_out:
            states = [a.peekWorldState() for a in agent_hosts]
            start_flags = [w.has_mission_begun for w in states]
            errors = [e for w in states for e in w.errors]
            if len(errors) > 0:
                print("Errors waiting for mission start:")
                for e in errors:
                    print(e.text)
                print("Bailing now.")
                exit(1)
            time.sleep(0.1)
            print(".", end=' ')
        print()
        if time.time() - start_time >= time_out:
            print("Timed out waiting for mission to begin. Bailing.")
            exit(1)
        print("Mission has started.")

    safeStartMission(agent_host1, my_mission, client_pool, my_mission_record,
                     0, '')
    safeStartMission(agent_host2, my_mission, client_pool, my_mission_record,
                     1, '')
    safeWaitForStart([agent_host1, agent_host2])

    def movement(ah, direction, pos):
        """Send one discrete move and return the resulting (x, z) position.

        Any direction other than north/south/west/east sends nothing and
        returns the position unchanged.
        """
        if direction == "north":
            ah.sendCommand("movenorth 1")
            position = (pos[0], pos[1] - 1)
        elif direction == "south":
            ah.sendCommand("movesouth 1")
            position = (pos[0], pos[1] + 1)
        elif direction == "west":
            ah.sendCommand("movewest 1")
            position = (pos[0] - 1, pos[1])
        elif direction == "east":
            ah.sendCommand("moveeast 1")
            position = (pos[0] + 1, pos[1])
        else:
            position = (pos[0], pos[1])
        time.sleep(0.1)
        return position

    # Cell offset (dx, dz) of the block broken for each attack direction.
    offsets = {
        "north": (0, -1),
        "south": (0, 1),
        "east": (1, 0),
        "west": (-1, 0),
    }
    # Number of "turn 1" (positive) or "turn -1" (negative) commands needed
    # to face each direction. Enemy starts facing north, agent facing south.
    enemy_turns = {"north": 0, "east": 1, "west": -1, "south": 2}
    agent_turns = {"south": 0, "west": 1, "east": -1, "north": 2}

    def attack(ah, index, pos, grid, enemy=False):
        """Break the adjacent block in direction ``index`` and record it.

        The player turns towards the block, attacks, and turns back. The
        matching cell of ``grid`` is set to False, but only when it lies
        inside the grid: an unchecked write would wrap around for -1 and
        raise IndexError past the far edge.
        """
        #We are going to make it so the agent can only break the blocks immediately around them.
        if index not in offsets:
            return
        x, y = int(math.floor(pos[0])), int(math.floor(pos[1]))
        #print("Player position: {},{} Direction: {}".format(x,y, index))
        quarter_turns = (enemy_turns if enemy else agent_turns)[index]
        if quarter_turns > 0:
            turn_to, turn_back = "turn 1", "turn -1"
        else:
            turn_to, turn_back = "turn -1", "turn 1"

        for _ in range(abs(quarter_turns)):
            ah.sendCommand(turn_to)
            time.sleep(0.1)
        ah.sendCommand("attack 1")
        time.sleep(0.1)
        for _ in range(abs(quarter_turns)):
            ah.sendCommand(turn_back)
            time.sleep(0.1)

        dx, dy = offsets[index]
        x += dx
        y += dy
        if 0 <= x < len(grid) and 0 <= y < len(grid[x]):
            grid[x][y] = False

    '''
    Sample Observation:
    {"DistanceTravelled":0,"TimeAlive":50,"MobsKilled":0,"PlayersKilled":0,"DamageTaken":0,"DamageDealt":0,
    "Life":20.0,"Score":0,"Food":20,"XP":0,"IsAlive":true,"Air":300,"Name":"Enemy","XPos":5.5,"YPos":47.0,
    "ZPos":5.5,"Pitch":50.0,"Yaw":180.0,"WorldTime":12000,"TotalTime":57}
    '''
    agent_score = 0
    #count = 0
    agent_ob = None
    enemy_ob = None

    # arena[x][z] is True while the floor cell is believed to be intact.
    arena = [[True] * map_size for _ in range(map_size)]

    def show_arena():
        for row in arena:
            print(row)

    while True:
        #Scores should decrease with time and get a bonus if they win
        agent_score -= 1
        agent_state = agent_host1.peekWorldState()
        enemy_state = agent_host2.peekWorldState()

        # Checked first so that a mission ending before both observations
        # arrive cannot leave the loop spinning forever.
        if not agent_state.is_mission_running:
            break

        if agent_state.number_of_observations_since_last_state > 0:
            agent_ob = json.loads(agent_state.observations[-1].text)
        if enemy_state.number_of_observations_since_last_state > 0:
            enemy_ob = json.loads(enemy_state.observations[-1].text)
        if agent_ob is None or enemy_ob is None:
            time.sleep(0.01)  # avoid a hot spin while waiting
            continue

        agent_position = (agent_ob["XPos"], agent_ob["ZPos"])
        enemy_position = (enemy_ob["XPos"], enemy_ob["ZPos"])

        agent_grid = agent_ob.get(u'floor3x3F')
        enemy_grid = enemy_ob.get(u'floor3x3F')
        if agent_grid is None or enemy_grid is None:
            # Observation without grid data: nothing to decide on yet.
            time.sleep(0.01)
            continue

        if "lava" in agent_grid:
            print("Enemy Won!")
            agent_score -= 100
            show_arena()
            return 0

        if "lava" in enemy_grid:
            print("Agent Won!")
            agent_score += 100
            show_arena()
            return 1

        agentMoveString, agentBreakIndex = agentAlgo(agent_host1,
                                                     agent_position,
                                                     enemy_position,
                                                     agent_grid, arena)
        enemyMoveString, enemyBreakIndex = enemyAlgo(agent_host2,
                                                     enemy_position,
                                                     agent_position,
                                                     enemy_grid, arena)

        # #Agent Turn to Break
        attack(agent_host1, agentBreakIndex, agent_position, arena)

        # #Enemy Turn to Move
        pos = movement(agent_host2, enemyMoveString, enemy_position)

        # #Enemy Turn to Break
        attack(agent_host2, enemyBreakIndex, pos, arena, enemy=True)

        # #Agent Turn to Move
        movement(agent_host1, agentMoveString, agent_position)

    show_arena()
    return 2
def main():
    """Record one human-played mission from ``../custom_xmls/usar.xml``.

    The mission is loaded with ``getMissionXML`` (defined elsewhere), a
    61 x 3 x 61 relative grid observation named ``relative_view`` is added,
    and rewards, observations, commands and an MP4 are recorded to a
    timestamped ``.tgz`` under ``../human_trajectories``. While the mission
    runs, the latest video frame of every world state is converted to a PIL
    image and counted.

    Returns:
        None. Calls ``exit(1)`` if the mission cannot be started after three
        attempts.
    """
    sight = {'x': (-30, 30), 'z': (-30, 30), 'y': (-1, 1)}

    malmoutils.fix_print()
    agent_host = MalmoPython.AgentHost()
    malmoutils.parse_command_line(agent_host)
    # The helper is still called, but its result is deliberately replaced
    # by a fixed output directory.
    recordingsDirectory = malmoutils.get_recordings_directory(agent_host)
    recordingsDirectory = "../human_trajectories"
    if not os.path.exists(recordingsDirectory):
        os.mkdir(recordingsDirectory)

    logging.basicConfig(level=logging.INFO)
    # pdb.set_trace()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)  # set to INFO if you want fewer messages

    mission_xml_path = "../custom_xmls/usar.xml"
    validate = True
    # my_mission = MalmoPython.MissionSpec(missionXML, validate)
    my_mission = MalmoPython.MissionSpec(getMissionXML(mission_xml_path), validate)

    # ObservationFromGrid
    (x_min, x_max), (y_min, y_max), (z_min, z_max) = sight['x'], sight['y'], sight['z']
    my_mission.observeGrid(x_min, y_min, z_min, x_max, y_max, z_max, 'relative_view')

    # agent_host.setObservationsPolicy(MalmoPython.ObservationsPolicy.LATEST_OBSERVATION_ONLY)
    agent_host.setVideoPolicy(MalmoPython.VideoPolicy.LATEST_FRAME_ONLY)

    my_mission_record = MalmoPython.MissionRecordSpec()
    if recordingsDirectory:
        my_mission_record.recordRewards()
        my_mission_record.recordObservations()
        my_mission_record.recordCommands()
        # if agent_host.receivedArgument("record_video"):
        #     my_mission_record.recordMP4(24,2000000)
        my_mission_record.recordMP4(24, 2000000)

    recording_name = datetime.now().strftime("%Y-%m-%d_%I-%M-%S_%p")
    for iRepeat in range(1):
        my_mission_record.setDestination(
            os.path.join(recordingsDirectory, recording_name + ".tgz"))
        max_retries = 3
        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_mission_record)
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    logger.error("Error starting mission: %s", e)
                    exit(1)
                else:
                    time.sleep(2)

        logger.info('Mission %s', iRepeat)
        logger.info("Waiting for the mission to start")
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
        print()

        img_counter = 0
        # print('observations', world_state.observations)
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()

            # Observations
            # msg = observe(agent_host)
            # if msg is not None:
            #     print('timestamp: ', msg['timestamp'])

            # NOTE : Nothing recorded in world state. Uncomment to test it out.
            # if world_state.number_of_observations_since_last_state > 0:
            #     timestamp = world_state.observations[-1].timestamp
            #     msg = world_state.observations[-1].text
            #     obs = json.loads(msg)
            #     print("{'timestamp': timestamp, 'observations': obs}")

            # Video Frames
            while world_state.number_of_video_frames_since_last_state < 1 and world_state.is_mission_running:
                logger.info("Waiting for frames...")
                time.sleep(0.05)
                world_state = agent_host.getWorldState()

            # The wait above also ends when the mission stops, so a frame is
            # only guaranteed while the mission is still running.
            if world_state.is_mission_running:
                logger.info("Got frame!")
                frame = world_state.video_frames[-1]
                # Use the frame's own size rather than assuming 640x480, so
                # a different video size in the mission XML does not break
                # the conversion.
                img = Image.frombytes('RGB', (frame.width, frame.height), bytes(frame.pixels))
                # imageio.imsave("./tmp_imgs/{}.png".format(img_counter), img)
                img_counter += 1

        logger.info("Mission has stopped.")
        time.sleep(1)  # let the Mod recover