def __init__(self, env_config):
    """Set up the environment: static parameters, RLlib spaces, Malmo host,
    and per-episode bookkeeping."""
    # Static parameters
    self.size = 21
    self.obs_size = 21
    self.max_episode_steps = 100
    self.log_frequency = 10
    self.action_dict = {
        0: 'move 1',
        1: 'move -1',
        2: 'turn 1',
        3: 'turn 0',
        4: 'attack 1',
    }

    # RLlib spaces. A continuous Box action space was tried previously:
    # self.action_space = Box(-1, 1, shape=(3,), dtype=np.float32)
    self.action_space = Discrete(len(self.action_dict))
    self.observation_space = Box(
        0, 1,
        shape=(np.prod([1, self.obs_size, self.obs_size]), ),
        dtype=np.int32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    self.agentinf = agent()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # Episode bookkeeping
    self.obs = None
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
def __init__(self):
    """Create a Malmo agent host and parse command-line options; exits on bad args."""
    self.agentHost = MalmoPython.AgentHost()
    try:
        self.agentHost.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(self.agentHost.getUsage())
        sys.exit(1)
def __init__(self, env_config):
    """Set up the GhastKiller environment: spaces, Malmo host, and combat-tracking state."""
    # Static parameters
    self.size_y = 25
    self.size_x = 10
    self.size_z = 20
    # Legacy parameters kept for reference:
    # self.reward_density = .1
    # self.penalty_density = .02
    # self.obs_size = 5
    # self.obs_size_x = 5
    # self.obs_size_y = 5
    # self.obs_size_z = 5
    self.max_episode_steps = 100  # previously 100
    self.log_frequency = 1        # previously 10
    self.num_ghasts = 1
    self.action_dict = {
        0: 'movewest 1',   # Move one block forward
        1: 'moveeast 1',   # Turn 90 degrees to the right
        2: 'attack 1',     # Destroy block
    }

    # RLlib parameters — continuous [move, attack] action space.
    self.action_space = Box(low=np.array([-1, 0]), high=np.array([1, 1]))
    # Discrete alternative:
    # self.action_space = Discrete(len(self.action_dict))
    # One (x, y, z) triple per ghast.
    self.observation_space = Box(-50, 50,
                                 shape=(self.num_ghasts * 1 * 3, ),
                                 dtype=np.float32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # GhastKiller state
    self.obs = None
    self.allow_break_action = False
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
    self.ghasts = collections.defaultdict(dict)
    self.fireballs = collections.defaultdict(dict)
    self.agentState = {"pos": (0.5, 21, -9.5), "life": 20, "prevLife": 20}
    self.step_dodge = 0
    self.episode_dodge = set()
    self.step_hitback = 0
    self.episode_hitback = set()
    self.step_kill = 0
def two_agent_init(self):
    """Create and return a pair of Malmo agent hosts.

    Exits the process on argument-parsing errors (code 1) or --help (code 0).
    """
    first = MalmoPython.AgentHost()
    second = MalmoPython.AgentHost()
    try:
        first.parse(sys.argv)
        second.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(first.getUsage())
        print(second.getUsage())
        exit(1)
    for host in (first, second):
        if host.receivedArgument("help"):
            print(host.getUsage())
            exit(0)
    return (first, second)
def __init__(self):
    """Initialize the MinecraftEnv wrapper and load its mission XML."""
    super(MinecraftEnv, self).__init__()
    self.agent_host = MalmoPython.AgentHost()
    self.client_pool = None    # Malmo client pool; assigned later
    self.mc_process = None     # handle to a spawned Minecraft process, if any
    self.screen = None
    self.experiment_id = None
    self._turn = None
    self.load_mission_xml()
def one_agent_init(self):
    """Create and return a single Malmo agent host.

    Exits on argument-parsing errors (code 1) or --help (code 0).
    """
    host = MalmoPython.AgentHost()
    try:
        host.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(host.getUsage())
        exit(1)
    if host.receivedArgument("help"):
        print(host.getUsage())
        exit(0)
    return host
def create_malmo_obj():
    """Build a command-line-configured AgentHost, handling --help and bad args."""
    host = MalmoPython.AgentHost()
    try:
        host.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(host.getUsage())
        exit(1)
    if host.receivedArgument("help"):
        print(host.getUsage())
        exit(0)
    return host
def __init__(self, env_config):
    """Set up the mob-arena environment and launch Malmo."""
    # Static parameters
    self.size = 35
    self.mobCount = 5  # number of mobs spawned per mob type

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    # NOTE(review): init_malmo() runs *before* command-line parsing below, and
    # its returned world state is discarded — confirm this ordering is intentional.
    world_state = self.init_malmo()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)
def init_malmo(self, recordings_directory=DEFAULT_RECORDINGS_DIR):
    """Launch Minecraft and create the bot/camera agent hosts plus client pool.

    Idempotent: returns immediately if Malmo was already initialized.
    """
    if self.is_malmo_initialized:
        return

    launch_minecraft_in_background('/app/MalmoPlatform/Minecraft',
                                   ports=[10000, 10001])

    # Two hosts: one for the bot, one for the recording camera.
    self.agent_host_bot = MalmoPython.AgentHost()
    self.agent_host_camera = MalmoPython.AgentHost()

    # Clients to attach to. The Minecraft instances must already be up
    # (see launch above) before record_malmo_video is used.
    self.client_pool = MalmoPython.ClientPool()
    for port in (10000, 10001):
        self.client_pool.add(MalmoPython.ClientInfo('127.0.0.1', port))

    # The bot's host carries the command-line options (incl. recording).
    malmoutils.parse_command_line(
        self.agent_host_bot,
        ['--record_video', '--recording_dir', recordings_directory])

    self.is_malmo_initialized = True
def __init__(self, env_config):
    """Set up the DiamondCollector environment: spaces, Malmo host, progress bar."""
    # Static parameters
    self.size = 50
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 5
    self.max_episode_steps = 100  # todo: 100 steps
    self.log_frequency = 10       # todo: 10 frequency
    self.action_dict = {
        0: 'move 1',    # Move one block forward
        1: 'turn 1',    # Turn 90 degrees to the right
        2: 'turn -1',   # Turn 90 degrees to the left
        3: 'attack 1',  # Destroy block
    }

    # RLlib parameters (discrete alternative kept for reference)
    self.action_space = Box(low=-1, high=1, shape=(3, ))
    # self.action_space = Discrete(len(self.action_dict))
    self.observation_space = Box(0, 1,
                                 shape=(2 * self.obs_size * self.obs_size, ),
                                 dtype=np.float32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # DiamondCollector state
    self.obs = None
    self.allow_break_action = False
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
    self.pbar = tqdm(total=50000)  # overall training progress bar
def __init__(self, missionXML, validate, setup_mission=None):
    """Parse CLI options, build the mission spec, and apply an optional setup hook.

    :param missionXML: mission definition XML string
    :param validate: whether MissionSpec should validate the XML
    :param setup_mission: optional callable applied to the built MissionSpec
    """
    # NOTE(review): bare `super()` without a method call is a no-op — confirm intent.
    super()
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(self.agent_host.getUsage())
        exit(1)
    if self.agent_host.receivedArgument("help"):
        print(self.agent_host.getUsage())
        exit(0)
    self.mission = MalmoPython.MissionSpec(missionXML, validate)
    self.mission_record = MalmoPython.MissionRecordSpec()
    if setup_mission is not None:
        setup_mission(self.mission)
def __init__(self, env_config):
    """Set up the bridge-crossing environment: spaces, Malmo host, and course bounds."""
    # Static parameters
    self.size = 50
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 4  # changed from 5
    self.max_episode_steps = 125
    self.log_frequency = 10
    self.action_dict = {
        0: 'move 1',    # Move one block forward
        1: 'turn 1',    # Turn 90 degrees to the right
        2: 'turn -1',   # Turn 90 degrees to the left
        3: 'attack 1',  # Destroy block
    }

    # RLlib parameters (switched from Discrete to continuous Box)
    self.action_space = Box(low=-1.0, high=1.0, shape=(2, ), dtype=np.float32)
    # NOTE(review): bounds of +/-1000 were a placeholder ("not sure what to do here").
    self.observation_space = Box(-1000, 1000,
                                 shape=(self.obs_size, ),
                                 dtype=np.float32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # Agent state: current/previous observation and course boundary coordinates.
    self.obs = np.zeros(self.obs_size)
    self.last_obs = np.zeros(self.obs_size)
    self.zstart = 81.5
    self.zend = 96.5
    self.xleft = 662.5
    self.xright = 648.5
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
def __init__(self, env_config):
    """Set up the water-landing environment: spaces, Malmo host, episode state."""
    # Static parameters
    self.size = 50
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 3
    self.depth = 30
    self.max_episode_steps = 30
    self.log_frequency = 10
    self.num_episode = 0
    self.xz_coordinate = 2.5, 2.5
    self.action_dict = {
        0: 'move 1',     # Move one block forward
        1: 'strafe -1',  # Moves left
        2: 'strafe 1',   # Moves right
        3: 'move -1',    # Moves back
        4: 'move 0',     # Moves 0
    }
    self.landInWater = False

    # RLlib parameters: flattened depth x obs x obs binary grid.
    self.action_space = Discrete(len(self.action_dict))
    self.observation_space = Box(
        0, 1,
        shape=(np.prod([self.depth, self.obs_size, self.obs_size]), ),
        dtype=np.int32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # Episode bookkeeping
    self.obs = None
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
def __init__(self, env_config):
    """Set up the CallMePro environment: spaces, Malmo host, and facing flags."""
    # Static parameters
    self.penalty_density = .02
    self.obs_size = 5
    self.log_frequency = 10
    self.max_episode_steps = 400
    self.action_dict = {
        0: 'move 0',     # Stop
        1: 'strafe 1',   # Move one block left
        2: 'strafe -1',  # Move one block right
        3: 'attack 1',
    }

    # RLlib parameters
    self.action_space = Discrete(len(self.action_dict))
    self.observation_space = Box(0, 1,
                                 shape=(2 * self.obs_size * self.obs_size, ),
                                 dtype=np.float32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # CallMePro state: which block type the agent is currently facing.
    self.obs = None
    self.face_brick_move = False
    self.face_gold_move = False
    self.face_diamond_move = False
    self.face_stone_move = False
    self.face_diamondblock_move = False
    self.face_coal_move = False
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
def __init__(self, missionXML, validate, setup_mission=None,
             ip="127.0.0.1", port=10000):
    """Parse CLI options, register one Minecraft client, and build the mission.

    :param missionXML: mission definition XML string
    :param validate: whether MissionSpec should validate the XML
    :param setup_mission: optional callable applied to the built MissionSpec
    :param ip: Minecraft client host
    :param port: Minecraft client port
    """
    super()
    self.agent_host = MalmoPython.AgentHost()
    self.clientPool = MalmoPython.ClientPool()
    self.clientPool.add(MalmoPython.ClientInfo(ip, port))
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as err:
        print("ERROR:", err)
        print(self.agent_host.getUsage())
        exit(1)
    if self.agent_host.receivedArgument("help"):
        print(self.agent_host.getUsage())
        exit(0)
    self.mission = MalmoPython.MissionSpec(missionXML, validate)
    self.mission_record = MalmoPython.MissionRecordSpec()
    if setup_mission is not None:
        setup_mission(self.mission)
def __init__(self, env_config):
    """Set up the strafing environment: spaces, Malmo host, and course bounds."""
    # Static parameters
    self.size = 50
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 5
    self.max_episode_steps = 100
    self.log_frequency = 10
    self.action_dict = {
        0: 'move 1',     # Move one block forward
        1: 'strafe -1',  # strafe to the left
        2: 'strafe 1',   # strafe to the right
        # 3: 'turn -1',  # Turn 90 degrees to the left
    }

    # RLlib parameters — back to Discrete for the final report.
    self.action_space = Discrete(len(self.action_dict))
    # Earlier continuous variant:
    # self.observation_space = Box(-1000, 1000, shape=(self.obs_size,), dtype=np.float32)
    self.observation_space = Box(0, 1,
                                 shape=(self.obs_size * self.obs_size, ),
                                 dtype=np.float32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # Agent state: current/previous observation and course boundary coordinates.
    self.obs = np.zeros(self.obs_size)
    self.last_obs = np.zeros(self.obs_size)
    self.zstart = 82.5
    self.zend = 96.5
    self.xstart = 655.5
    self.xend = 648.5
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
def __init__(self, env_config):
    """Set up the navigation environment: spaces, Malmo host, position tracking."""
    # Static parameters
    self.size = 50
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 6
    self.max_episode_steps = 400
    self.log_frequency = 1
    self.episode_num = 0
    self.quit = False
    self.reached = False

    # RLlib parameters: continuous actions over a 15-layer voxel observation.
    self.action_space = Box(-1, 1, shape=(3, ), dtype=np.float32)
    self.observation_space = Box(
        0, 1,
        shape=(np.prod([15, self.obs_size + 1, self.obs_size + 1]), ),
        dtype=np.int32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # Episode bookkeeping
    self.obs = None
    self.cur_pos = (0, 0, 0)
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
    # Position history (added)
    self.prev_pos = (0, 0, 0)
    self.temp_pos = (0, 0, 0)
def __init__(self, size, obs_size, num_entities=5, episodes=100):
    """Set up the zombie-arena agent.

    :param size: arena size
    :param obs_size: observation grid size
    :param num_entities: zombies spawned at the start of each episode
    :param episodes: number of episodes to run
    """
    self.size = size
    self.obs_size = obs_size
    self.num_entities = num_entities
    # Keep a pristine copy so every mission restarts with the right zombie count.
    self.num_entities_copy = num_entities
    self.episodes = episodes

    # Default Malmo objects: the agent host.
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(self.agent_host.getUsage())
        exit(1)
    if self.agent_host.receivedArgument("help"):
        print(self.agent_host.getUsage())
        exit(0)

    self.world_state = self.agent_host.getWorldState()
    self.is_mission_running = self.world_state.is_mission_running
    self.cobblestone_wall = 0
def __init__(self, env_config):
    """Set up the DiamondCollector environment: spaces, Malmo host, episode state."""
    # Static parameters
    self.max_episode_steps = 100
    self.log_frequency = 10
    # FIX: obs_size was referenced below but never assigned, so constructing this
    # class raised AttributeError. 5 matches the sibling environments in this
    # project — TODO confirm the intended value.
    self.obs_size = 5

    # RLlib parameters
    # self.action_space = Discrete(len(self.action_dict))
    self.action_space = Box(-1, 1, shape=(3, ), dtype=np.float32)
    self.observation_space = Box(0, 1,
                                 shape=(2 * self.obs_size * self.obs_size, ),
                                 dtype=np.float32)

    # Malmo parameters
    # FIX: was Malmo.AgentHost() — the module is imported as MalmoPython
    # everywhere else in this codebase (no `Malmo` name exists).
    self.agent_host = MalmoPython.AgentHost()

    # DiamondCollector state
    self.obs = None
    self.allow_break_action = False
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.steps = []
<AgentHandlers> <ObservationFromFullStats/> <DiscreteMovementCommands/> <AbsoluteMovementCommands/> <AgentQuitFromTouchingBlockType> <Block type="torch"/> </AgentQuitFromTouchingBlockType> <ChatCommands/> <MissionQuitCommands/> </AgentHandlers> </AgentSection> </Mission>''' # Create default Malmo objects: agent_host = MalmoPython.AgentHost() try: agent_host.parse(sys.argv) except RuntimeError as e: print('ERROR:', e) print(agent_host.getUsage()) exit(1) if agent_host.receivedArgument("help"): print(agent_host.getUsage()) exit(0) my_mission = MalmoPython.MissionSpec(missionXML, True) my_mission_record = MalmoPython.MissionRecordSpec() # Attempt to start a mission: max_retries = 3
def setupMinecraft():
    '''
    Setup the Minecraft environment
    NOTE: action space relies heavily on the coordinate system
    and minecraft has a weird coord system
    '''
    # Action indices: 0: stop, 1: forward, 2: back, 3: left, 4: right
    action_space = ["move 0", "move 1", "move -1", "strafe -1", "strafe 1"]

    # Default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as err:
        print('ERROR:', err)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # Load the base mission definition.
    mission_file = './mission.xml'
    with open(mission_file, 'r') as f:
        print("Loading mission from %s" % mission_file)
        mission_xml = f.read()
    my_mission = MalmoPython.MissionSpec(mission_xml, True)

    # Force a brand new world every episode.
    my_mission.forceWorldReset()

    # Lava floor capped with obsidian.
    my_mission.drawCuboid(-1, 106, -1, GRID_SIZE, 106, GRID_SIZE, "lava")
    my_mission.drawCuboid(-1, 107, -1, GRID_SIZE, 107, GRID_SIZE, "obsidian")
    # my_mission.drawCuboid(0, 108, 0, GRID_SIZE-1, 110, GRID_SIZE-1, "air")  # makes steve move

    # Build walls ('1') and lava pits ('2') from the generated map.
    gridmap = reset_map(GRID_SIZE, MAP_PATH)
    for row in range(GRID_SIZE):
        for col in range(GRID_SIZE):
            if gridmap[row][col] == '1':
                my_mission.drawBlock(col, 108, row, "stone")
                my_mission.drawBlock(col, 109, row, "stone")
    for row in range(GRID_SIZE):
        for col in range(GRID_SIZE):
            if gridmap[row][col] == '2':
                my_mission.drawBlock(col, 107, row, "lava")

    # Place collectible diamonds.
    diamond_spots = [(4, 6), (0, 0), (5, 1), (9, 2), (7, 8),
                     (0, 9), (7, 4), (8, 0), (1, 6), (8, 6)]
    for dx, dz in diamond_spots:
        my_mission.drawItem(dx, 109, dz, "diamond")

    # Corner torches for aesthetics.
    for tx, tz in ((-1, -1), (-1, GRID_SIZE), (GRID_SIZE, -1), (GRID_SIZE, GRID_SIZE)):
        my_mission.drawBlock(tx, 111, tz, "torch")

    # Idea for another mission:
    # my_mission.drawLine(0, 107, 8, 15, 107, 8, "netherrack")
    my_mission.drawBlock(8, 108, 8, "fire")
    # Can't add a door properly, only adding half a door?
    # my_mission.drawBlock(11, 108, 6, "wooden_door")
    # my_mission.drawBlock(11, 109, 6, "wooden_door")

    # Placing Steve in the map. Random start is currently disabled in favor of
    # a fixed spawn (x and z are computed but unused).
    x = np.random.randint(0, 9) + 0.5
    z = np.random.randint(0, 9) + 0.5
    # my_mission.startAt(x, 108, z)
    my_mission.startAt(4.5, 108, 3.5)

    my_mission_record = MalmoPython.MissionRecordSpec()
    print(my_mission.getSummary())
    return agent_host, my_mission, my_mission_record, action_space
if command == 2: return "go there" if command == 3: return "follow" if command == 4: return "sit" if command == 5: return -1 if __name__ == '__main__': # Create default Malmo objects: agent_host = MalmoPython.AgentHost() #scout_ai = ScoutAI(agent_host) scout_ai = MalmoPython.AgentHost() malmoutils.parse_command_line(agent_host) commandQueue = CommandQueue() prev_command = 0 counter = 0 my_mission = MalmoPython.MissionSpec(buildEnvironment(), True) my_mission.allowAllChatCommands() client_pool = MalmoPython.ClientPool() client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10000)) # client_pool.add(MalmoPython.ClientInfo( "127.0.0.1", 10001) ) agent_host_record = MalmoPython.MissionRecordSpec() # scout_record = MalmoPython.MissionRecordSpec()
import sys
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from keras.optimizers import Adam
from past.utils import old_div
from malmo import MalmoPython
import logging
from malmo import malmoutils
import traceback

malmoutils.fix_print()

# Initialize the two agent hosts and their recording directories.
agent_host1 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host1)
recordingsDirectory1 = malmoutils.get_recordings_directory(agent_host1)

agent_host2 = MalmoPython.AgentHost()
malmoutils.parse_command_line(agent_host2)
recordingsDirectory2 = malmoutils.get_recordings_directory(agent_host2)

# Module logging: INFO globally, DEBUG for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Client pool: one Minecraft instance per agent.
client_pool = MalmoPython.ClientPool()
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
client_pool.add(MalmoPython.ClientInfo('127.0.0.1', 10001))
def main():
    """Run `num_repeats` maze missions: build a maze world, train in a child
    process while replaying its action history in Minecraft, then walk to the
    goal and log timing stats to Eval_Stats.txt / training_result.txt.

    NOTE(review): reformatted from a collapsed source — the statement nesting
    inside the action-replay loop is inferred; verify against the original.
    """
    global agent_host
    global matrix2dOriginal
    global maze_map
    global actionHistCounter

    # Default Malmo objects:
    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        print(agent_host.getUsage())
        exit(0)

    # How many times the agent repeats the mission.
    num_repeats = 50

    # Truncate both log files, then reopen them for appending.
    esFile = open("Eval_Stats.txt", "w+")
    esFile.write("\n")
    esFile.close()
    esFile = open("Eval_Stats.txt", "a")
    trFile = open("training_result.txt", "w+")
    trFile.write("\n")
    trFile.close()
    trFile = open("training_result.txt", "a")

    for i in range(num_repeats):
        esFile.write("Run #" + str(i + 1) + "\n")
        actionHistCounter = i + 1
        # size = int(6 + 0.5*i)
        print("Size of maze:", size_of_maze)
        # my_mission = MalmoPython.MissionSpec(get_mission_xml("0", 0.4 + float(i/20.0), size_of_maze, 0), True)
        randomDif = random.uniform(-0.2, 0.2)
        print("Parameters of the mission:", str(i), "next:", 0.4 + randomDif,
              "size:", size_of_maze)
        my_mission = MalmoPython.MissionSpec(
            get_mission_xml(str(i), 0.4 + randomDif, size_of_maze, 0), True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(1)

        # Attempt to start the mission, retrying on transient failures.
        max_retries = 3
        my_clients = MalmoPython.ClientPool()
        # Add Minecraft machines here as available.
        my_clients.add(MalmoPython.ClientInfo('127.0.0.1', 10000))
        for retry in range(max_retries):
            try:
                agent_host.startMission(my_mission, my_clients,
                                        my_mission_record, 0,
                                        "%s-%d" % ('Moshe', i))
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print("Error starting mission", (i + 1), ":", e)
                    exit(1)
                else:
                    time.sleep(2)

        # Loop until the mission starts.
        print("Waiting for the mission", (i + 1), "to start ", )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            # sys.stdout.write(".")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()
        print("Mission", (i + 1), "running.")

        grid = load_grid(world_state, agent_host)
        # print("World State Grid:", grid)
        print("Size of actual map:", len(grid))
        maze_map = get_maze_map(grid)
        print("maze map:", len(maze_map))

        # Maze construction: both orientations of the 2-D matrix.
        matrix2dOriginal = maze_to_2dMatrix(maze_map, size_of_maze)
        matrix2d = maze_to_2dMatrix_reversed(maze_map, size_of_maze)
        print("the matrix 2d: ", matrix2d)
        matrixArray = matrix2d.flatten()
        start_and_end_positions_in_actual_map = find_start_end(grid)
        print("size of maze map:", len(maze_map))
        print("first position in actual map:", first_block_index_in_actual_map)
        print("last position in actual map:", last_block_index_in_actual_map)

        global agent_current_position_xy_in_maze, agent_current_position_index_in_grid
        agent_current_position_xy_in_maze = \
            get_xy_position_of_maze_map_by_position_of_actual_map(
                start_and_end_positions_in_actual_map[0], grid)
        print("Started: agent current position(xy in maze):",
              agent_current_position_xy_in_maze)
        agent_current_position_index_in_grid = \
            get_position_of_actual_map_by_xy_position_of_maze_map(
                agent_current_position_xy_in_maze, grid)
        print("Started: agent current position(index in grid):",
              agent_current_position_index_in_grid,
              "compared with real position:",
              start_and_end_positions_in_actual_map[0])

        # Recover the agent's yaw from the mission XML (digits after "yaw=").
        index_of_yaw = my_mission.getAsXML(True).index("yaw")
        yaw_of_agent = int(
            re.compile("(\d+)").match(
                my_mission.getAsXML(True)[index_of_yaw + 5:index_of_yaw + 8]
            ).group(1))
        sync_agent_direction_with_yaw(yaw_of_agent)
        print("Started: agent current yaw(face to where):", yaw_of_agent)

        # go_to_goal_and_finish_mission(grid, start_and_end_positions_in_actual_map[0],
        #     start_and_end_positions_in_actual_map[1], world_state, agent_host, i)
        print("Started: How many walkable blocks in front of agent's direction:",
              agent_current_direction, "is walk able? Answer:",
              get_num_of_walkable_blocks_in_front_of_agent(
                  agent_current_position_xy_in_maze, size_of_maze, grid))
        # test_moving(agent_host, [3, 3, 0, 3, 3, 0, 3])
        positionTransition(grid, matrixArray, yaw_of_agent, size_of_maze)

        # Train in a child process while this process replays the growing
        # action-history file in Minecraft.
        trainingStart = time.time()
        trainingProcess = Process(target=missionTrainingStart,
                                  args=(actionHistCounter, ))
        trainingProcess.start()

        stringList = []
        is_complete_action_history = False
        curr_action_counter = 0
        while True:
            if not is_complete_action_history:
                actionHistFile = None
                # Poll until the trainer has written at least one line.
                while True:
                    try:
                        actionHistFile = open(
                            "action_history_" + str(actionHistCounter) + "_.txt",
                            "r")
                        stringList = actionHistFile.readlines()
                        if len(stringList) != 0:
                            break
                    except:
                        continue
                print("Reading action history file, get string: ", stringList)
                curr_action_list = stringList[0].split(' ')
                actionHistFile.close()
            print("Here is the list length:", len(curr_action_list),
                  curr_action_counter + 1)
            try:
                if (len(curr_action_list) >= curr_action_counter + 1):
                    action = curr_action_list[curr_action_counter]
                    convertAction = directionConvert(int(action[0]))
                    test_moving(agent_host, [convertAction], grid)
                    curr_action_counter += 1
            except ValueError:
                # The last index of the action history is a newline character.
                break
            if (stringList[len(stringList) - 1] == "END"):
                is_complete_action_history = True
            if (is_complete_action_history
                    and len(curr_action_list) == curr_action_counter - 1):
                break

        trainingProcess.join()
        trainingEnd = time.time()
        trainingElapsed = trainingEnd - trainingStart
        esFile.write("Training Time: " + str(trainingElapsed) + " ")
        # (Large experimental replay/validation variants removed here; they were
        # fully commented out in the original source.)

        print("Training complete. Training result can be found in "
              "training_result.txt.")

        # Walk from the current position to the goal and time the trip.
        travelStart = time.time()
        go_to_goal_and_finish_mission(grid, agent_current_position_index_in_grid,
                                      start_and_end_positions_in_actual_map[1],
                                      world_state, agent_host, i)
        travelEnd = time.time()
        travelElapsed = travelEnd - travelStart
        esFile.write("Agent Travel Time: " + str(travelElapsed) + "\n\n")
        print("Aiku did it!")

    trFile.close()
    esFile.close()
import json import logging import math import os import random import sys import time import re import uuid from collections import namedtuple from operator import add EntityInfo = namedtuple('EntityInfo', 'x, y, z, name') # Create one agent host for parsing: agent_hosts = [MalmoPython.AgentHost()] # Parse the command-line options: agent_hosts[0].addOptionalFlag("debug,d", "Display debug information.") agent_hosts[0].addOptionalIntArgument( "agents,n", "Number of agents to use, including observer.", 4) PICKAXE_POS = [292, 436] D_TOOL_POS = [290, 436] try: agent_hosts[0].parse(sys.argv) except RuntimeError as e: print('ERROR:', e) print(agent_hosts[0].getUsage()) exit(1) if agent_hosts[0].receivedArgument("help"):
def main(model=None, mode='train', start_episode=0):
    """Train or evaluate a DQN agent on the hill-descent mission.

    :param model: path of a saved model to load, or None to start fresh
    :param mode: 'train' (default) or 'test' (greedy policy, no exploration)
    :param start_episode: episode number to resume from

    NOTE(review): reformatted from a collapsed source — loop nesting is inferred.
    """
    my_xml = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <About>
            <Summary>Hill Descent.</Summary>
        </About>
        <ModSettings>
            <MsPerTick>20</MsPerTick>
        </ModSettings>
        <ServerSection>
            <ServerInitialConditions>
                <Time><StartTime>1</StartTime></Time>
            </ServerInitialConditions>
            <ServerHandlers>
                <DefaultWorldGenerator seed="-999595225643433963" forceReset="false" destroyAfterUse="false" />
                <ServerQuitFromTimeUp timeLimitMs="100000000"/>
                <ServerQuitWhenAnyAgentFinishes/>
            </ServerHandlers>
        </ServerSection>
        <AgentSection mode="Survival">
            <Name>Bob</Name>
            <AgentStart>
                <Placement x="28.5" y="87" z="330.5" pitch="-90" yaw="0"/>
            </AgentStart>
            <AgentHandlers>
                <DiscreteMovementCommands/>
                <MissionQuitCommands quitDescription="done"/>
                <ChatCommands/>
                <ObservationFromFullStats/>
                <ObservationFromGrid>
                    <Grid name="sight">
                        <min x="{}" y="{}" z="{}"/>
                        <max x="{}" y="{}" z="{}"/>
                    </Grid>
                    <Grid name="feet">
                        <min x="0" y="-1" z="0"/>
                        <max x="0" y="-1" z="0"/>
                    </Grid>
                </ObservationFromGrid>
                <AgentQuitFromTouchingBlockType>
                    <Block type="cobblestone" />
                </AgentQuitFromTouchingBlockType>
            </AgentHandlers>
        </AgentSection>
    </Mission>
    '''.format(-(grid_width - 1) // 2, -grid_height, -(grid_width - 1) // 2,
               (grid_width - 1) // 2, grid_height, (grid_width - 1) // 2)
    # FIX: the closing tag was "</ObservationsationFromGrid>", which does not
    # match <ObservationFromGrid> and fails MissionSpec's XML validation below.

    batch_size = 100
    agent = DQNAgent(state_size, action_size, learning_rate, discount_rate,
                     epsilon, epsilon_min, epsilon_decay)
    if model != None:
        agent.load(model)
        if mode == 'test':
            agent.epsilon = 0.0
        print('loaded model: {}'.format(model))
    else:
        clear_csv('./data/results.csv')
        clear_csv('./data/moves.csv')

    # NOTE(review): my_client_pool is built but never passed to startMission —
    # the two-argument overload below connects to the default client instead.
    my_client_pool = MalmoPython.ClientPool()
    my_client_pool.add(MalmoPython.ClientInfo("127.0.0.1", 10001))
    agent_host = MalmoPython.AgentHost()

    for e in range(start_episode + 1, episodes + 1):
        my_mission = MalmoPython.MissionSpec(my_xml, True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        my_mission.requestVideo(800, 500)
        my_mission.setViewpoint(2)
        print("Waiting for the mission to start", end=' ')
        agent_host.startMission(my_mission, my_mission_record, )
        world_state = agent_host.getWorldState()
        while not world_state.has_mission_begun:
            print(".", end="")
            time.sleep(0.1)
            world_state = agent_host.getWorldState()
            for error in world_state.errors:
                print("Error:", error.text)
        print()

        # Remove distracting passive mobs.
        agent_host.sendCommand('chat /kill @e[type=Chicken]')
        agent_host.sendCommand('chat /kill @e[type=Pig]')
        agent_host.sendCommand('chat /kill @e[type=Cow]')

        moves = 0
        episode_reward = 0
        while world_state.is_mission_running:
            world_state = agent_host.getWorldState()
            if world_state.number_of_observations_since_last_state > 0:
                try:
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue
                state = get_state(data)
                prev_x = data.get(u'XPos', 0)
                prev_y = data.get(u'YPos', 0)
                prev_z = data.get(u'ZPos', 0)
                useful_state = [state[2], state[6], state[7], state[8],
                                state[10], state[11], state[13],
                                state[14], state[16], state[17],
                                state[18], state[22]]
                action = agent.act(useful_state)
                # Jump variant of the move when the adjacent cell is blocked.
                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0)
                        or (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    agent_host.sendCommand(jump_directions[action])
                else:
                    agent_host.sendCommand(directions[action])
                time.sleep(0.25)
                try:
                    world_state = wait_world_state(agent_host, world_state)
                    obvsText = world_state.observations[-1].text
                    data = json.loads(obvsText)
                except:
                    print("Error when getting state")
                    continue
                current_x = data.get(u'XPos', 0)
                current_y = data.get(u'YPos', 0)
                current_z = data.get(u'ZPos', 0)
                damage_taken = calculate_damage(prev_y, current_y)
                next_state = get_state(data)
                useful_next_state = [state[2], state[6], state[7], state[8],
                                     state[10], state[11], state[13],
                                     state[14], state[16], state[17],
                                     state[18], state[22]]
                # Reward descent, punish fall damage; a no-movement step costs -1000.
                reward = 2 * (prev_y - current_y) - 50 * damage_taken - 1 \
                    if prev_x != current_x or prev_y != current_y or prev_z != current_z \
                    else -1000
                episode_reward += reward
                done = True if current_y <= goal_height \
                    or not world_state.is_mission_running \
                    or data['Life'] <= 0 else False
                agent.remember(useful_state, action, reward, useful_next_state, done)
                if ((action == 0 and state[grid_center - grid_width] == 0)
                        or (action == 1 and state[grid_center + 1] == 0)
                        or (action == 2 and state[grid_center + grid_width] == 0)
                        or (action == 3 and state[grid_center - 1] == 0)):
                    print('episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                          .format(e, episodes, jump_directions[action], reward,
                                  agent.epsilon, moves, done))
                else:
                    print('episode {}/{}, action: {}, reward: {}, e: {:.2}, move: {}, done: {}'
                          .format(e, episodes, directions[action], reward,
                                  agent.epsilon, moves, done))
                moves += 1
                if mode == 'train' or model == None:
                    write_to_csv('./data/moves.csv',
                                 [e, current_x, current_y, current_z, reward])
                if e > batch_size:
                    agent.replay(batch_size)
                if done or moves > max_moves:
                    agent_host.sendCommand("quit")

        # Periodic checkpointing; stop entirely once exploration has decayed.
        if (mode == 'train' or model == None) and (e in checkpoints
                                                   or agent.epsilon <= epsilon_min):
            print('saving model at episode {}'.format(e))
            agent.save('./models/model_{}'.format(e))
            if agent.epsilon <= epsilon_min:
                break
        time.sleep(1)
        # my_mission.forceWorldReset()
        if mode == 'train' or model == None:
            write_to_csv('./data/results.csv',
                         [e, episode_reward, moves, int(episode_reward > 0)])
def __init__(self, env_config):
    """Set up the ResourceCollector environment: spaces, Malmo host, and
    per-resource collection tracking."""
    # Static parameters
    self.size = 10
    self.reward_density = .1
    self.penalty_density = .02
    self.obs_size = 5
    self.max_global_steps = (self.size * 2)**2
    self.log_frequency = 10
    self.action_dict = {
        0: 'move 1',      # Move one block forward
        1: 'turn 1',      # Turn 90 degrees to the right
        2: 'turn -1',     # Turn 90 degrees to the left
        3: 'attack 1',    # Destroy block
        4: 'jumpmove 1',  # Jump up and move forward 1 block
    }
    # Encoding of block types in the observation grid (-1 marks lava hazards).
    self.blocks_dict = {
        "redstone_ore": 1,
        "coal_ore": 2,
        "emerald_ore": 3,
        "iron_ore": 4,
        "gold_ore": 5,
        "diamond_ore": 6,
        "lava": -1,
        "flowing_lava": -1,
    }

    # RLlib parameters
    self.action_space = Discrete(len(self.action_dict))
    self.observation_space = Box(
        -1, 6,
        shape=(np.prod([2, self.obs_size, self.obs_size]), ),
        dtype=np.int32)

    # Malmo parameters
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        exit(1)

    # ResourceCollector state
    self.obs = None
    self.obsdict = None  # last json-loaded observation
    self.episode_step = 0
    self.episode_return = 0
    self.returns = []
    self.resources_collected = {
        "diamond": [0],
        "redstone": [0],
        "coal": [0],
        "emerald": [0],
        "iron_ore": [0],
        "gold_ore": [0],
    }
    self.deaths = []
    self.death_occurred = False
    self.steps = []
    self.episode_start = time.time()
    self.episode_end = time.time()
# Script bootstrap: set up unbuffered printing, create the Malmo agent
# hosts, parse command-line options, and load the arena mission file.
# NOTE(review): MalmoPython is used below but not imported in this chunk —
# presumably imported earlier in the file; confirm.
from utility import safeStartMission, reload, updateWorldState
from basicAgent import basic_agent
import os
import sys
import time

# Total number of games/missions to run.
NUM_OF_GAMES = 100

if sys.version_info[0] == 2:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # flush print output immediately
else:
    import functools
    # Python 3: rebind print so every call flushes immediately, mirroring
    # the unbuffered-stdout behavior of the Python 2 branch above.
    print = functools.partial(print, flush=True)

# Create default Malmo objects:
agent_host = MalmoPython.AgentHost()
opponent_host = MalmoPython.AgentHost()
spectator = MalmoPython.AgentHost()
# Only the spectator host parses the command line; the other hosts use
# default settings.
try:
    spectator.parse(sys.argv)
except RuntimeError as e:
    print('ERROR:', e)
    print(spectator.getUsage())
    exit(1)
if spectator.receivedArgument("help"):
    print(spectator.getUsage())
    exit(0)

# Load the mission XML from disk.
mission_file = './simple_arena.xml'
with open(mission_file, 'r') as f:
    print("Loading mission from %s" % mission_file)
def doXML(area):
    """Build a flat-world spectator mission containing *area*, start it,
    and block until the mission ends.

    Args:
        area: drawing specification forwarded to placeBottom() to produce
            the <DrawingDecorator> contents of the mission XML.

    Exits the process (exit code 1) if argument parsing or mission start
    fails; exit code 0 when run with --help.
    """
    missionXML = '''<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
    <Mission xmlns="http://ProjectMalmo.microsoft.com" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <About>
            <Summary>Hello world!</Summary>
        </About>
        <ServerSection>
            <ServerHandlers>
                <FlatWorldGenerator generatorString="3;7,0,5*3,2;3;,biome_1" forceReset="true"/>
                <DrawingDecorator>
                ''' + placeBottom(area) + '''
                </DrawingDecorator>
            </ServerHandlers>
        </ServerSection>
        <AgentSection mode="Creative">
            <Name>SketchyAI</Name>
            <AgentStart>
                ''' + '<Placement x="{0}" y="{1}" z="{2}" yaw="0"/>'.format(.5, 30, .5) + '''
            </AgentStart>
            <AgentHandlers>
                <ObservationFromFullStats/>
                <ContinuousMovementCommands turnSpeedDegs="180"/>
            </AgentHandlers>
        </AgentSection>
    </Mission>'''

    agent_host = MalmoPython.AgentHost()
    try:
        agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        # Fix: the usage text was fetched but never printed (the original
        # had a bare print() followed by an unused agent_host.getUsage()).
        print(agent_host.getUsage())
        exit(1)
    if agent_host.receivedArgument("help"):
        # Fix: same issue as above — actually print the usage text.
        print(agent_host.getUsage())
        exit(0)

    my_mission = MalmoPython.MissionSpec(missionXML, True)
    my_mission_record = MalmoPython.MissionRecordSpec()
    my_mission.setModeToSpectator()

    # Attempt to start a mission, retrying a few times before giving up:
    max_retries = 3
    for retry in range(max_retries):
        try:
            agent_host.startMission(my_mission, my_mission_record)
            break
        except RuntimeError as e:
            if retry == max_retries - 1:
                print("Error starting mission:", e)
                exit(1)
            else:
                time.sleep(2)

    # Loop until mission starts:
    # Fix: the original passed world_state=... as a keyword argument to
    # print(), which raises TypeError and never assigns world_state; split
    # into a print call and a separate assignment.
    print("Waiting for the mission to start ")
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    print("Mission running ")

    # Loop until mission ends:
    while world_state.is_mission_running:
        sys.stdout.write(".")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)
    print()
    print("Mission ended")
def __init__(self, missionXML, n_games=500, max_retries=3, starting_zombies=1,
             XSize=10, ZSize=10, aggregate_episode_every=5,
             agent_search_resolution=30, load_model=False):
    """Set up the zombie-arena mission runner (DQN agent + Q-table state).

    Args:
        missionXML: mission XML template handed to the MissionGenerator.
        n_games: number of episodes to run.
        max_retries: attempts allowed when starting a mission.
        starting_zombies: zombie count for the first episode.
        XSize, ZSize: arena dimensions, used for reward calculation and
            visualization.
        aggregate_episode_every: episode interval for aggregated stats.
        agent_search_resolution: number of candidate directions sampled by
            the direction learner.
        load_model: if True, reload a saved DQN model and Q-table.
    """
    # keras attributes
    self.n_games = n_games
    self._init_logger()

    # keras
    self.n_actions = 4
    self.agent = Agent(gamma=0.99, epsilon=1.0, alpha=0.0005, input_dims=7,
                       n_actions=self.n_actions, mem_size=1000000,
                       batch_size=64, epsilon_end=0.01)
    self._load_dqn_model(load_model)
    self.scores = []
    self.eps_history = []
    self.aggregate_episode_every = aggregate_episode_every

    # qtable
    self.Qtb = {}
    self._load_qtable(load_model)
    self.epsilon = 0.01  # chance of taking a random action instead of the best

    # agent
    self.agent_host = MalmoPython.AgentHost()
    try:
        self.agent_host.parse(sys.argv)
    except RuntimeError as e:
        print('ERROR:', e)
        print(self.agent_host.getUsage())
        # Fix: sys.exit() instead of the site-provided exit() builtin.
        sys.exit(1)

    # mission
    self.missionXML = missionXML
    # self._validate_mission()
    self.max_retries = max_retries

    # adding clients
    self.my_client_pool = None
    # self._add_starters()
    self._add_default_client()
    self.world_state = None

    # mission generator
    self.mission_generator = MissionGenerator(self.missionXML)
    self.starting_zombies = starting_zombies
    self.num_zombies = starting_zombies
    self.zombie_difference = 0

    # for reward calculation
    self.XSize = XSize
    self.ZSize = ZSize

    # canvas
    self.visual = Visualizer(arena_width=self.XSize,
                             arena_breadth=self.ZSize)

    # direction learner variables
    self.agent_search_resolution = agent_search_resolution
    self.agent_stepsize = 1
    self.agent_edge_weight = -100
    self.agent_mob_weight = -10
    # Fix: agent_turn_weight was assigned twice (first 100, then 0 with no
    # read in between); the dead first assignment is removed and the
    # effective value (0) is kept.
    self.agent_turn_weight = 0  # Negative values to penalise turning, positive to encourage.
    self.turning_diff = 0

    # for visualization
    self.flash = False
    self.current_life = 0

    # main loop variables
    self.self_x = 0
    self.self_z = 0
    self.current_yaw = 0
    self.ob = None
    self.all_zombies_dead = False
    self.num_heals = 0
    self.life_decrease_penalty = 0
    self.TimeAlive = 0
    self.time_rewards = 0
    self.heal_rewards = 0
    self.move_backwards_reward = 0