def _get_actions(self, marines, minerals):
    """Return the next action for pairing marines with minerals.

    Args:
        marines: Sequence of marine records; element ``[0]['tag']`` and
            element ``[1]['is_selected']`` are read from each record.
        minerals: Sequence of mineral records; ``[0]['tag']`` is read.

    Returns:
        * ``FUNCTIONS.no_op()`` when ``minerals`` is empty;
        * a move order (via ``_move_marine_to_minarel``) when the selected
          marine has no entry in ``self.pairs`` yet - the mineral at the
          argmin of ``get_distances`` is recorded for it;
        * otherwise a selection of the first marine that is not in
          ``self.pairs``, or ``FUNCTIONS.no_op()`` if there is none.
    """
    if not minerals:
        return FUNCTIONS.no_op()

    # First marine flagged as selected, or None when nothing is selected.
    chosen = next((unit for unit in marines if unit[1]['is_selected']), None)

    if chosen:
        chosen_tag = chosen[0]['tag']
        if chosen_tag not in self.pairs:
            closest = minerals[np.argmin(get_distances(chosen, minerals))]
            self.pairs[chosen_tag] = closest[0]['tag']
            return self._move_marine_to_minarel(chosen, closest)

    # Marines whose tag is already a key of self.pairs count as busy.
    busy_tags = set(self.pairs)
    unassigned = [unit for unit in marines if unit[0]['tag'] not in busy_tags]
    if unassigned:
        return select_unit(unassigned[0])
    return FUNCTIONS.no_op()
def build_unit(self, obs, unit_type):
    """Return the action that trains ``unit_type`` or prepares the selection.

    Args:
        obs: Current timestep, forwarded to the selection/availability
            helpers.
        unit_type: One of the ``units.Zerg`` unit types handled below.

    Returns:
        A train action when it is available, a selection action when larva
        is not selected yet, otherwise ``FUNCTIONS.no_op()``.
    """
    # Queen: trained directly from a selected Hatchery / Hive / Lair.
    if unit_type == units.Zerg.Queen:
        town_halls = (units.Zerg.Hatchery, units.Zerg.Hive, units.Zerg.Lair)
        hall_selected = any(
            self.unit_type_is_selected(obs, hall) for hall in town_halls)
        if hall_selected and self.can_do(obs, FUNCTIONS.Train_Queen_quick.id):
            return FUNCTIONS.Train_Queen_quick('now')

    # Without larva selected: pick larva if possible, else select the last
    # Hatchery returned by get_buildings.
    if not self.unit_type_is_selected(obs, units.Zerg.Larva):
        if self.can_do(obs, FUNCTIONS.select_larva.id):
            return FUNCTIONS.select_larva('now')
        bases = self.get_buildings(obs, units.Zerg.Hatchery, False)
        last_base = bases[-1]
        return FUNCTIONS.select_point(
            'select_all_type', (last_base.x * 2, last_base.y * 2))

    # Drone, Overlord, Zergling, Roach - checked in this order.
    larva_trained = (
        (units.Zerg.Drone, FUNCTIONS.Train_Drone_quick),
        (units.Zerg.Overlord, FUNCTIONS.Train_Overlord_quick),
        (units.Zerg.Zergling, FUNCTIONS.Train_Zergling_quick),
        (units.Zerg.Roach, FUNCTIONS.Train_Roach_quick),
    )
    for kind, train in larva_trained:
        if unit_type == kind and self.can_do(obs, train.id):
            return train('now')

    return FUNCTIONS.no_op()
def macro(self, obs):
    """Choose the next macro (economy / production) action.

    Rules are evaluated top to bottom and the first one that applies
    decides the returned action; most of them key off the current worker
    count (``food_workers``).

    Args:
        obs: Timestep whose ``observation`` exposes ``player``,
            ``last_actions`` and ``build_queue``.

    Returns:
        The action produced by ``redestribute_workers``, ``build_unit`` or
        ``build_building``, with ``FUNCTIONS.no_op()`` as the final
        fallback.
    """
    player = obs.observation.player
    if player.idle_worker_count > 0:
        return self.redestribute_workers(obs)

    # if self.unit_type_is_selected(obs, units.Zerg.Overlord):
    #     if 451 not in obs.observation.last_actions:
    #         hatcheries = self.get_buildings(obs, units.Zerg.Hatchery)
    #         if hatcheries[0].y < 100:
    #             return FUNCTIONS.Smart_screen("now", (0, 0))
    #         else:
    #             return FUNCTIONS.Smart_screen("now", (350, 350))

    dronenum = player['food_workers']

    # Fewer than 2 free supply and no Overlord ordered by the last action.
    if (player['food_cap'] - player['food_used'] < 2) \
            and FUNCTIONS.Train_Overlord_quick.id not in obs.observation.last_actions:
        return self.build_unit(obs, units.Zerg.Overlord)

    # These buildings are essential and cannot be skipped
    if dronenum > 15:
        if len(self.get_buildings(obs, units.Zerg.Hatchery)) == 1:
            return self.build_building(obs, units.Zerg.Hatchery)
        if len(self.get_buildings(obs, units.Zerg.Extractor)) == 0:
            return self.build_building(obs, units.Zerg.Extractor)
        if len(self.get_buildings(obs, units.Zerg.SpawningPool)) == 0:
            return self.build_building(obs, units.Zerg.SpawningPool)

    if dronenum == 19:
        return self.redestribute_workers(obs)

    if dronenum > 20 and \
            len(obs.observation.build_queue) <= 3 and \
            len(self.get_units_by_type(obs, units.Zerg.Queen)) <= 4 and \
            player.minerals > 150:
        return self.build_unit(obs, units.Zerg.Queen)

    if dronenum == 27:
        # if not self.zergSpeed:
        #     return self.upgrade(1)
        if len(self.get_units_by_type(obs, units.Zerg.Zergling)) < 100:
            return self.build_unit(obs, units.Zerg.Zergling)

    if dronenum == 28 and len(self.get_buildings(
            obs, units.Zerg.Hatchery)) == 2:
        return self.build_building(obs, units.Zerg.Hatchery)

    if dronenum == 29:
        return self.redestribute_workers(obs)

    if dronenum == 40:
        # The comparison must be applied to the building COUNT, i.e. outside
        # len(...); comparing the returned collection itself to an int fails.
        if len(self.get_buildings(obs, units.Zerg.Extractor)) < 4:
            return self.build_building(obs, units.Zerg.Extractor)
        if len(self.get_buildings(obs, units.Zerg.SporeCrawler)) < 3:
            return self.build_building(obs, units.Zerg.SporeCrawler)
        if len(self.get_buildings(obs, units.Zerg.RoachWarren)) < 1:
            return self.build_building(obs, units.Zerg.RoachWarren)
        if len(self.get_buildings(obs, units.Zerg.EvolutionChamber)) < 2:
            return self.build_building(obs, units.Zerg.EvolutionChamber)

    if dronenum == 41:
        return self.redestribute_workers(obs)

    if dronenum > 60:
        # ALL UPGRADES
        return self.build_unit(obs, units.Zerg.Roach)

    if dronenum < 80:
        return self.build_unit(obs, units.Zerg.Drone)

    return FUNCTIONS.no_op()
def upgrade(self, obs, upgrade):
    """Research Zergling Metabolic Boost, selecting a Spawning Pool first.

    Args:
        obs: Current timestep, forwarded to the selection helpers.
        upgrade: Requested upgrade. Currently unused - the only research
            issued here is ``Research_ZerglingMetabolicBoost_quick``.

    Returns:
        The research action when a Spawning Pool is selected, a
        ``select_point`` on the last Spawning Pool returned by
        ``get_buildings`` when none is selected, or ``FUNCTIONS.no_op()``
        when ``get_buildings`` returns no Spawning Pool.
    """
    # unit_type_is_selected takes the observation as its first argument.
    if self.unit_type_is_selected(obs, units.Zerg.SpawningPool):
        return FUNCTIONS.Research_ZerglingMetabolicBoost_quick("now")

    spawning_pools = self.get_buildings(obs, units.Zerg.SpawningPool)
    if len(spawning_pools) == 0:
        # Nothing to select yet; indexing [-1] would raise.
        return FUNCTIONS.no_op()

    pool = spawning_pools[-1]
    return FUNCTIONS.select_point('select_all_type', (pool.x * 2, pool.y * 2))
def step(self, obs, actives, acts):
    """Step the wrapped environments and return the wrapped result.

    When any entry of the third result element (the done flags) is truthy,
    the result is stored in ``self.last_obs`` and the environments are
    stepped once more with a no-op; the observations of that second step
    are returned together with the rewards and done flags of the first.

    Returns:
        Tuple ``(observations, rew, done)`` as produced by
        ``wrap_results``.
    """
    result = self.wrap_results(self.envs.step(obs, actives, acts))
    if not any(result[2]):
        return result

    # if done
    self.last_obs = result
    observations, rew, done = result
    next_obs, _, _ = observations['raws']
    followup = self.envs.step(next_obs, actives, FUNCTIONS.no_op())
    observations, _, _ = self.wrap_results(followup)
    return (observations, rew, done)
def step(self, obs, *args):
    """Move the first marine towards a beacon, one decision per call.

    Args:
        obs: Current timestep; ``obs.last()`` triggers ``self.reset()``.
        *args: Ignored.

    Returns:
        ``FUNCTIONS.no_op()`` when there is no beacon or a target is
        already set, a ``select_unit`` action when the marine is not
        selected, otherwise a ``Move_screen`` order to the beacon at the
        argmin of ``get_distances``.
    """
    if obs.last():
        self.reset()

    observation = obs.observation
    marine = get_units_by_type(observation, units.Terran.Marine)[0]
    # NOTE(review): 317 is presumably the beacon unit type id - confirm.
    beacons = get_units_by_type(observation, 317)

    beacon_positions = {(b[1]['x'], b[1]['y']) for b in beacons}
    if not beacon_positions:
        return FUNCTIONS.no_op()

    # Forget a target that no longer matches any beacon position.
    if self.target not in beacon_positions:
        self.target = None

    if not marine[1]['is_selected']:
        return select_unit(marine)

    if self.target:
        return FUNCTIONS.no_op()

    chosen = beacons[np.argmin(get_distances(marine, beacons))]
    chosen_info = chosen[1]
    self.target = (chosen_info['x'], chosen_info['y'])
    return FUNCTIONS.Move_screen("now", (chosen_info.x, chosen_info.y))
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Translate a flat gym action into a ``move_unit`` call.

    ``gym_action`` is unravelled over the first two entries of
    ``self.screen_shape``; the resulting index pair is reversed before it
    is handed to the action.

    Returns:
        A one-element list: a ``move_unit`` order for the tag found at
        ``state["unit_to_move"]``, or a no-op when
        ``state['player_unit_stable_tags']`` is empty.
    """
    # Get coords by unravelling action.
    # How unravel works:
    # Ref: https://www.quora.com/What-is-a-simple-intuitive-example-for-the-unravel_index-in-Python
    grid = (self.screen_shape[0], self.screen_shape[1])
    unravelled = np.unravel_index(gym_action, grid)

    stable_tags = self.state['player_unit_stable_tags']
    if not stable_tags:
        return [FUNCTIONS.no_op()]

    # PySC2 uses different conventions for observations (y,x) and actions (x,y)
    # ::-1 reverses the tuple i.e. (1,2) becomes (2,1)
    mover_tag = stable_tags[self.state["unit_to_move"]]
    return [FUNCTIONS.move_unit(mover_tag, "now", unravelled[::-1])]
def build_building(self, obs, building_type):
    """Return the action that places ``building_type`` with a drone.

    Args:
        obs: Current timestep, forwarded to the helper methods.
        building_type: ``units.Zerg.Hatchery``, ``Extractor`` or
            ``SpawningPool``.

    Returns:
        A ``select_point`` on the last drone when no drone is selected, the
        matching build action when it is available, otherwise
        ``FUNCTIONS.no_op()``.
    """
    if not self.unit_type_is_selected(obs, units.Zerg.Drone):
        worker = self.get_units_by_type(obs, units.Zerg.Drone)[-1]
        return FUNCTIONS.select_point(
            'select_all_type', (worker.x * 2, worker.y * 2))

    hatcheries = self.get_units_by_type(obs, units.Zerg.Hatchery)

    if (building_type == units.Zerg.Hatchery
            and self.can_do(obs, FUNCTIONS.Build_Hatchery_screen.id)):
        # Hard-coded build spots, chosen by the first hatchery's y value and
        # by whether exactly one hatchery exists.
        only_one = len(hatcheries) == 1
        if hatcheries[0].y * 2 < 200:
            spot = [58, 138] if only_one else [122, 78]
        else:
            spot = [292, 244] if only_one else [228, 304]
        return FUNCTIONS.Build_Hatchery_screen("now", spot)

    if (building_type == units.Zerg.Extractor
            and self.can_do(obs, FUNCTIONS.Build_Extractor_screen.id)):
        # Reference position (xy, xy) used to look up the closest geyser.
        xy = 0 if hatcheries[0].y < 100 else 350
        geyser = self.get_closest_unit_to_pos(
            obs, xy, xy, units.Neutral.VespeneGeyser)
        return FUNCTIONS.Build_Extractor_screen(
            "now", [geyser.x * 2, geyser.y * 2])

    if (building_type == units.Zerg.SpawningPool
            and self.can_do(obs, FUNCTIONS.Build_SpawningPool_screen.id)):
        offset = -10 if hatcheries[0].y * 2 > 200 else 10
        return FUNCTIONS.Build_SpawningPool_screen(
            "now",
            [self.homeHatch.x * 2, (self.homeHatch.y * 2) + offset])

    return FUNCTIONS.no_op()
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Translate a discrete direction into a one-step ``Move_screen`` order.

    Action encoding:
        0: No-op, 1: Up, 2: Down, 3: Left, 4: Right,
        5: Up + Left, 6: Up + Right, 7: Down + Left, 8: Down + Right

    Returns:
        A one-element list: ``select_army`` when ``Move_screen`` is not in
        ``self.available_actions``, a no-op for action 0, otherwise a
        ``Move_screen`` to the first player unit's position shifted by one
        in the requested direction.
    """
    if FUNCTIONS.Move_screen.id not in self.available_actions:
        return [FUNCTIONS.select_army("select")]

    # 0 = no-op
    if gym_action == 0:
        return [FUNCTIONS.no_op()]

    unit = self.state["player_units"][0]
    x, y = unit.x, unit.y

    # Vertical component: "Up" decreases y, "Down" increases it.
    if gym_action in (1, 5, 6):
        y = max(0, y - 1)
    elif gym_action in (2, 7, 8):
        y = min(self.screen_shape[1] - 1, y + 1)

    # Horizontal component: "Left" decreases x, "Right" increases it.
    if gym_action in (3, 5, 7):
        x = max(0, x - 1)
    elif gym_action in (4, 6, 8):
        x = min(self.screen_shape[0] - 1, x + 1)

    return [FUNCTIONS.Move_screen("now", [x, y])]
def _step(self, action) -> TimeStep:
    """Convert ``action`` to SC2 calls, drop invalid ones and step the env.

    A call is kept when its function is in ``self.available_actions`` or
    its function id is at least ``FUNCTIONS.move_unit.id``. If the
    environment raises ``ValueError`` the step is retried with a single
    no-op. ``self.available_actions`` is refreshed from the new timestep.

    Returns:
        The first timestep returned by ``self.sc2_env.step``.
    """
    requested = self.get_sc2_action(action)

    # Filter for only valid actions in this timestep
    permitted = []
    for call in requested:
        if (call.function in self.available_actions
                or call.function >= FUNCTIONS.move_unit.id):
            permitted.append(call)

    if len(permitted) < len(requested):
        logger.warning("One or more invalid actions were ignored.")

    try:
        timestep = self.sc2_env.step(permitted)[0]
    except ValueError:
        logger.error("Error occurred when attempting to execute the actions: {}.".format(permitted))
        timestep = self.sc2_env.step([FUNCTIONS.no_op()])[0]

    self.available_actions = timestep.observation['available_actions']
    return timestep
def step(self, obs):
    """Head for the neutral location closest to the mean own-unit position.

    Returns:
        * ``(7, [[0]])`` when ``Move_screen`` is not available
          (NOTE(review): 7 is presumably the select-army function id -
          confirm);
        * ``FUNCTIONS.no_op()`` when no neutral location is found;
        * ``(331, [[0], [x, y]])`` otherwise (NOTE(review): 331 is
          presumably the ``Move_screen`` function id - confirm).
    """
    super(CollectMineralShards, self).step(obs)

    if FUNCTIONS.Move_screen.id not in obs.observation.available_actions:
        return 7, [[0]]

    relative = obs.observation.feature_screen.player_relative
    neutral_locs = _xy_locs(relative == _PLAYER_NEUTRAL)
    if not neutral_locs:
        return FUNCTIONS.no_op()

    own_locs = _xy_locs(relative == _PLAYER_SELF)
    centroid = numpy.mean(own_locs, axis=0).round()  # Average location.
    offsets = numpy.array(neutral_locs) - centroid
    nearest_index = numpy.argmin(numpy.linalg.norm(offsets, axis=1))
    x, y = neutral_locs[nearest_index]
    return 331, [[0], [x, y]]
def step(self, macros, update_observation=None):
    """Execute the first macro in ``macros`` one primitive call at a time.

    Each ``(act_func, arg_func)`` pair is resolved with
    ``valid_func_call`` against the latest observation. If that yields
    ``None`` a no-op is stepped instead and the macro is aborted.

    Args:
        macros: Sequence whose first element is the macro to run.
        update_observation: Forwarded unchanged to the parent ``step``.

    Returns:
        ``self._last_obs``; its first element carries ``_macro_success``.
    """
    macro = macros[0]
    success = True

    for act_func, arg_func in macro:
        call = self.valid_func_call(act_func, arg_func, self._last_obs[0], macro)
        success = call is not None
        if not success:
            call = (FUNCTIONS.no_op(), )
        stepped = super().step(call, update_observation)
        self._last_obs = (self._timestep_factory.update(stepped[0]), )
        if not success:
            break

    outcome = "success" if success else "failed"
    logging.debug("%s execute %s", macro, outcome)

    latest = self._last_obs[0]
    latest._macro_success = success
    if latest.last():
        self._timestep_factory.reset()
    return self._last_obs
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Decode a flat gym action into a unit index plus a target cell.

    The quotient of ``gym_action`` by ``self.resolution`` indexes
    ``state['player_units_stable']``; the remainder is unravelled over
    ``self.unravel_shape`` to obtain the target.

    Returns:
        A one-element list: a ``move_unit`` order for the indexed unit, or
        a no-op when that unit's tag is not among ``state["player_units"]``.
    """
    # Get coords by unravelling action. DQN only supports returning an integer as action.
    # How unravel works:
    # Ref: https://www.quora.com/What-is-a-simple-intuitive-example-for-the-unravel_index-in-Python
    unit_index = int(gym_action / self.resolution)
    cell = gym_action % self.resolution
    unravelled = np.unravel_index(cell, self.unravel_shape)
    target = (unravelled[0], unravelled[1])

    # .item() to convert numpy.int64 to native python type (int)
    chosen_tag = self.state['player_units_stable'][unit_index].tag.item()
    current_tags = [unit.tag.item() for unit in self.state["player_units"]]

    if chosen_tag in current_tags:
        # PySC2 uses different conventions for observations (y,x) and actions (x,y)
        # ::-1 reverses the tuple i.e. (1,2) becomes (2,1)
        return [FUNCTIONS.move_unit(chosen_tag, "now", target[::-1])]
    return [FUNCTIONS.no_op()]
from pysc2.env import sc2_env from pysc2.lib.actions import FUNCTIONS from multiprocessing import Process, Pipe # no_op function _NO_OP = FUNCTIONS.no_op() def make_envs(args): return EnvPool([make_env(args) for i in range(args.envs)]) # based on https://github.com/ikostrikov/pytorch-a2c-ppo-acktr/blob/master/envs.py def make_env(args): def _thunk(): players = list() players.append(sc2_env.Agent(sc2_env.Race[args.agent_race])) AIF = sc2_env.parse_agent_interface_format( feature_screen=args.feature_screen_size, feature_minimap=args.feature_minimap_size, rgb_screen=args.rgb_screen_size, rgb_minimap=args.rgb_minimap_size, action_space=args.action_space, use_feature_units=args.use_feature_units, use_raw_units=args.use_raw_units, use_unit_counts=args.use_unit_counts) env = sc2_env.SC2Env( map_name=args.map, players=players, agent_interface_format=AIF, step_mul=args.step_mul,
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Decode a joint gym action into one-step moves for two marines.

    ``gym_action`` is unravelled over ``self.unravel_shape``; each of the
    first two components, modulo ``self.number_adjacency``, selects one of
    eight directions for the corresponding marine:

        0: Up, 1: Down, 2: Left, 3: Right,
        4: Up + Left, 5: Up + Right, 6: Down + Left, 7: Down + Right

    Returns:
        A list with one ``move_unit`` order per marine that is still listed
        in ``state["player_units"]``. A single no-op is returned when fewer
        than two stable units are known or when neither marine is present.
    """
    stable_units = self.state["player_units_stable"]
    # Two marines are addressed below, so fewer than two cannot be served.
    if len(stable_units) < 2:
        return [FUNCTIONS.no_op()]

    # Get coords by unravelling action. DQN only supports returning an integer as action.
    # How unravel works:
    # Ref: https://www.quora.com/What-is-a-simple-intuitive-example-for-the-unravel_index-in-Python
    coords = np.unravel_index(gym_action, self.unravel_shape)

    # Get gym action for each marine
    marine_moves = (coords[0] % self.number_adjacency,
                    coords[1] % self.number_adjacency)

    # Current position of every reported unit, keyed by tag (first one wins).
    current_xy = {}
    for unit in self.state["player_units"]:
        current_xy.setdefault(unit.tag.item(), (unit.x, unit.y))

    def get_target_xy(num, curr_coords):
        """Shift ``curr_coords`` by one cell in direction ``num``, clamped to the screen."""
        target_xy = list(curr_coords)
        if num in (0, 4, 5):
            # Up
            target_xy[1] = max(0, curr_coords[1] - 1)
        if num in (1, 6, 7):
            # Down
            target_xy[1] = min(self.screen_shape[1] - 1, curr_coords[1] + 1)
        if num in (2, 4, 6):
            # Left
            target_xy[0] = max(0, curr_coords[0] - 1)
        if num in (3, 5, 7):
            # Right
            target_xy[0] = min(self.screen_shape[0] - 1, curr_coords[0] + 1)
        return tuple(target_xy)

    # Assign action functions
    actions = []
    for slot, move in enumerate(marine_moves):
        tag = stable_units[slot].tag.item()
        if tag not in current_xy:
            # The marine is no longer reported; there is nothing to move.
            continue
        target = get_target_xy(move, current_xy[tag])
        actions.append(FUNCTIONS.move_unit(tag, "now", target))

    return actions or [FUNCTIONS.no_op()]
def act(self, observations, actives, options):
    """Compute one action and one termination flag per environment.

    For every environment the option is looked up in ``MAP_INFO``, its
    termination condition is evaluated, and - unless it terminated - the
    action that executes the subtask is chosen.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; the spots marked below should be checked
    against the original file.

    Args:
        observations: Mapping whose ``'raws'`` entry unpacks into
            ``(obs, firsts, dones)``, one element per environment.
        actives: Per-environment flags; inactive entries get ``None``.
        options: Per-environment keys into ``MAP_INFO``.

    Returns:
        Tuple ``(actions, terms)``. ``actions`` holds an action or ``None``
        (inactive or terminated) per environment; ``terms`` is an array
        shaped like ``actives`` with 1 where the option terminated.

    Raises:
        NotImplementedError: For an unknown subtask type, or when no
            execution rule matches.
        ValueError: When a selection coordinate is negative.
    """
    actions = []
    obs, firsts, dones = observations['raws']
    terms = np.zeros_like(actives)
    for i, (op, active, ob, first, done) in enumerate(zip(options, actives, obs, firsts, dones)):
        # check active
        if not active:
            actions.append(None)
            continue

        # reset env info
        if first:
            self.supply_counts[i] = 0  # SupplyDepot counts
            self.prev_foods[i] = ob.player.food_used
            self.coords[i] = []

        # extract subtask info
        map_info = MAP_INFO[op]
        func = map_info['func']
        addon = map_info['add_on']
        is_quick = map_info['cmd_quick']
        if is_quick:
            quick_func = map_info['quick_func']
        _type, target = map_info['type-target']
        avail_acts = np.array(ob.available_actions)

        # compute termination
        # Most branches use a two-step scheme: first raise deselect_flag,
        # then on a later call clear it and mark the option as terminated.
        if _type == 'build' or addon:
            if self.deselect_flag[i]:
                self.deselect_flag[i] = False
                terms[i] = 1
            elif addon:
                for id, count in ob.unit_counts:
                    if id == target[1]:
                        self.deselect_flag[i] = True
            else:
                for id, count in ob.unit_counts:
                    if id == target:
                        self.deselect_flag[i] = True

                        # allow multiple SupplyDepots
                        # NOTE(review): 19 is presumably the SupplyDepot
                        # unit type id; nesting under `id == target` is a
                        # reconstruction - confirm.
                        if id == 19:
                            if count == self.supply_counts[i]:
                                self.deselect_flag[i] = False
                            elif count > self.supply_counts[i]:
                                self.deselect_flag[i] = True
                                self.supply_counts[i] = count
                            else:
                                print(
                                    'Error. SupplyDepot count should not be less than count.'
                                )
                                sys.exit(1)

                # build trial
                # NOTE(review): build_trials is shared across environments
                # (not indexed by i); placement inside this else-branch is
                # a reconstruction - confirm.
                if self.build_trials == self.max_build_trials:
                    self.deselect_flag[i] = True
                    self.build_trials = 0
                else:
                    self.build_trials += 1

            # reset coords
            if terms[i]:
                self.coords[i] = []

        elif _type in ['unit', 'idle']:
            if self.deselect_flag[i]:
                self.deselect_flag[i] = False
                terms[i] = 1
            else:
                # Food usage went up and the last actions contain func.
                if ob.player.food_used > self.prev_foods[i]:
                    for act in ob.last_actions:
                        if act == func.id:
                            self.deselect_flag[i] = True
                # NOTE(review): indentation of this update is a
                # reconstruction - confirm.
                self.prev_foods[i] = ob.player.food_used

        elif _type == 'select':
            for unit in ob.feature_units:
                if unit.unit_type == target and unit.is_selected:
                    # Both branches set the same value.
                    # NOTE(review): 45 is presumably the SCV unit type id.
                    if unit.unit_type == 45 and unit.order_length == 0:
                        self._selected = True
                    else:
                        self._selected = True

            # check if the unit is selected or the deselection flag is on
            if self._selected:
                # if selected, max noop
                if self.no_op_count == self.max_no_op:
                    self.no_op_count = 0
                    terms[i] = 1
                else:
                    self.no_op_count += 1
            else:
                # Give up after max_select_trials attempts and treat the
                # unit as selected.
                if self.select_trials == self.max_select_trials:
                    self._selected = True
                    self.select_trials = 0
                else:
                    self.select_trials += 1

        elif _type == 'gather':
            if self.deselect_flag[i]:
                self.deselect_flag[i] = False
                terms[i] = 1
            elif func.id in ob.last_actions:
                self.deselect_flag[i] = True

        elif _type in ['mineral', 'gas']:
            # Terminate once the resource amount reaches the target.
            if _type == 'mineral' and ob.player.minerals >= target:
                terms[i] = 1
            if _type == 'gas' and ob.player.vespene >= target:
                terms[i] = 1

        elif _type == 'food':
            if self.deselect_flag[i]:
                self.deselect_flag[i] = False
                terms[i] = 1
            else:
                for id, count in ob.unit_counts:
                    if id == target and count > self.supply_counts[i]:
                        self.deselect_flag[i] = True
                        self.supply_counts[i] = count
            if terms[i]:
                self.coords[i] = []

        elif _type == 'no_op':
            if self.no_op_count == self.max_no_op:
                self.no_op_count = 0
                terms[i] = 1
            else:
                self.no_op_count += 1
        else:
            raise NotImplementedError

        # check termination
        if terms[i]:
            actions.append(None)
            continue

        # execute the subtask
        if self.deselect_flag[i]:
            # Click on self.vespene_coord while the deselect flag is up.
            actions.append(
                FUNCTIONS.select_point("select", self.vespene_coord))
        elif is_quick and quick_func.id in avail_acts:  # Quick actions
            if quick_func == FUNCTIONS.select_idle_worker:
                if self._selected:
                    actions.append(FUNCTIONS.no_op())
                else:
                    actions.append(quick_func("select"))
            else:
                actions.append(quick_func("now"))
        elif _type == 'build' and func.id in avail_acts:  # Build
            if self._check_vacant(i, ob, func, map_info):
                actions.append(func("now", self.coords[i]))
            else:
                actions.append(self.build_structure(i, ob, func, map_info))
        elif addon and func.id in avail_acts:  # Addon
            if self._check_vacant(i, ob, func, map_info):
                actions.append(func("now", self.coords[i]))
            else:
                actions.append(self.build_structure(i, ob, func, map_info))
        elif _type == 'select' and func.id in avail_acts:  # Selection
            if self._selected:  # when unit is selected & is terminating
                actions.append(FUNCTIONS.no_op())
            else:
                coords, coord_exists = self._get_coord(ob, id=target)
                if coord_exists and (coords[0] < 0 or coords[1] < 0):
                    print("[ ERROR ] : Got Negative coordinate.")
                    # Drops into an interactive IPython shell before raising.
                    from IPython import embed
                    embed()
                    raise ValueError

                # if the unit is out-of-screen, then ignore
                # NOTE(review): 84 is presumably the screen size - confirm.
                if coord_exists and (coords[0] >= 84 or coords[1] >= 84):
                    self._selected = True
                    actions.append(FUNCTIONS.no_op())
                    continue

                # if value error, ignore
                # NOTE(review): the `< 0` comparison above runs before this
                # None check - confirm None can actually reach this point.
                if coord_exists and (coords[0] is None or coords[1] is None):
                    self._selected = True
                    actions.append(FUNCTIONS.no_op())
                    continue

                # if coord exists return selection action, and if not, take noop
                if coord_exists:
                    x, y = coords[0], coords[1]
                    actions.append(func(0, [x, y]))
                else:
                    actions.append(FUNCTIONS.no_op())
        elif _type in ['mineral', 'gas']:
            actions.append(func())
        elif _type == 'food' and func.id in avail_acts:  # Food supply
            if self._check_vacant(i, ob, func, map_info):
                actions.append(func("now", self.coords[i]))
            else:
                actions.append(self.build_structure(i, ob, func, map_info))
        elif _type == 'no_op':
            actions.append(func())
        else:
            if (_type in ['build', 'food'] or addon) and self._check_available(func, avail_acts):
                actions.append(FUNCTIONS.no_op())
                continue

            # if selectSCV suddenly becomes unavailable, then just take noop
            if _type == 'select' and target == 45:
                self._selected = True
                actions.append(FUNCTIONS.no_op())
                continue

            print(
                '[ ERROR ] : Selected option seems not valid. Check if the condition is correct.'
            )
            # Opens the pudb debugger before raising.
            import pudb
            pudb.set_trace()  # XXX DEBUG
            raise NotImplementedError
    return actions, terms
def noOp():
    """Build an ``Action`` that carries a single pysc2 no-op command."""
    noop_action = Action()
    noop_action.addCommand(FUNCTIONS.no_op())
    return noop_action
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Ignore ``gym_action`` and always answer with a single no-op call."""
    noop_only = [FUNCTIONS.no_op()]
    return noop_only