def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Convert a flat gym action index into a PySC2 ``Attack_screen`` order.

    The index is unravelled against ``self.unravel_shape`` to recover a
    coordinate tuple, which is then reversed because PySC2 uses (y, x) for
    observations but (x, y) for action arguments.

    Args:
        gym_action: Flat index into an array of shape ``self.unravel_shape``.

    Returns:
        A single-element list holding the attack order, or a
        ``select_army`` order when the attack function is not in
        ``self.available_actions``.
    """
    # np.unravel_index maps the flat index back to per-axis indices.
    yx = np.unravel_index(gym_action, self.unravel_shape)
    xy = yx[::-1]  # reverse the tuple, e.g. (1, 2) becomes (2, 1)

    # NOTE: the A2C experiment that split the index range into move and
    # attack halves is disabled; every index currently issues an attack.
    attack = FUNCTIONS.Attack_screen("now", xy)
    if attack.function in self.available_actions:
        return [attack]

    # Attack is unavailable: fall back to selecting the army.
    return [FUNCTIONS.select_army("select")]
def get_sc2_action(self, gym_action) -> List[FunctionCall]:
    """Translate a directional gym action into a PySC2 move or attack order.

    The order is issued relative to the centroid of the player's units:
    the target is the point reached by travelling from the centroid in the
    chosen direction until a screen edge is hit.

    Action indices (after the attack offset has been removed):

        0: Up          4: Up + Left
        1: Down        5: Up + Right
        2: Left        6: Down + Left
        3: Right       7: Down + Right

    Indices above ``self.move_space`` select the same directions but issue
    ``Attack_screen`` instead of ``Move_screen`` (8: Up + Attack ...
    15: Down + Right + Attack).

    Args:
        gym_action: Discrete action index; it is wrapped into
            ``self.action_space.n`` before being interpreted.

    Returns:
        A single-element list with the order to execute. This is a
        ``select_army`` order when ``Move_screen`` is not available, and a
        ``no_op`` order when there are no player units to take a centroid of.
    """
    if FUNCTIONS.Move_screen.id not in self.available_actions:
        return [FUNCTIONS.select_army("select")]

    # For restricting the action space for A2C, does not break DQN
    gym_action %= self.action_space.n

    # Find mean coordinates of all currently active player units
    player_units_xy = [(unit.x, unit.y) for unit in self.state["player_units"]]
    if not player_units_xy:
        # An empty list becomes a 1-D array, so the column indexing below
        # would raise IndexError; there is nothing to steer, so do nothing.
        return [FUNCTIONS.no_op()]
    positions = np.asarray(player_units_xy)
    count = positions.shape[0]
    cx = int(np.sum(positions[:, 0]) / count)
    cy = int(np.sum(positions[:, 1]) / count)

    # NOTE(review): with `>` an index equal to self.move_space is treated as
    # a move, not an attack - confirm move_space holds the highest move
    # index rather than the number of move actions.
    is_attack = gym_action > self.move_space
    if is_attack:
        gym_action %= self.adjacency

    x_max = self.screen_shape[0] - 1
    y_max = self.screen_shape[1] - 1

    # (dx, dy) per direction index; "up" is towards y = 0, "left" towards x = 0.
    directions = (
        (0, -1),   # 0: Up
        (0, 1),    # 1: Down
        (-1, 0),   # 2: Left
        (1, 0),    # 3: Right
        (-1, -1),  # 4: Up + Left
        (1, -1),   # 5: Up + Right
        (-1, 1),   # 6: Down + Left
        (1, 1),    # 7: Down + Right
    )

    # Any index that matches no direction leaves the target on the centroid.
    target_xy = [cx, cy]
    for index, (dx, dy) in enumerate(directions):
        if gym_action == index:
            # Distance to the screen edge along each axis being travelled;
            # diagonals stop at whichever edge is reached first.
            reach = []
            if dx:
                reach.append(cx if dx < 0 else x_max - cx)
            if dy:
                reach.append(cy if dy < 0 else y_max - cy)
            step = min(reach)
            target_xy = [cx + dx * step, cy + dy * step]
            break

    # Assign action function
    order = FUNCTIONS.Attack_screen if is_attack else FUNCTIONS.Move_screen
    return [order("now", target_xy)]