def _ensure_control_groups(env, result, group_list, xy_per_marine):
    """Rebuild the control groups when there are none or they are stale.

    The rebuild happens when ``group_list`` is empty or when
    ``common.check_group_list`` reports that the groups need refreshing.

    Returns:
        The (possibly replaced) ``(result, group_list, xy_per_marine)``.
    """
    if len(group_list) == 0 or common.check_group_list(env, result):
        print("init group list")
        result, xy_per_marine = common.init(env, result)
        group_list = common.update_group_list(result)
    return result, group_list, xy_per_marine


def _extract_features(env, timestep):
    """Pull the per-step features that are sent back to the parent process.

    Returns:
        ``(ob, selected, control_groups, army_count)`` where ``ob`` is the
        player-relative screen layer turned into a 0/1 mask of cells equal
        to 3 and ``selected`` is the "selected" screen layer. Both keep a
        leading axis of length one.
    """
    screen = timestep.observation["screen"]
    # NOTE(review): 3 is presumably the "neutral" player-relative id
    # (the mineral shards) -- confirm against the pysc2 feature spec.
    ob = (screen[_PLAYER_RELATIVE:_PLAYER_RELATIVE + 1] == 3).astype(int)
    selected = screen[_SELECTED:_SELECTED + 1]
    control_groups = timestep.observation["control_groups"]
    army_count = env._obs[0].observation.player_common.army_count
    return ob, selected, control_groups, army_count


def worker(remote, map_name, i):
    """Run one SC2 environment and serve commands received over ``remote``.

    Supported commands, received as ``(cmd, data)`` tuples:

    * ``'step'``: ``data[0]`` holds two actions. The first is always
      executed; the second is executed only if function id 331 was
      available, its target is not ``(0, 0)`` and the episode has not
      ended. When the episode ends the environment is reset immediately.
      Replies with ``(ob, reward, done, info, army_count, control_groups,
      selected, xy_per_marine)``.
    * ``'reset'``: resets the environment and replies with the same tuple.
    * ``'close'``: closes ``remote`` and leaves the loop.
    * ``'get_spaces'``: replies with
      ``(env.action_spec().functions[data], "")``.
    * ``'action_spec'``: replies with ``env.action_spec().functions[data]``.

    Any other command raises ``NotImplementedError``.

    Args:
        remote: Connection-like object providing ``recv``, ``send``, ``close``.
        map_name: Name of the map passed to ``sc2_env.SC2Env``.
        i: Worker index; currently unused, kept for interface compatibility.
    """
    # NOTE(review): 331 is presumably the id of the Move_screen function.
    move_screen_id = 331

    with sc2_env.SC2Env(
            map_name=map_name,
            step_mul=1,
            screen_size_px=(32, 32),
            minimap_size_px=(32, 32)) as env:
        # Empty tuple instead of None so that a 'step' arriving before the
        # first 'reset' does not fail on the membership test below.
        available_actions = ()
        result = None
        group_list = []
        xy_per_marine = None

        while True:
            cmd, data = remote.recv()

            if cmd == 'step':
                reward = 0
                result, group_list, xy_per_marine = _ensure_control_groups(
                    env, result, group_list, xy_per_marine)

                action1 = data[0][0]
                action2 = data[0][1]
                x, y = action2[1][1]
                # A (0, 0) target means "do not issue the second action".
                move = not (x == 0 and y == 0)

                result = env.step(actions=[action1])
                reward += result[0].reward
                done = result[0].step_type == environment.StepType.LAST

                if move_screen_id in available_actions and move and not done:
                    try:
                        result = env.step(actions=[action2])
                    except Exception as e:
                        # Best effort: a rejected second action must not
                        # kill the worker; keep the first step's result.
                        print("e :", e)
                    else:
                        reward += result[0].reward
                        done = (result[0].step_type ==
                                environment.StepType.LAST)

                # Features are taken from the last frame of the step, i.e.
                # before any reset triggered below.
                ob, selected, control_groups, army_count = _extract_features(
                    env, result[0])
                available_actions = result[0].observation["available_actions"]

                if done:
                    result = env.reset()
                    result, group_list, xy_per_marine = _ensure_control_groups(
                        env, result, group_list, xy_per_marine)
                    # Keep the local gate in sync with what is reported as
                    # ``info``; previously it kept the terminal frame's list.
                    available_actions = (
                        result[0].observation["available_actions"])

                info = available_actions
                remote.send((ob, reward, done, info, army_count,
                             control_groups, selected, xy_per_marine))

            elif cmd == 'reset':
                result = env.reset()
                reward = 0
                result, group_list, xy_per_marine = _ensure_control_groups(
                    env, result, group_list, xy_per_marine)
                reward += result[0].reward

                ob, selected, control_groups, army_count = _extract_features(
                    env, result[0])
                done = result[0].step_type == environment.StepType.LAST
                available_actions = result[0].observation["available_actions"]
                info = available_actions
                remote.send((ob, reward, done, info, army_count,
                             control_groups, selected, xy_per_marine))

            elif cmd == 'close':
                remote.close()
                break

            elif cmd == 'get_spaces':
                remote.send((env.action_spec().functions[data], ""))

            elif cmd == "action_spec":
                remote.send(env.action_spec().functions[data])

            else:
                raise NotImplementedError
def step(self, obs):
    """Choose the next action for the currently selected marine group.

    With probability 1/2 (and only if ``_MOVE_SCREEN`` is available) the
    selected group is sent towards a mineral shard; otherwise a random
    control group is recalled. The move target is the nearest shard, unless
    the group has no destination yet or has come within 2 pixels of it, in
    which case a TSP tour over the remaining shards (coordinates rounded
    down to even pixels) is solved and the nearer of the two tour
    neighbours of the group's position is used. Shards within 5 pixels of
    the other group's destination are skipped.

    Args:
        obs: The current timestep; ``obs.observation`` must provide
            ``"screen"`` and ``"available_actions"``.

    Returns:
        An ``actions.FunctionCall`` (move, control-group recall or no-op).
    """
    super(CollectMineralShards, self).step(obs)

    if (len(self.group_list) == 0
            or common.check_group_list(self.env, [obs])):
        print("init group list")
        # common.init returns (timesteps, xy_per_marine), as at the other
        # call sites; only the first timestep is needed here.
        timesteps, _ = common.init(self.env, [obs])
        obs = timesteps[0]
        self.group_list = common.update_group_list([obs])

    no_op = actions.FunctionCall(_NO_OP, [])

    player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
    neutral_y, neutral_x = (player_relative == _PLAYER_NEUTRAL).nonzero()
    player_y, _ = (player_relative == _PLAYER_FRIENDLY).nonzero()
    # Test emptiness via .size: .any() on coordinate arrays is also False
    # when every coordinate happens to be 0.
    if neutral_y.size == 0 or player_y.size == 0:
        return no_op

    # Drawn unconditionally so the random stream does not depend on the
    # available actions.
    r = random.randint(0, 1)

    if _MOVE_SCREEN in obs.observation["available_actions"] and r == 0:
        selected = obs.observation["screen"][_SELECTED]
        sel_y, sel_x = (selected == _PLAYER_FRIENDLY).nonzero()
        if sel_x.size == 0:
            # Nothing selected: the mean below would be NaN.
            return no_op
        player = [int(sel_x.mean()), int(sel_y.mean())]
        player_xy = numpy.array(player)

        my_dest = other_dest = None
        if "0" in self.dest_per_marine and "1" in self.dest_per_marine:
            if self.group_id == 0:
                my_dest = self.dest_per_marine["0"]
                other_dest = self.dest_per_marine["1"]
            elif self.group_id == 1:
                other_dest = self.dest_per_marine["0"]
                my_dest = self.dest_per_marine["1"]

        # points[0] is always the group's own position, so index 0 in the
        # TSP tour marks where the group currently is.
        points = [player]
        closest, min_dist = None, None
        for p in zip(neutral_x, neutral_y):
            if other_dest is not None:
                partner_dist = numpy.linalg.norm(
                    numpy.array(other_dest) - numpy.array(p))
                if partner_dist < 5:
                    # The partner group will take care of this shard.
                    continue

            # Round down to even pixels to merge neighbouring shard cells.
            pp = [p[0] // 2 * 2, p[1] // 2 * 2]
            if pp not in points:
                points.append(pp)

            dist = numpy.linalg.norm(player_xy - numpy.array(p))
            # "is None" rather than falsiness: a distance of 0.0 is valid.
            if min_dist is None or dist < min_dist:
                closest, min_dist = p, dist

        # Re-plan when there is no destination yet or it has been reached.
        if my_dest is None:
            solve_tsp = True
        else:
            solve_tsp = bool(
                numpy.linalg.norm(player_xy - numpy.array(my_dest)) < 2)
        if len(points) < 2:
            solve_tsp = False

        if solve_tsp:
            n, D = mk_matrix(points, distL2)
            niter = 50  # number of multi-start local-search restarts
            tour, _ = multistart_localsearch(niter, n, D)
            tour = list(tour)

            # Neighbours of the group's position in the cyclic tour.
            here = tour.index(0)
            right = points[tour[(here + 1) % len(tour)]]
            left = points[tour[here - 1]]

            left_d = numpy.linalg.norm(player_xy - numpy.array(left))
            right_d = numpy.linalg.norm(player_xy - numpy.array(right))
            closest = left if right_d > left_d else right

        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; the destination is recorded whether or not the TSP was
        # solved -- confirm against the original file.
        self.dest_per_marine[str(self.group_id)] = closest

        if closest is None:
            return no_op
        return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, closest])

    if len(self.group_list) > 0:
        self.group_id = random.randint(0, len(self.group_list) - 1)
        return actions.FunctionCall(
            _SELECT_CONTROL_GROUP,
            [[_CONTROL_GROUP_RECALL], [int(self.group_id)]])

    return no_op
def step(self, obs):
    """Choose the next action for the currently selected marine group.

    With probability 1/2 (and only if ``_MOVE_SCREEN`` is available) the
    selected group is sent towards a mineral shard; otherwise a random
    control group is recalled. The move target is the nearest shard, unless
    the group has no destination yet or has come within 2 pixels of it, in
    which case a TSP tour over the remaining shards is solved and the
    nearer of the two tour neighbours of the group's position is used.
    Shards within 5 pixels of the other group's destination are skipped.

    Args:
        obs: The current timestep; ``obs.observation`` must provide
            ``"screen"`` and ``"available_actions"``.

    Returns:
        An ``actions.FunctionCall`` (move, control-group recall or no-op).
    """
    super(CollectMineralShards, self).step(obs)

    if (len(self.group_list) == 0
            or common.check_group_list(self.env, [obs])):
        print("init group list")
        # The per-marine coordinates returned by common.init are not
        # needed here; only the first timestep is used.
        timesteps, _ = common.init(self.env, [obs])
        obs = timesteps[0]
        self.group_list = common.update_group_list([obs])

    no_op = actions.FunctionCall(_NO_OP, [])

    player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
    neutral_y, neutral_x = (player_relative == _PLAYER_NEUTRAL).nonzero()
    player_y, _ = (player_relative == _PLAYER_FRIENDLY).nonzero()
    # Test emptiness via .size: .any() on coordinate arrays is also False
    # when every coordinate happens to be 0.
    if neutral_y.size == 0 or player_y.size == 0:
        return no_op

    # Drawn unconditionally so the random stream does not depend on the
    # available actions.
    r = random.randint(0, 1)

    if _MOVE_SCREEN in obs.observation["available_actions"] and r == 0:
        selected = obs.observation["screen"][_SELECTED]
        sel_y, sel_x = (selected == _PLAYER_FRIENDLY).nonzero()
        if len(sel_x) == 0:
            # Nothing selected: the mean below would be NaN.
            return no_op
        player = [int(sel_x.mean()), int(sel_y.mean())]
        player_xy = numpy.array(player)

        my_dest = other_dest = None
        if "0" in self.dest_per_marine and "1" in self.dest_per_marine:
            if self.group_id == 0:
                my_dest = self.dest_per_marine["0"]
                other_dest = self.dest_per_marine["1"]
            elif self.group_id == 1:
                other_dest = self.dest_per_marine["0"]
                my_dest = self.dest_per_marine["1"]

        # points[0] is always the group's own position, so index 0 in the
        # TSP tour marks where the group currently is.
        points = [player]
        closest, min_dist = None, None
        for p in zip(neutral_x, neutral_y):
            if other_dest is not None:
                partner_dist = numpy.linalg.norm(
                    numpy.array(other_dest) - numpy.array(p))
                if partner_dist < 5:
                    # The partner group will take care of this shard.
                    continue

            pp = [p[0], p[1]]
            if pp not in points:
                points.append(pp)

            dist = numpy.linalg.norm(player_xy - numpy.array(p))
            # "is None" rather than falsiness: a distance of 0.0 is valid.
            if min_dist is None or dist < min_dist:
                closest, min_dist = p, dist

        # Re-plan when there is no destination yet or it has been reached.
        if my_dest is None:
            solve_tsp = True
        else:
            solve_tsp = bool(
                numpy.linalg.norm(player_xy - numpy.array(my_dest)) < 2)
        if len(points) < 2:
            solve_tsp = False

        if solve_tsp:
            n, D = mk_matrix(points, distL2)
            niter = 50  # number of multi-start local-search restarts
            tour, _ = multistart_localsearch(niter, n, D)
            tour = list(tour)

            # Neighbours of the group's position in the cyclic tour.
            here = tour.index(0)
            right = points[tour[(here + 1) % len(tour)]]
            left = points[tour[here - 1]]

            left_d = numpy.linalg.norm(player_xy - numpy.array(left))
            right_d = numpy.linalg.norm(player_xy - numpy.array(right))
            closest = left if right_d > left_d else right

        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; the destination is recorded whether or not the TSP was
        # solved -- confirm against the original file.
        self.dest_per_marine[str(self.group_id)] = closest

        if closest is None:
            return no_op
        return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, closest])

    if len(self.group_list) > 0:
        self.group_id = random.randint(0, len(self.group_list) - 1)
        return actions.FunctionCall(
            _SELECT_CONTROL_GROUP,
            [[_CONTROL_GROUP_RECALL], [int(self.group_id)]])

    return no_op