def step(self, obs):
    """One action per frame: force field if castable, else attack, else select army.

    BUGFIX: the original chose a roach target but then fell through to a
    separate if/elif chain and returned no_op whenever no hydralisks or
    zerglings were visible, silently discarding the roach target. The
    target selection is now a single chain: hydralisks > zerglings > roaches.
    """
    super(ZergAgent, self).step(obs)
    if obs.first():
        # Record our starting position on the minimap.
        player_y, player_x = (
            obs.observation.feature_minimap.player_relative ==
            features.PlayerRelative.SELF).nonzero()
        xmean = player_x.mean()
        ymean = player_y.mean()

    roaches = self.get_units_by_type(obs, units.Zerg.Roach)
    hydralisks = self.get_units_by_type(obs, units.Zerg.Hydralisk)
    zerglings = self.get_units_by_type(obs, units.Zerg.Zergling)

    if self.can_do(obs, actions.FUNCTIONS.Effect_ForceField_screen.id):
        # TODO: currently always casts force field; the long-term goal is an
        # agent that learns the target itself.
        act_id, act_args = self.step_run(obs)
        return actions.FunctionCall(act_id, act_args)
    elif self.can_do(obs, actions.FUNCTIONS.Attack_screen.id):
        # Target priority: hydralisks, then zerglings, then roaches.
        if len(hydralisks) > 0:
            enemy = random.choice(hydralisks)
        elif len(zerglings) > 0:
            enemy = random.choice(zerglings)
        elif len(roaches) > 0:
            enemy = random.choice(roaches)
        else:
            return actions.FUNCTIONS.no_op()
        # ISSUE (from original): Attack_minimap may be issued even when it is
        # not in available_actions — only Attack_screen was checked above.
        return actions.FUNCTIONS.Attack_minimap("queued", (enemy.x, enemy.y))
    elif self.can_do(obs, actions.FUNCTIONS.select_army.id):
        return actions.FUNCTIONS.select_army("select")
    return actions.FUNCTIONS.no_op()
def move_unit(obs, mode):
    """Move the selected unit 3 px in a cardinal direction on a 16x16 grid.

    :param obs: environment observation list; the selected-unit screen layer
        is read from obs[0].
    :param mode: 1=up, 2=down, 3=left, 4=right.
    :raises ValueError: for any other mode (the original left dest_x/dest_y
        unbound and crashed with NameError instead).
    :return: a Move_screen FunctionCall toward the clipped destination.
    """
    selected_unit_position_y, selected_unit_position_x = (
        obs[0].observation["screen"][_SELECTED_UNIT] == True).nonzero()
    target_x = np.mean(selected_unit_position_x)
    target_y = np.mean(selected_unit_position_y)
    if mode == 1:  # up
        dest_x, dest_y = np.clip(target_x, 0, 15), np.clip(target_y - 3, 0, 15)
    elif mode == 2:  # down
        dest_x, dest_y = np.clip(target_x, 0, 15), np.clip(target_y + 3, 0, 15)
    elif mode == 3:  # left
        dest_x, dest_y = np.clip(target_x - 3, 0, 15), np.clip(target_y, 0, 15)
    elif mode == 4:  # right
        dest_x, dest_y = np.clip(target_x + 3, 0, 15), np.clip(target_y, 0, 15)
    else:
        raise ValueError("mode must be 1 (up), 2 (down), 3 (left) or 4 (right)")
    return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, [dest_x, dest_y]])
def step(self, action): """Apply actions, step the world forward, and return observations""" global episode_reward # global variable defined previously action = actions_to_choose( action ) # Actions of Hallucination and movement Make a function that selects among hallucination functions obs = super(Environment, self).step([actions.FunctionCall( _NO_OP, [])]) ## change the action for Hallucination or attack ? # The method calls an observation that moves the screen observation = obs r = obs[0].reward done = obs[0].step_type == environment.StepType.LAST # Episode_over episode_reward += r return observation, r, done, { } # Return observation, reward, and episode_over
def target_lowest_hp(self):
    """Attack the visible hostile pixel with the lowest hit-point ratio."""
    screen = self.obs.observation['feature_screen']
    hostile_mask = screen[_PLAYER_RELATIVE] == _PLAYER_HOSTILE
    hp = screen[_HIT_POINT_RATIO]
    # Zero cells (non-hostile or zero hp) are pushed to _MAX_INT so that
    # argmin only ever lands on a genuine hostile pixel.
    masked_hp = np.array(hostile_mask * hp)
    masked_hp[masked_hp == 0] = _MAX_INT
    enemy_y, enemy_x = np.unravel_index(
        np.argmin(masked_hp, axis=None), masked_hp.shape)
    return [actions.FunctionCall(_ATTACK, [_NOT_QUEUED, [enemy_x, enemy_y]])]
def moveNumberZeroZero(self, obs):
    """First sub-move: choose a base-building action and pan the camera home."""
    state = get_current_state(obs)
    chosen = self.act_build_base(state["state_others"])
    self.previous_action = chosen

    # Any construction action requires the camera back at the start position.
    build_actions = (ActionBaseDto.build_barracks(),
                     ActionBaseDto.build_supply_depot(),
                     ActionBaseDto.build_command_center())
    if actions.FUNCTIONS.move_camera.id in obs.observation['available_actions']:
        if chosen in build_actions:
            return actions.FUNCTIONS.move_camera(
                (self.camera_position_start.x, self.camera_position_start.y))
    else:
        print("could not move camera, available actions are " +
              str(obs.observation['available_actions']))
    return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])
def step(self, obs):
    """Run the policy network and build the next FunctionCall.

    Feeds the preprocessed minimap/screen/info through the TF session, picks
    the best *available* non-spatial action and a spatial target, applies
    epsilon-greedy exploration, then assembles the argument list.
    BUGFIX: removed the dead `if False: print(...)` debug branch.
    """
    minimap = np.array(obs.observation['feature_minimap'], dtype=np.float32)
    minimap = np.expand_dims(U.preprocess_minimap(minimap), axis=0)
    screen = np.array(obs.observation['feature_screen'], dtype=np.float32)
    screen = np.expand_dims(U.preprocess_screen(screen), axis=0)
    # TODO: only use available actions
    info = np.zeros([1, self.isize], dtype=np.float32)
    info[0] = U.get_info(obs)

    feed = {self.minimap: minimap, self.screen: screen, self.info: info}
    non_spatial_action, spatial_action = self.sess.run(
        [self.non_spatial_action, self.spatial_action], feed_dict=feed)

    # Select an action (restricted to available ones) and a spatial target.
    non_spatial_action = non_spatial_action.ravel()
    spatial_action = spatial_action.ravel()
    valid_actions = obs.observation['available_actions']
    act_id = valid_actions[np.argmax(non_spatial_action[valid_actions])]
    target = np.argmax(spatial_action)
    target = [int(target // self.ssize), int(target % self.ssize)]  # (y, x)

    # Epsilon-greedy exploration: random action; random or jittered target.
    if self.training and np.random.rand() < self.epsilon[0]:
        act_id = np.random.choice(valid_actions)
    if self.training and np.random.rand() < self.epsilon[1]:
        if np.random.rand() < self.epsilon[2]:
            target[0] = int(np.random.randint(0, self.ssize))
            target[1] = int(np.random.randint(0, self.ssize))
        else:
            dy = np.random.randint(-4, 5)
            target[0] = int(max(0, min(self.ssize - 1, target[0] + dy)))
            dx = np.random.randint(-4, 5)
            target[1] = int(max(0, min(self.ssize - 1, target[1] + dx)))

    # Spatial args expect (x, y), so flip the (y, x) target.
    act_args = []
    for arg in actions.FUNCTIONS[act_id].args:
        if arg.name in ('screen', 'minimap', 'screen2'):
            act_args.append([target[1], target[0]])
        else:
            act_args.append([0])  # TODO: Be careful
    return actions.FunctionCall(act_id, act_args)
def actions_to_pysc2(self, a_0, arg_ids):
    """Convert agent action representation to FunctionCall representation."""
    height, width = self.size
    a_l = []
    for arg_type in actions.FUNCTIONS._func_list[a_0].args:
        arg_id = arg_ids[arg_type.id]
        if arg_type in [
                actions.TYPES.minimap, actions.TYPES.screen,
                actions.TYPES.screen2
        ]:
            # Decode the flat spatial index into an (x, y) point.
            # NOTE(review): x uses `% width` but y uses `// height`; the pair
            # is only consistent when width == height — confirm behavior for
            # non-square sizes (y should presumably be `arg_id // width`).
            arg = [arg_id % width, arg_id // height]
        else:
            arg = [arg_id]
        a_l.append(arg)
    action = actions.FunctionCall(a_0, a_l)
    return action
def build_supply_depot(self, count, obs):
    """Three-phase supply-depot routine keyed on self.move_number:
    0 = select workers, 1 = place the depot, 2 = send the worker back to harvest.

    Returns None (no action) when the current phase's preconditions fail,
    matching the original behavior.
    """
    unit_type = obs.observation['screen'][_UNIT_TYPE]
    if self.move_number == 0:
        return self.select_workers(unit_type)
    if self.move_number == 1:
        available = obs.observation['available_actions']
        if count < 10 and _BUILD_SUPPLY_DEPOT in available and self.cc_y.any():
            # Depots are stacked in a column offset from the command center,
            # 7 px apart per depot already built.
            target = self.transformDistance(
                round(self.cc_x.mean()), 15,
                round(self.cc_y.mean()), -15 + 7 * count)
            return actions.FunctionCall(_BUILD_SUPPLY_DEPOT,
                                        [_NOT_QUEUED, target])
    elif self.move_number == 2:
        if _HARVEST_GATHER in obs.observation['available_actions']:
            return self.return_worker_to_harvest(obs)
def step(self, obs):
    """DefeatRoaches scripted policy that also records (obs, action, param)
    triples and periodically pickles them to a replay file.

    BUGFIX: the replay file was opened inline in pickle.dump() and never
    closed; it is now written inside a `with` block so it is flushed/closed.
    """
    super(DefeatRoaches, self).step(obs)
    if _ATTACK_SCREEN in obs.observation["available_actions"]:
        player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
        roach_y, roach_x = (player_relative == _PLAYER_HOSTILE).nonzero()
        if not roach_y.any():
            self.action = _NO_OP
            param = []
        else:
            # Attack the hostile pixel with the largest y coordinate.
            index = np.argmax(roach_y)
            target_xy = [roach_x[index], roach_y[index]]
            target = roach_x[index] * self.config.sz + roach_y[index]
            self.action = _ATTACK_SCREEN
            param = [[_NOT_QUEUED], target_xy]
            self.param[self.config.arg_idx[FUNCTIONS[
                self.action].args[0].name]] = _NOT_QUEUED
            self.param[self.config.arg_idx[FUNCTIONS[
                self.action].args[1].name]] = target  # flat target index
    elif _SELECT_ARMY in obs.observation["available_actions"]:
        self.action = _SELECT_ARMY
        self.param[self.config.arg_idx[FUNCTIONS[
            self.action].args[0].name]] = _SELECT_ALL
        param = [[_SELECT_ALL]]
    else:
        self.action = _NO_OP
        param = []

    self.states.append([
        copy.deepcopy(obs.observation),
        copy.deepcopy(self.action),
        copy.deepcopy(self.param)
    ])
    if len(self.states) == DATA_SIZE:
        # NOTE(review): this uses module-level `config` while the rest of the
        # method uses `self.config` — confirm they are the same object.
        new_file_name = str(uuid.uuid1())
        path = ('replay/' + config.full_id() + '/{}'.format(new_file_name) +
                '_{}.replay'.format(DATA_SIZE))
        with open(path, 'wb') as replay_file:
            pickle.dump(self.states, replay_file)
        self.states = []
    return actions.FunctionCall(self.action, param)
def step(self, obs):
    """MoveToBeacon scripted policy that also records (obs, action, param)
    triples and periodically pickles them to a replay file.

    BUGFIX: the replay file was opened inline in pickle.dump() and never
    closed; it is now written inside a `with` block so it is flushed/closed.
    """
    super(MoveToBeacon, self).step(obs)
    if _MOVE_SCREEN in obs.observation["available_actions"]:
        player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
        neutral_y, neutral_x = (player_relative == _PLAYER_NEUTRAL).nonzero()
        if not neutral_y.any():
            self.action = _NO_OP  # action function id
            param = []
        else:
            # Move to the centroid of the neutral (beacon) pixels.
            target_xy = [int(neutral_x.mean()), int(neutral_y.mean())]
            target = int(neutral_x.mean()) * self.config.sz + int(
                neutral_y.mean())
            self.action = _MOVE_SCREEN  # action function id
            self.param[self.config.arg_idx[FUNCTIONS[
                self.action].args[0].name]] = _NOT_QUEUED
            self.param[self.config.arg_idx[FUNCTIONS[
                self.action].args[1].name]] = target  # flat target index
            param = [[_NOT_QUEUED], target_xy]
    else:
        self.action = _SELECT_ARMY  # action function id
        self.param[self.config.arg_idx[FUNCTIONS[
            self.action].args[0].name]] = _SELECT_ALL
        param = [[_SELECT_ALL]]

    self.states.append([
        copy.deepcopy(obs.observation),
        copy.deepcopy(self.action),
        copy.deepcopy(self.param)
    ])
    if len(self.states) == DATA_SIZE:
        # NOTE(review): module-level `config` vs `self.config` — confirm
        # they are the same object.
        new_file_name = str(uuid.uuid1())
        path = ('replay/' + config.full_id() + '/{}'.format(new_file_name) +
                '_{}.replay'.format(DATA_SIZE))
        with open(path, 'wb') as replay_file:
            pickle.dump(self.states, replay_file)
        self.states = []
    return actions.FunctionCall(self.action, param)
def FirstStep(self, obs):
    """Per-episode reset: locate the command center, zero all state vectors,
    and reset every sub-agent before play begins."""
    super(SuperAgent, self).FirstStep(obs)
    self.move_number = 0
    self.sharedData.numStep = 0
    self.sharedData.numAgentStep = 0
    # Re-initialize shared data in place so existing references stay valid.
    self.sharedData.__init__()
    cc_y, cc_x = (self.unit_type == Terran.CommandCenter).nonzero()
    if len(cc_y) > 0:
        # Record the command center's minimap location for later camera moves.
        middleCC = FindMiddle(cc_y, cc_x)
        cameraCornerNorthWest, cameraCornerSouthEast = GetScreenCorners(obs)
        miniMapLoc = Scale2MiniMap(middleCC, cameraCornerNorthWest,
                                   cameraCornerSouthEast)
        self.sharedData.commandCenterLoc = [miniMapLoc]
        self.sharedData.buildingCompleted[Terran.CommandCenter].append(
            Building(middleCC))
    self.sharedData.unitTrainValue = self.unitPower

    # actions:
    self.current_action = None

    # states:
    self.current_state = np.zeros(SUPER_STATE.SIZE, float)
    self.previous_scaled_state = np.zeros(SUPER_STATE.SIZE, float)
    self.current_scaled_state = np.zeros(SUPER_STATE.SIZE, float)

    self.accumulatedTrainReward = 0.0
    self.subAgentsActions = {}
    for sa in range(NUM_SUB_AGENTS):
        self.subAgentsActions[sa] = None
        self.subAgents[sa].FirstStep(obs)

    if len(self.sharedData.commandCenterLoc) >= 1:
        # Pre-build the "go to base" camera action. SwapPnt presumably flips
        # the stored (y, x) point to (x, y) — confirm against its definition.
        self.go2BaseAction = actions.FunctionCall(
            SC2_Actions.MOVE_CAMERA,
            [SwapPnt(self.sharedData.commandCenterLoc[0])])
    else:
        self.go2BaseAction = SC2_Actions.DO_NOTHING_SC2_ACTION
    self.maxSupply = False
def step(self, obs):
    """Network forward pass plus epsilon-greedy exploration, returning a
    pysc2 FunctionCall.

    BUGFIX: renamed the local `range` to `jitter` so it no longer shadows
    the builtin.
    """
    ## feed to network
    minimap = np.array(obs.observation['feature_minimap'], dtype=np.float32)
    minimap = np.expand_dims(PP.preprocess_minimap(minimap), axis=0)
    screen = np.array(obs.observation['feature_screen'], dtype=np.float32)
    screen = np.expand_dims(PP.preprocess_screen(screen), axis=0)
    structure = np.expand_dims(PP.preprocess_structure(obs), axis=0)
    feed = {
        self.minimap: minimap,
        self.screen: screen,
        self.structure: structure
    }
    non_spatial_action, spatial_action = self.sess.run(
        [self.non_spatial_action, self.spatial_action], feed_dict=feed)

    ## choose spatial and non-spatial action (restricted to available ones)
    non_spatial_action = non_spatial_action.ravel()
    valid_actions = obs.observation['available_actions']
    act_id = valid_actions[np.argmax(non_spatial_action[valid_actions])]
    spatial_action = spatial_action.ravel()
    target = np.argmax(spatial_action)
    target = [int(target // self.ssize), int(target % self.ssize)]  # (y, x)

    ## epsilon greedy exploration
    if self.training and np.random.rand() < self.epsilon[0]:
        act_id = np.random.choice(valid_actions)
    if self.training and np.random.rand() < self.epsilon[1]:
        # NOTE(review): randint's upper bound is exclusive, so +jitter itself
        # is never drawn — confirm whether that asymmetry is intended.
        jitter = int(self.random_range)
        dy = np.random.randint(-jitter, jitter)
        target[0] = int(max(0, min(self.ssize - 1, target[0] + dy)))
        dx = np.random.randint(-jitter, jitter)
        target[1] = int(max(0, min(self.ssize - 1, target[1] + dx)))

    ## return function; spatial args expect (x, y) so flip the (y, x) target
    act_args = []
    for arg in actions.FUNCTIONS[act_id].args:
        if arg.name in ('screen', 'minimap', 'screen2'):  ## spatial arg
            act_args.append([target[1], target[0]])
        else:
            act_args.append([0])  ## non-spatial arg
    return actions.FunctionCall(act_id, act_args)
def construct_action(self, base_actions, base_action_spec, sub3, sub4, sub5,
                     sub6, sub7, sub8, sub9, sub10, sub11, sub12, x0, y0, x1,
                     y1, x2, y2):
    """Build one FunctionCall per environment from per-argument network heads.

    Arguments are matched by pysc2 arg id: ids 0-2 are the spatial
    screen/minimap/screen2 (x, y) pairs, ids 3-12 are the scalar sub-action
    heads (queued, control_group_act, control_group_id, select_point_act,
    select_add, select_unit_act, select_unit_id, select_worker,
    build_queue_id, unload_id).

    BUGFIX: the screen2 arm forgot int() around y2[env_num]; every spatial
    coordinate is now cast consistently.
    """
    spatial_heads = {0: (x0, y0), 1: (x1, y1), 2: (x2, y2)}
    scalar_heads = {3: sub3, 4: sub4, 5: sub5, 6: sub6, 7: sub7, 8: sub8,
                    9: sub9, 10: sub10, 11: sub11, 12: sub12}
    actions = []
    for env_num, spec in enumerate(base_action_spec):
        args = []
        for arg in spec.args:
            if arg.id in spatial_heads:
                xs, ys = spatial_heads[arg.id]
                args.append([int(xs[env_num]), int(ys[env_num])])
            elif arg.id in scalar_heads:
                args.append([int(scalar_heads[arg.id][env_num])])
            else:
                raise NotImplementedError("cannot construct this arg",
                                          spec.args)
        actions.append(sc2_actions.FunctionCall(base_actions[env_num], args))
    return actions
def _compute_retreat(self, selected, enemy_unit_density):
    """Move the selected unit directly away from the enemy center of mass."""
    # center_of_mass returns (y, x); flip both points to (x, y).
    enemy_com = np.flip(
        np.array(ndimage.measurements.center_of_mass(enemy_unit_density)), 0)
    unit_position = np.flip(
        np.array(ndimage.measurements.center_of_mass(selected)), 0)

    # Head in the opposite direction of the enemy, as far as the screen
    # bounds allow along either axis.
    direction_vector = -(enemy_com - unit_position)
    max_movement = min(
        self._compute_movement_multiple(direction_vector[0],
                                        unit_position[0],
                                        enemy_unit_density.shape[1]),
        self._compute_movement_multiple(direction_vector[1],
                                        unit_position[1],
                                        enemy_unit_density.shape[0]))
    retreat_target = np.round(max_movement * direction_vector + unit_position)
    return actions.FunctionCall(actions.FUNCTIONS.Move_screen.id,
                                [_NOT_QUEUED, retreat_target])
def make_one_action(self, action_id, spatial_coordinates):
    """Build a FunctionCall for `action_id` at `spatial_coordinates`.

    Rectangle-select actions get the point expanded into a rectangle first;
    other spatial actions receive the point flipped from (x, y) to (y, x).

    :param action_id: The action id to perform.
    :param spatial_coordinates: The co-ordinates to perform the action at.
    """
    assert all(s < self.dim for s in spatial_coordinates)
    args = list(self.default_args[action_id])
    if action_id == self.rect_select_action_id:
        # Expand the point into a selection rectangle before acting.
        rect = convert_point_to_rectangle(spatial_coordinates,
                                          self.rect_delta, self.dim)
        args.extend(rect)
    elif self.is_spatial[action_id]:
        args.append(spatial_coordinates[::-1])  # (x, y) -> (y, x)
    return actions.FunctionCall(action_id, args)
def _safe_step(self, action):
    """Step the wrapped env, substituting NO_OP for unavailable actions and
    converting interrupts/errors into a terminal transition.

    :param action: [function_id, arg0, arg1, ...].
    :return: (obs, reward, done, info); obs is None on interrupt or error.
    """
    self._num_step += 1
    if action[0] not in self.available_actions:
        logger.warning("Attempted unavailable action: %s", action)
        action = [_NO_OP]  # NO_OP takes no args, so action[1:] below is empty
    try:
        obs = self._env.step(
            [actions.FunctionCall(action[0], action[1:])])[0]
    except KeyboardInterrupt:
        logger.info("Interrupted. Quitting...")
        return None, 0, True, {}
    except Exception:
        # Broad catch at the environment boundary: log with traceback and
        # end the episode rather than crash the training loop.
        logger.exception("An unexpected error occurred while applying action to environment.")
        return None, 0, True, {}
    self.available_actions = obs.observation['available_actions']
    reward = obs.reward
    self._episode_reward += reward
    self._total_reward += reward
    return obs, reward, obs.step_type == StepType.LAST, {}
def _check_and_return_action(self, obs):
    """Pop the next queued action and return it if it is currently valid.

    If the action's function is unavailable or its arguments fail
    validation, the remaining queue is discarded, self._success is set
    False, and a NO_OP is returned instead.

    Precondition: self._action_list is not empty.

    :param obs: The observation provided by pysc2.
    :return: The first queued action if it is valid, else NO_OP.
    """
    current_action = self._action_list.pop(0)
    available = obs.observation["available_actions"]
    is_valid = (current_action.function in available
                and self._check_argument(current_action))
    if not is_valid:
        self._action_list = []
        self._success = False
        return actions.FunctionCall(NO_OP, [])
    self._success = True
    return current_action
def step(self, timestep):
    """Scripted build order: one supply depot, one barracks, then marines.

    self.supply_depot_count / self.barracks_count act as one-shot flags, and
    the command-center pixel coordinates are cached on self for later
    placement offsets.
    """
    super(BuildMarinesAgent001, self).step(timestep)
    if self.supply_depot_count == 0:
        # build supply depot
        if self.functions.Build_SupplyDepot_screen.id in timestep.observation.available_actions:
            unit_type = timestep.observation.feature_screen.unit_type
            self.cmdcenters_y, self.cmdcenters_x = (
                unit_type == self.terran_commandcenter).nonzero()
            # Place the depot 15 px above the command-center centroid.
            target_point = [
                int(self.cmdcenters_x.mean()),
                int(self.cmdcenters_y.mean()) - 15
            ]
            self.supply_depot_count += 1
            return actions.FunctionCall(
                self.functions.Build_SupplyDepot_screen.id,
                [self.cmd_screen, target_point])
        else:
            # select scv (first SCV pixel found)
            unit_type = timestep.observation.feature_screen.unit_type
            scvs_y, scvs_x = (unit_type == self.terran_scv).nonzero()
            target_unit = [scvs_x[0], scvs_y[0]]
            return actions.FunctionCall(self.functions.select_point.id,
                                        [self.cmd_screen, target_unit])
    if self.barracks_count == 0:
        # build barracks, 20 px right of the cached command-center centroid
        if self.functions.Build_Barracks_screen.id in timestep.observation.available_actions:
            target_point = [
                int(self.cmdcenters_x.mean()) + 20,
                int(self.cmdcenters_y.mean())
            ]
            self.barracks_count += 1
            return actions.FunctionCall(
                self.functions.Build_Barracks_screen.id,
                [self.cmd_screen, target_point])
    if self.functions.Train_Marine_quick.id in timestep.observation.available_actions:
        # train marines while supply headroom remains
        if timestep.observation['player'][
                self.supply_used_id] < timestep.observation['player'][
                    self.supply_max_id]:
            return actions.FunctionCall(
                self.functions.Train_Marine_quick.id, [self.queued])
    else:
        # select barracks (centroid), or no-op if none exist on screen yet
        unit_type = timestep.observation.feature_screen.unit_type
        barracks_y, barracks_x = (
            unit_type == self.terran_barrack_id).nonzero()
        if not barracks_y.any():
            return actions.FunctionCall(self.functions.no_op.id, [])
        target_point = [int(barracks_x.mean()), int(barracks_y.mean())]
        return actions.FunctionCall(self.functions.select_point.id,
                                    [self.cmd_screen, target_point])
    return actions.FunctionCall(self.functions.no_op.id, [])
def test_defeat_zerglings(self):
    """Smoke test: run a RandomAgent on DefeatZerglingsAndBanelings and
    verify it took exactly self.steps steps."""
    FLAGS(sys.argv)  # absl flags must be parsed before creating the env
    with sc2_env.SC2Env(
            "DefeatZerglingsAndBanelings",
            step_mul=self.step_mul,
            visualize=True,
            game_steps_per_episode=self.steps * self.step_mul) as env:
        obs = env.step(actions=[sc2_actions.FunctionCall(_NO_OP, [])])
        player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]
        # Break Point!!
        print(player_relative)
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, self.steps)
        self.assertEqual(agent.steps, self.steps)
def step(self, obs):
    """Select an action from the policy network with 30% exploration."""
    policy = self._policy_net_predict(obs)
    valid_actions = obs.observation['available_actions']

    # e-greedy: 30% of the time sample from the policy distribution
    # (entropy-style exploration), otherwise take the greedy action.
    explore = np.random.random() < 0.3
    if explore:
        act_id, target = self._exploit_distribution(policy, valid_actions)
    else:
        act_id, target = self._exploit_max(policy, valid_actions)

    act_args = util.get_action_arguments(act_id, target)
    self.steps += 1
    return actions.FunctionCall(act_id, act_args)
def Action2SC2Action(self, obs, a, moveNum):
    """Translate the agent's current attack action into a concrete SC2 action.

    Defaults to STOP (or DO_NOTHING when STOP is unavailable); when
    self.current_action is an attack, targets the matching enemy-building
    grid location on screen.

    NOTE(review): parameters `a` and `moveNum` are unused here — confirm
    whether they are required only by the caller's interface.

    :return: (sc2Action, True) — the True flags the action as terminal/final.
    """
    # Fallback action: stop if possible, otherwise do nothing.
    if SC2_Actions.STOP in obs.observation['available_actions']:
        sc2Action = SC2_Actions.STOP_SC2_ACTION
    else:
        sc2Action = SC2_Actions.DO_NOTHING_SC2_ACTION
    if self.current_action > BaseAttackActions.DO_NOTHING:
        # Copy so the cached grid location is not mutated downstream.
        goTo = self.enemyBuildingGridLoc2ScreenLoc[
            self.current_action - BaseAttackActions.START_IDX_ATTACK].copy()
        if SC2_Actions.ATTACK_SCREEN in obs.observation[
                'available_actions']:
            sc2Action = actions.FunctionCall(
                SC2_Actions.ATTACK_SCREEN,
                [SC2_Params.NOT_QUEUED, SwapPnt(goTo)])
    return sc2Action, True
def step(self, obs):
    """Predict an action from the current unit data via the model and run it.

    BUGFIX: removed a dead triple-quoted string expression that held the old
    random-action code (it was evaluated and discarded every step).
    """
    super(TestAgent, self).step(obs)
    avb = obs.observation.available_actions
    X = self._get_unit_data(obs)
    y = self.model.predict(X)
    print(y)
    function_id, args = self._translateOutputToAction(y, avb)
    print(function_id, args)
    # Track score delta and surviving units across steps.
    score_gained, self.pUnits, self.eUnits = evaluate_step(
        obs, self.pUnits, self.eUnits)
    self.score += score_gained
    return actions.FunctionCall(function_id, args)
def disengage_damaged(self):
    """Pull back the most damaged multi-selected unit, if any is below threshold."""
    dmg_threshold = 28
    ms = np.array(self.obs.observation['multi_select'])
    if ms.size == 0:
        return []
    hp_column = ms[:, 2]
    if hp_column.min() >= dmg_threshold:
        return []  # nobody is damaged enough to pull back
    # Select the weakest unit, run the disengage sequence, then idle briefly.
    weakest = hp_column.argmin()
    return [
        actions.FunctionCall(_SELECT_UNIT, [[0], [weakest]]),
        *self.disengage(),
        *noops(7)
    ]
def select_onscreen(self):
    """Drag-select the bounding box around all friendly pixels on screen."""
    screen = self.obs.observation['feature_screen']
    player_y, player_x = (screen[_PLAYER_RELATIVE] == _PLAYER_SELF).nonzero()
    if not player_y.any():
        return []  # no friendly units visible
    top_left = [player_x.min(), player_y.min()]
    bottom_right = [player_x.max(), player_y.max()]
    return [
        actions.FunctionCall(_SELECT_RECT,
                             [_SELECT_NO_ADD, top_left, bottom_right])
    ]
def move_unit(obs, mode):
    """Move the friendly unit one pixel on a 32x32 grid.

    :param obs: flat observation reshaped to (32, 32, 2); channel 0 is read.
    :param mode: 1=up, 2=down, 3=left, 4=right.
    :raises ValueError: for any other mode (the original left dest_x/dest_y
        unbound and crashed with NameError instead).
    :return: a Move_screen FunctionCall toward the clipped destination.
    """
    obs = obs.reshape(32, 32, 2)
    obs = obs[:, :, 0]
    selected_unit_position_y, selected_unit_position_x = (
        obs == friendly).nonzero()
    target_x = np.mean(selected_unit_position_x)
    target_y = np.mean(selected_unit_position_y)
    if mode == 1:  # up
        dest_x, dest_y = np.clip(target_x, 0, 31), np.clip(target_y - 1, 0, 31)
    elif mode == 2:  # down
        dest_x, dest_y = np.clip(target_x, 0, 31), np.clip(target_y + 1, 0, 31)
    elif mode == 3:  # left
        dest_x, dest_y = np.clip(target_x - 1, 0, 31), np.clip(target_y, 0, 31)
    elif mode == 4:  # right
        dest_x, dest_y = np.clip(target_x + 1, 0, 31), np.clip(target_y, 0, 31)
    else:
        raise ValueError("mode must be 1 (up), 2 (down), 3 (left) or 4 (right)")
    return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, [dest_x, dest_y]])
def handle_last_action(self, obs):
    """End-of-episode bookkeeping: log a stats row, apply a terminal
    Q-learning update, reset per-episode counters, and persist the Q-table.
    """
    # Gather the episode's resource and build statistics as strings.
    cm = str(p.get_minerals(obs))
    cv = str(p.get_vespene(obs))
    iwc = str(p.get_idle_worker_count(obs))
    depot_count = str(self.supply_depots)
    refinery_count = str(self.refineries)
    command_centers = str(get_command_center_amount(self.unit_type))
    food_used = str(p.get_food_used(obs))
    food_cap = str(p.get_food_cap(obs))
    sp_attempts = str(self.build_supply_depot_attempts)
    sp_attempts_f = str(self.build_supply_depot_attempts_failed)
    refinery_attempts = str(self.build_refinery_attempts)
    refinery_attempts_f = str(self.build_refinery_attempts_failed)
    cc_attempts = str(self.build_cc_attempts)
    cc_attempts_f = str(self.build_cc_attempt_failed)
    score = cs.get_score(obs)
    data = [sp_attempts, sp_attempts_f, depot_count, refinery_attempts,
            refinery_attempts_f, refinery_count, cc_attempts, cc_attempts_f,
            cm, cv, iwc, command_centers, food_used + "/" + food_cap,
            str(score)]
    # NOTE(review): hardcoded absolute path — breaks on any other machine;
    # consider making it configurable.
    with open('/home/kenn/Development/sc2-bot/CustomAgents/scores.txt', 'a+') as f:
        f.write('{0[0]:<15}{0[1]:<15}{0[2]:<15}{0[3]:<15}{0[4]:<15}{0[5]:<15}{0[6]:<15}{0[7]:<15}{0[8]:<15}{0[9]:<15}{0[10]:<15}{0[11]:<15}{0[12]:<15}{0[13]:<15}\n'.format(data))

    # If we score less than 4000 we are doing so poorly, we want to learn that it was very bad.
    # Symbolizes the ultimate loss.
    self.qlearn.learn(str(self.previous_state), self.previous_action,
                      int(score) - 4000, 'terminal')

    # Reset all per-episode state for the next game.
    self.previous_state = None
    self.previous_action = None
    self.move_number = 0
    self.supply_depots = 0
    self.refineries = 0
    self.builder_iterator = 0
    self.build_supply_depot_attempts = 0
    self.build_supply_depot_attempts_failed = 0
    self.build_refinery_attempts = 0
    self.build_refinery_attempts_failed = 0
    self.build_cc_attempts = 0
    self.build_cc_attempt_failed = 0
    self.initializing = 0

    # Persist the learned Q-table between runs.
    self.qlearn.q_table.to_pickle(DATA_FILE + '.gz', 'gzip')
    return actions.FunctionCall(_NOOP, [])
def step(self, obs):
    """Manual-control agent: map a pressed key to a debug action."""
    super(OurAgent, self).step(obs)

    # Determine which side of the map our base starts on.
    if self.base_top_left is None:
        player_y, player_x = (obs.observation["minimap"][_PLAYER_RELATIVE] ==
                              _PLAYER_SELF).nonzero()
        # base_top_left = map height / 2
        self.base_top_left = player_y.mean() <= 31

    # FOR TESTING FUNCTIONS VIA USER INPUT: key code -> (message, handler).
    handlers = {
        1: ("1 PRESSED: SELECTING LARVA", lambda: self.select_larva(obs)),
        2: ("2 PRESSED: SELECTING DRONE", lambda: self.select_drone(obs)),
        3: ("3 PRESSED: BUILDING SPAWN TOOL",
            lambda: self.build_spawningpool(obs)),
        4: ("4 PRESSED: MOVING SELECTED UNIT TO POINT",
            lambda: self.move_to_point(obs)),
        5: ("5 PRESSED: TRAINING ZERGLING",
            lambda: self.train_zergling(obs)),
        6: ("6 PRESSED: SELECTING ZERGLING",
            lambda: self.select_zergling(obs)),
        10: ("UP PRESSED: MOVING UP",
             lambda: self.move_in_direction(obs, [0, -20])),
        11: ("DOWN PRESSED: MOVING DOWN",
             lambda: self.move_in_direction(obs, [0, 20])),
        12: ("LEFT PRESSED: MOVING LEFT",
             lambda: self.move_in_direction(obs, [-20, 0])),
        13: ("RIGHT PRESSED: MOVING RIGHT",
             lambda: self.move_in_direction(obs, [20, 0])),
    }
    keyPressed = self.inputMngr.checkKeyPressed()
    if keyPressed in handlers:
        message, handler = handlers[keyPressed]
        print(message)
        return handler()
    return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])
def step(self, drt):
    """Apply actions, step the world forward, and return observations.

    BUGFIXES:
    - `self._obs.obsercation` typo crashed the select-army branch with
      AttributeError; corrected to `.observation`.
    - In the move branch, `action` was referenced before assignment
      (NameError); the Move_screen command is now built from the computed
      target before being transformed.
    """
    if self._state == environment.StepType.LAST:
        return self.reset()

    # Observe so we know the unit positions.
    self._obs = self._controller.observe()
    obs = self._features.transform_obs(self._obs.observation)

    if _MOVE_SCREEN not in obs['available_actions']:
        # Nothing selected yet: select the whole army, then retry this step.
        action = actions.FunctionCall(_SELECT_ARMY, [_SELECT_ALL])
        action = self._features.transform_action(self._obs.observation,
                                                 action)
        self._controller.act(action)
        self._state = environment.StepType.MID
        self._step()
        return self.step(drt)

    # Average the raw unit positions to get the group's center.
    raw = self._obs.observation.raw_data
    pos_x = 0
    pos_y = 0
    unit_n = 0
    for u in raw.units:
        pos_x += u.pos.x
        pos_y += u.pos.y
        unit_n += 1
    pos_x /= unit_n
    pos_y /= unit_n

    # Offset the center one pixel in the requested direction.
    if drt == 0:
        target = [int(pos_x), int(pos_y) + 1]
    elif drt == 1:
        target = [int(pos_x), int(pos_y) - 1]
    elif drt == 2:
        target = [int(pos_x) + 1, int(pos_y)]
    elif drt == 3:
        target = [int(pos_x) - 1, int(pos_y)]
    else:
        raise ValueError("Invalid direction code!")
    action = actions.FunctionCall(_MOVE_SCREEN, [[0], target])  # [0] = not queued
    action = self._features.transform_action(self._obs.observation, action)
    self._controller.act(action)
    self._state = environment.StepType.MID
    return self._step()
def step(self, obs):
    """On each frame accumulate reward; on the first frame build tag<->id
    lookup tables for both armies. Always returns a no-op FunctionCall."""
    self.steps += 1
    self.reward += obs.reward
    # 1: at the start of each episode, record every surviving unit's tag.
    if self.is_first(obs):
        friend_tags = self.get_raw_friends_data(obs)[:, 0]
        enemy_tags = self.get_raw_opponents_data(obs)[:, 0]
        self.all_friends_tag = friend_tags
        self.all_enemies_tag = enemy_tags
        self.friends_tag_2_id = {t: i for i, t in enumerate(friend_tags)}
        self.friends_id_2_tag = dict(enumerate(friend_tags))
        self.enemies_tag_2_id = {t: i for i, t in enumerate(enemy_tags)}
        self.enemies_id_2_tag = dict(enumerate(enemy_tags))
    return actions.FunctionCall(0, [])
def step(self, obs):
    """Snapshot units at episode start, append start/end snapshots to
    units.txt at episode end, and always no-op."""
    if obs.first():
        self.clean()
        self.get_units(obs, True)
    if obs.last():
        self.get_units(obs, False)
        # Append both players' start and end unit snapshots.
        with open('units.txt', 'a') as data:
            data.write("\n")
            data.write("\n")
            data.write(str(self.start_units_1))
            data.write(str(self.start_units_2))
            data.write("\n")
            data.write(str(self.end_units_1))
            data.write(str(self.end_units_2))
    return actions.FunctionCall(0, [])