async def observation(self, game_loop=None):
    """Request an observation and record the game result once it is known.

    ``game_loop`` is forwarded in the request only when it is not ``None``.
    If ``self.in_game`` is false, or the response already carries player
    results, the results are stored on ``self._game_result`` as a
    ``{player_id: Result}`` mapping. When a renderer is configured and the
    response contains ``render_data``, the observation is passed to it.

    Returns:
        The raw response of the observation request.
    """
    request_kwargs = {} if game_loop is None else {"game_loop": game_loop}
    response = await self._execute(
        observation=sc_pb.RequestObservation(**request_kwargs))
    assert response.HasField("observation")

    if not self.in_game or response.observation.player_result:
        if not response.observation.player_result:
            # Sometimes the game ends one step before the results are
            # available, so ask once more.
            response = await self._execute(
                observation=sc_pb.RequestObservation())
            assert response.observation.player_result

        self._game_result = {
            entry.player_id: Result(entry.result)
            for entry in response.observation.player_result
        }

    # Presence of render_data means RGB rendering was requested.
    if self._renderer and response.observation.observation.HasField(
            "render_data"):
        await self._renderer.render(response.observation)

    return response
async def observation(self):
    """Fetch an observation and store the game result when one is available.

    If ``self.in_game`` is false, or the response carries player results,
    ``self._game_result`` is set to a ``{player_id: Result}`` mapping.

    Returns:
        The raw response of the observation request.
    """
    response = await self._execute(observation=sc_pb.RequestObservation())

    if (not self.in_game) or response.observation.player_result:
        if not response.observation.player_result:
            # Sometimes the game ends one step before the results are
            # available, so request the observation again.
            response = await self._execute(
                observation=sc_pb.RequestObservation())
            assert len(response.observation.player_result) > 0

        self._game_result = {
            entry.player_id: Result(entry.result)
            for entry in response.observation.player_result
        }

    return response
def observe(self, disable_fog=False, target_game_loop=0):
    """Get a current observation.

    A response whose ``game_loop`` equals ``2**32 - 1`` is treated as a stub:
    the previously cached observation is deep-copied, its actions are replaced
    by the stub's actions, and the stub's player results are appended to it.
    Any other response is cached for that purpose and returned unchanged.

    Raises:
        ValueError: a stub observation carries no player result.
        RuntimeError: a stub observation arrives while nothing is cached.
    """
    request = sc_pb.RequestObservation(
        game_loop=target_game_loop, disable_fog=disable_fog)
    obs = self._client.send(observation=request)

    stub_game_loop = 2**32 - 1
    if obs.observation.game_loop != stub_game_loop:
        self._last_obs = obs
    else:
        logging.info("Received stub observation.")
        if not obs.player_result:
            raise ValueError(
                "Expect a player result in a stub observation")
        if self._last_obs is None:
            raise RuntimeError(
                "Received stub observation with no previous obs")

        # Rather than handling empty obs through the code, regurgitate the
        # last observation (+ player result, sub actions).
        merged = copy.deepcopy(self._last_obs)
        del merged.actions[:]
        merged.actions.extend(obs.actions)
        merged.player_result.extend(obs.player_result)
        obs = merged
        self._last_obs = None

    if FLAGS.sc2_log_actions and obs.actions:
        banner = " Executed actions ".center(60, "<") + "\n"
        sys.stderr.write(banner)
        for executed in obs.actions:
            sys.stderr.write(str(executed))
        sys.stderr.flush()

    return obs
def reset(self):
    """Start a new episode and return the custom states of both players.

    The underlying ``sc2_env`` is fully reset once ``self.reset_steps`` has
    reached 10. Every decomposed-reward entry for ``self.reward_types`` is
    zeroed after the initial ``getRewards`` call.

    Returns:
        ``(state_1, state_2)`` from ``get_custom_state(units, 1)`` and
        ``get_custom_state(units, 2)``.
    """
    self.decomposed_rewards = []

    # Moving the camera (in any direction) runs the ResetEpisode trigger
    # built into the map.
    camera_move = actions.FUNCTIONS.move_camera([0, 0])
    self.current_obs = self.sc2_env.step([camera_move])[0]

    if self.reset_steps >= 10:
        self.sc2_env.reset()
        self.reset_steps = 0
    self.reset_steps += 1

    self.end_state = None
    self.decision_point = 1
    self.num_waves = 0

    client = self.sc2_env._controllers[0]._client
    response = client.send(observation=sc_pb.RequestObservation())
    # An empty action request is sent as well; its response is not used.
    client.send(action=sc_pb.RequestAction())

    units = response.observation.raw_data.units
    self.getRewards(units)

    first_player_state = self.get_custom_state(units, 1)
    second_player_state = self.get_custom_state(units, 2)

    for reward_type in self.reward_types:
        self.decomposed_reward_dict[reward_type] = 0
        self.last_decomposed_reward_dict[reward_type] = 0

    return first_player_state, second_player_state
def step(self, action, skip=False):
    """Apply ``action``, advance the game by one no-op step, and read rewards.

    Actions below 4 trigger the matching custom ability, 4 does nothing, and
    anything above 4 is reported as invalid. Unless ``skip`` is true,
    ``self.decomposed_rewards`` is rebuilt as the per-type change since the
    previous call and the "last" values are refreshed.

    Returns:
        ``(state, end, get_income)``, where ``state`` comes from
        ``get_custom_state`` and the two flags from ``getRewards``.
    """
    ### ACTION TAKING ###
    if action < 4:
        self.use_custom_ability(action_to_ability_id[action])
    elif action > 4:
        print("Invalid action: check final layer of network")

    no_op = actions.FUNCTIONS.no_op()
    self.current_obs = self.sc2_env.step([no_op])[0]

    # Get reward from data
    response = self.sc2_env._controllers[0]._client.send(
        observation=sc_pb.RequestObservation())
    units = response.observation.raw_data.units

    end, get_income = self.getRewards(units)
    state = self.get_custom_state(units)

    if not skip:
        current = self.decomposed_reward_dict
        previous = self.last_decomposed_reward_dict
        self.decomposed_rewards = [
            current[reward_type] - previous[reward_type]
            for reward_type in self.reward_types
        ]
        for reward_type in self.reward_types:
            previous[reward_type] = current[reward_type]

    if end:
        self.end_state = state

    return state, end, get_income
async def observation(self):
    """Fetch an observation, recording the game result if the response has one.

    When the response contains player results, ``self._game_result`` is set
    to a ``{player_id: Result}`` mapping; otherwise it is left untouched.

    Returns:
        The raw response of the observation request.
    """
    response = await self._execute(observation=sc_pb.RequestObservation())

    outcomes = response.observation.player_result
    if outcomes:
        self._game_result = {
            outcome.player_id: Result(outcome.result) for outcome in outcomes
        }

    return response
def step(self, action, player):
    """Queue ``player``'s build actions, or advance the game and read rewards.

    NOTE(review): the nesting below was reconstructed from a flattened source
    line. In particular, the extent of the ``else`` branch (whether the reward
    evaluation belongs to it) should be confirmed against the original file.

    Args:
        action: sequence where ``action[i]`` is how many times ability ``i``
            is used; an empty sequence only advances the game.
        player: player the actions belong to (``1`` selects
            ``self.fifo_player_1``, anything else ``self.fifo_player_2``).

    Returns:
        ``(state_1, state_2, done, dp)`` when ``getRewards`` reports ``dp`` or
        ``done``; otherwise ``(None, None, done, dp)``.
    """
    done = False
    dp = False
    # NOTE(review): the units are fetched before the env is stepped, so the
    # rewards and states below are derived from pre-step data. Confirm this
    # is intended.
    data = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
    data = data.observation.raw_data.units
    # pretty_print_units(data)
    #input("pausing at step")
    if len(action) > 0:
        if player == 1:
            fifo = self.fifo_player_1
        else:
            fifo = self.fifo_player_2
        ## ACTION TAKING ###
        current_player = self.get_current_player(data)
        # print(current_player)
        if current_player != player:
            # print('switch')
            self.use_custom_ability(action_to_ability_id['switch_player'])
        for a_index, num_action in enumerate(action):
            for _ in range(int(num_action)):
                # print(a_index, num_action)
                self.use_custom_ability(action_to_ability_id[a_index])
                # Remember the ability index, dropping the oldest entry once
                # the history exceeds the configured limit.
                fifo.append(a_index)
                if len(fifo) > self.building_limiation:
                    del fifo[0]
        action = actions.FUNCTIONS.no_op()
        self.current_obs = self.sc2_env.step([action])[0]
    else:
        action = actions.FUNCTIONS.no_op()
        self.current_obs = self.sc2_env.step([action])[0]
        # Get reward from data
        done, dp = self.getRewards(data)
        if dp or done:
            # Get channel states
            # state = self.get_channel_state(self.current_obs)
            # Get custom states
            state_1 = self.get_custom_state(data, 1)
            state_2 = self.get_custom_state(data, 2)
            if done:
                self.end_state_1 = state_1
                self.end_state_2 = state_2
            # Per-type reward change since the last time this branch ran.
            self.decomposed_rewards = []
            for rt in self.reward_types:
                value_reward = self.decomposed_reward_dict[rt] - self.last_decomposed_reward_dict[rt]
                self.decomposed_rewards.append(value_reward)
            # TODO: consider merging the two loops
            for rt in self.reward_types:
                self.last_decomposed_reward_dict[rt] = self.decomposed_reward_dict[rt]
            return state_1, state_2, done, dp
    return None, None, done, dp
async def get_result(self):
    """Poll the controller and store player results on ``self.result``.

    An observation is requested only when the ping reports the ``in_game``,
    ``in_replay`` or ``ended`` status. If it contains player results,
    ``self.result`` becomes a ``{player_id: Result}`` mapping. Any exception
    is logged together with its traceback and not re-raised.
    """
    try:
        res = await self.controller.ping()
        if res.status not in {Status.in_game, Status.in_replay, Status.ended}:
            return

        res = await self.controller._execute(
            observation=sc_pb.RequestObservation())
        if not res.HasField("observation"):
            return
        if not res.observation.player_result:
            return

        outcome_by_player = {}
        for pr in res.observation.player_result:
            outcome_by_player[pr.player_id] = Result(pr.result)
        self.result = outcome_by_player
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(f"Obs-check: {e}, traceback: {tb}")
def step(self, action):
    """Use the requested abilities, advance one no-op step, and read rewards.

    ``action[i]`` is the number of times ability ``i`` is used; nothing is
    used unless ``sum(action)`` is positive. ``self.end_state`` is updated on
    every call.

    Returns:
        ``(state, big_A, end, dp)``. ``big_A`` is
        ``get_big_A(state[self.miner_index] * 100)`` when ``getRewards``
        reports ``dp`` or ``end``, and ``None`` otherwise.
    """
    ### ACTION TAKING ###
    if sum(action) > 0:
        for ability_index, repeat in enumerate(action):
            for _ in range(repeat):
                self.use_custom_ability(action_to_ability_id[ability_index])

    no_op = actions.FUNCTIONS.no_op()
    self.current_obs = self.sc2_env.step([no_op])[0]

    # Get reward from data
    response = self.sc2_env._controllers[0]._client.send(
        observation=sc_pb.RequestObservation())
    units = response.observation.raw_data.units

    end, dp = self.getRewards(units)
    state = self.get_custom_state(units)
    self.end_state = state

    if not (dp or end):
        return state, None, end, dp

    # Per-type reward change since the previous decision point.
    current = self.decomposed_reward_dict
    previous = self.last_decomposed_reward_dict
    self.decomposed_rewards = [
        current[reward_type] - previous[reward_type]
        for reward_type in self.reward_types
    ]
    for reward_type in self.reward_types:
        previous[reward_type] = current[reward_type]

    return state, self.get_big_A(state[self.miner_index] * 100), end, dp
def reset(self):
    """Start a new episode and return the flattened one-hot screen state.

    Reward bookkeeping lists are cleared, ``self.signal_of_finished`` is
    initialised from ``getRewards``, and every entry of
    ``self.decomposed_reward_dict`` is zeroed.

    Returns:
        The result of ``getOneHotState`` on the ``feature_screen`` layers,
        reshaped to ``(1, -1)``.
    """
    self.decomposed_rewards_all = []
    self.decomposed_rewards = []
    self.decomposed_rewards_mark = 0

    # Moving the camera (in any direction) runs the ResetEpisode trigger
    # built into the map.
    camera_move = actions.FUNCTIONS.move_camera([0, 0])
    self.last_timestep = self.sc2_env.step([camera_move])[0]
    observation = self.unpack_timestep(self.last_timestep)
    self.current_obs = observation
    self.actions_taken = 0

    screen = observation[3]['feature_screen']
    # Cells where layer 6 equals 73, or layer 12 equals 1, are relabelled as 3
    # in layer 5. The layer is written back in place.
    relabelled = np.array(screen[5])
    relabelled[np.array(screen[6]) == 73] = 3
    relabelled[np.array(screen[12]) == 1] = 3
    screen[5] = relabelled.tolist()
    one_hot = getOneHotState(screen, self.input_screen_features)
    state = np.reshape(one_hot, (1, -1))

    self.end_state = None

    client = self.sc2_env._controllers[0]._client
    response = client.send(observation=sc_pb.RequestObservation())
    client.send(action=sc_pb.RequestAction())
    units = response.observation.raw_data.units

    _, finished_signal = self.getRewards(units)
    self.signal_of_finished = finished_signal

    for reward_key in self.decomposed_reward_dict:
        self.decomposed_reward_dict[reward_key] = 0

    return state
def reset(self):
    """Start a new episode and return the initial custom state.

    Every decomposed-reward entry for ``self.reward_types`` is zeroed after
    the initial ``getRewards`` call.

    Returns:
        ``get_custom_state`` evaluated on the raw units of a fresh
        observation.
    """
    self.decomposed_rewards = []
    self.actions_taken = 0

    # Moving the camera (in any direction) runs the ResetEpisode trigger
    # built into the map.
    camera_move = actions.FUNCTIONS.move_camera([0, 0])
    self.current_obs = self.sc2_env.step([camera_move])[0]

    self.end_state = None
    self.get_income_signal = 2

    client = self.sc2_env._controllers[0]._client
    response = client.send(observation=sc_pb.RequestObservation())
    # An empty action request is sent as well; its response is not used.
    client.send(action=sc_pb.RequestAction())

    units = response.observation.raw_data.units
    self.getRewards(units)
    state = self.get_custom_state(units)

    for reward_type in self.reward_types:
        self.decomposed_reward_dict[reward_type] = 0
        self.last_decomposed_reward_dict[reward_type] = 0

    return state
def step(self, action):
    """Apply ``action``, advance one step, and return the one-hot screen state.

    NOTE(review): the nesting below was reconstructed from a flattened source.
    Confirm against the original file (a) which statements belong to the
    ``signal_of_finished != sof`` branch and (b) whether
    ``self.end_state = state`` sits inside ``if dead:``.

    Args:
        action: 0-3 attack one of the four screen corners, 4 is a no-op;
            any other value is reported as invalid and replaced by a no-op.
            The choice is honoured only when ``self.actions_taken == 0`` and
            ``check_action(self.current_obs, 12)`` is true.

    Returns:
        ``(state, done, dead)`` where ``state`` has shape ``(1, -1)``.
    """
    done = False
    dead = False
    ### ACTION TAKING ###
    # print(action)
    if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
        if action == 0:
            action = actions.FUNCTIONS.Attack_screen("now", [0,0])
        elif action == 1:
            action = actions.FUNCTIONS.Attack_screen("now", [39,0])
        elif action == 2:
            action = actions.FUNCTIONS.Attack_screen("now", [0,39])
        elif action == 3:
            action = actions.FUNCTIONS.Attack_screen("now", [39,39])
        elif action == 4:
            action = actions.FUNCTIONS.no_op()
        else:
            print("Invalid action: check final layer of network")
            action = actions.FUNCTIONS.no_op()
    else:
        action = actions.FUNCTIONS.no_op()
    # print(self.actions_taken == 0, self.check_action(self.current_obs, 12))
    # print(action)
    ####################
    ### STATE PREPARATION ###
    self.last_timestep = self.sc2_env.step([action])[0]
    observation = self.unpack_timestep(self.last_timestep)
    self.current_obs = observation
    #########################
    ### REWARD PREPARATION AND TERMINATION ###
    data = self.sc2_env._controllers[0]._client.send(observation=sc_pb.RequestObservation())
    data = data.observation.raw_data.units
    rewards, sof = self.getRewards(data)
    state = observation[3]['feature_screen']
    # Cells where layer 6 equals 73, or layer 12 equals 1, are relabelled as 3
    # in layer 5; the layer is written back in place.
    player_relative = np.array(state[5])
    player_relative[np.array(state[6]) == 73] = 3
    player_relative[np.array(state[12]) == 1] = 3
    state[5] = player_relative.tolist()
    state = getOneHotState(state, self.input_screen_features)
    state = np.reshape(state, (1, -1))
    #print(state.shape)
    # Snapshot the current per-key reward values for this step.
    self.decomposed_rewards_all.append([])
    la = len(self.decomposed_rewards_all)
    for key in self.decomposed_reward_dict:
        self.decomposed_rewards_all[la - 1].append(self.decomposed_reward_dict[key])
    # print(self.signal_of_finished,sof)
    # A change in the finished signal marks the end of the episode.
    if self.signal_of_finished != sof:
        done = True
        if sof == 1:
            dead = True
        else:
            dead = False
    # Reward for this step is the snapshot minus the snapshot at the mark;
    # when dead, the previous step's snapshot (la - 2) is used instead.
    self.decomposed_rewards.append([])
    for i in range(len(self.reward_types)):
        l = len(self.decomposed_rewards)
        la = len(self.decomposed_rewards_all)
        if not dead:
            self.decomposed_rewards[l - 1].append(
                self.decomposed_rewards_all[la - 1][i] - self.decomposed_rewards_all[self.decomposed_rewards_mark][i]
            )
        else:
            self.decomposed_rewards[l - 1].append(
                self.decomposed_rewards_all[la - 2][i] - self.decomposed_rewards_all[self.decomposed_rewards_mark][i]
            )
    self.decomposed_rewards_mark = la - 1
    self.signal_of_finished = sof
    ''' if len(state) < 41: current_len_state = len(state) for x in range(current_len_state, 41): state.append(0.0) # print(done,dead) '''
    if dead:
        # Rebuild the state with every layer zeroed at the cells where
        # layer 6 equals 83 (named agent_units_position below).
        state = observation[3]['feature_screen']
        player_relative = np.array(state[5])
        player_relative[np.array(state[6]) == 73] = 3
        player_relative[np.array(state[12]) == 1] = 3
        state[5] = player_relative.tolist()
        agent_units_position = np.array(state[6]) == 83
        for i, s in enumerate(state):
            nps = np.array(s)
            nps[agent_units_position] = 0
            state[i] = nps.tolist()
        state = getOneHotState(state,self.input_screen_features)
        state = np.reshape(state, (1, -1))
    self.end_state = state
    return state, done, dead
unit_command = raw_pb.ActionRawUnitCommand() unit_command.ability_id = 16 # Move Ability unit_command.target_unit_tag = unit_tag_list[0] unit_command.unit_tags.append(unit_tag_list[1]) action_raw = raw_pb.ActionRaw(unit_command = unit_command) action = sc_pb.RequestAction() action.actions.add(action_raw = action_raw) test_client.comm.send(action=action) """ """Move Units""" unit_tag_list=[] observation = sc_pb.RequestObservation() t=test_client.comm.send(observation=observation) for unit in t.observation.observation.raw_data.units: if unit.unit_type == 84: # Probe unit_type_tag unit_tag_list.append(unit.tag) unit_command = raw_pb.ActionRawUnitCommand() unit_command.ability_id = 16 # Move Ability unit_command.target_world_space_pos.x = 30 unit_command.target_world_space_pos.y = 30 for i in range(0,12): unit_command.unit_tags.append(unit_tag_list[i]) action_raw = raw_pb.ActionRaw(unit_command = unit_command) action = sc_pb.RequestAction()
def get_observation(self):
    """Send an observation request via the first controller's client.

    Returns:
        The client's response to the request, unmodified.
    """
    client = self.sc2_env._controllers[0]._client
    return client.send(observation=sc_pb.RequestObservation())
async def observation(self):
    """Execute an empty observation request and return its response."""
    return await self._execute(observation=sc_pb.RequestObservation())
def observe(self):
    """Get a current observation.

    Returns:
        The client's response to an empty observation request.
    """
    request = sc_pb.RequestObservation()
    return self._client.send(observation=request)
def step(self, action):
    """Apply ``action``, advance one step, and derive rewards from raw units.

    The health of units whose ``unit_type`` lies in 1922-1935 is read as a
    counter. Types 1923/1925/1927/1929/1931/1933 feed
    ``self.decomposed_rewards`` (each minus 2), 1934 is the win counter and
    1935 the loss counter. A counter whose unit is absent counts as 0.

    Args:
        action: 0-3 attack one of the four screen corners, 4 is a no-op;
            any other value is reported as invalid and replaced by a no-op.
            The choice is honoured only when ``self.actions_taken == 0`` and
            ``check_action(self.current_obs, 12)`` is true.

    Returns:
        ``(state, reward, done, dead, info)``. ``state`` is a flat list of
        ``unit_type, x, y`` triples for units with ``unit_type < 1922`` other
        than 51, padded with ``0.0`` up to 36 entries. ``reward`` and
        ``info`` come straight from ``unpack_timestep``.
    """
    done = False

    ### ACTION TAKING ###
    if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
        if action == 0:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 0])
        elif action == 1:
            action = actions.FUNCTIONS.Attack_screen("now", [83, 0])
        elif action == 2:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 83])
        elif action == 3:
            action = actions.FUNCTIONS.Attack_screen("now", [83, 83])
        elif action == 4:
            action = actions.FUNCTIONS.no_op()
        else:
            print("Invalid action: check final layer of network")
            action = actions.FUNCTIONS.no_op()
    else:
        action = actions.FUNCTIONS.no_op()

    ### STATE PREPARATION ###
    self.last_timestep = self.sc2_env.step([action])[0]
    observation, screen_state, reward, _, info = self.unpack_timestep(
        self.last_timestep)
    self.current_obs = observation
    # NOTE(review): the one-hot map is not part of the returned state; the
    # call is kept only in case int_map_to_onehot has side effects. Confirm
    # and drop if it is pure.
    np.array(self.int_map_to_onehot(screen_state))

    ### REWARD PREPARATION AND TERMINATION ###
    from s2clientprotocol import sc2api_pb2 as sc_pb
    response = self.sc2_env._controllers[0]._client.send(
        observation=sc_pb.RequestObservation())

    state = []
    health_by_type = {}
    for unit in response.observation.raw_data.units:
        unit_type = unit.unit_type
        if unit_type < 1922 and unit_type != 51:
            state.extend((unit_type, unit.pos.x, unit.pos.y))
        elif 1922 <= unit_type <= 1935:
            # If a type occurs more than once, the last unit seen wins.
            health_by_type[unit_type] = unit.health

    wins = health_by_type.get(1934, 0)
    losses = health_by_type.get(1935, 0)
    self.reward = wins + losses
    self.losses = losses

    if self.last_reward != self.reward:
        done = True
    # NOTE(review): evaluated independently of ``done``; confirm against the
    # original indentation that this check is not nested under the one above.
    dead = self.last_losses < self.losses
    self.last_reward = self.reward
    self.last_losses = self.losses

    # Damage dealt to: zealot, zergling, roach, stalker, marine, hydralisk.
    damage_to_types = (1923, 1925, 1927, 1929, 1931, 1933)
    self.decomposed_rewards.append(
        [health_by_type.get(unit_type, 0) - 2 for unit_type in damage_to_types])

    # Pad to a fixed length of 36 entries (no-op when already long enough).
    state.extend([0.0] * (36 - len(state)))

    return state, reward, done, dead, info
def observe(self, disable_fog=False, target_game_loop=0):
    """Get a current observation.

    Args:
        disable_fog: forwarded as ``disable_fog`` in the request.
        target_game_loop: forwarded as ``game_loop`` in the request.

    Returns:
        The client's response to the observation request.
    """
    request = sc_pb.RequestObservation(
        game_loop=target_game_loop, disable_fog=disable_fog)
    return self._client.send(observation=request)
def reset(self):
    """Reset the environment and return the raw-unit state list.

    Returns:
        A flat list of ``unit_type, x, y`` triples for every raw unit whose
        ``unit_type`` is below 1922 and not 51. Unlike the list returned by
        the step in this family, it is not padded.
    """
    self.decomposed_rewards = []
    self.rewards = []
    self.last_timestep = self.sc2_env.reset()

    # Moving the camera (in any direction) runs the ResetEpisode trigger
    # built into the map.
    camera_move = actions.FUNCTIONS.move_camera([0, 0])
    self.last_timestep = self.sc2_env.step([camera_move])[0]
    observation, screen_state, _, _, _ = self.unpack_timestep(
        self.last_timestep)
    self.current_obs = observation
    # NOTE(review): the one-hot map is not part of the returned state; the
    # call is kept only in case int_map_to_onehot has side effects. Confirm
    # and drop if it is pure.
    np.array(self.int_map_to_onehot(screen_state))
    self.actions_taken = 0

    from s2clientprotocol import sc2api_pb2 as sc_pb
    client = self.sc2_env._controllers[0]._client
    response = client.send(observation=sc_pb.RequestObservation())
    # An empty action request is sent as well; its response is not used.
    client.send(action=sc_pb.RequestAction())

    state = []
    for unit in response.observation.raw_data.units:
        if unit.unit_type < 1922 and unit.unit_type != 51:
            state.extend((unit.unit_type, unit.pos.x, unit.pos.y))
    return state
def step(self, action):
    """Apply ``action``, advance one step, and compute rewards from raw units.

    The health of units with ``unit_type`` 1922-1927 is read as, in order:
    roach reward, zergling reward, damage by roach, damage by zergling,
    damage to roach, damage to zergling. A counter whose unit is missing from
    the observation counts as 0 (the two reward counters used to raise
    ``UnboundLocalError`` in that case).

    Args:
        action: 0-3 attack one of the four screen corners, 4 is a no-op;
            any other value is reported as invalid and replaced by a no-op.
            The choice is honoured only when ``self.actions_taken == 0`` and
            ``check_action(self.current_obs, 12)`` is true.

    Returns:
        ``(state, reward, done, dead, info)``. ``state`` is the one-hot
        encoded ``feature_screen`` reshaped to ``(1, -1)``; ``reward`` is
        ``roach_reward + zergling_reward - 4``; ``done`` is true when the
        reward changed since the last call and ``dead`` when it decreased.
    """
    done = False

    ### ACTION TAKING ###
    if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
        if action == 0:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 0])
        elif action == 1:
            action = actions.FUNCTIONS.Attack_screen("now", [39, 0])
        elif action == 2:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 39])
        elif action == 3:
            action = actions.FUNCTIONS.Attack_screen("now", [39, 39])
        elif action == 4:
            action = actions.FUNCTIONS.no_op()
        else:
            print("Invalid action: check final layer of network")
            action = actions.FUNCTIONS.no_op()
    else:
        action = actions.FUNCTIONS.no_op()

    ### STATE PREPARATION ###
    self.last_timestep = self.sc2_env.step([action])[0]
    observation, _, _, _, info = self.unpack_timestep(self.last_timestep)
    self.current_obs = observation

    input_screen_feature = {'PLAYER_RELATIVE': [1, 3, 16],
                            'UNIT_TYPE': SIMPLE_SC2_UNITS,
                            'HIT_POINT': 0,
                            'HIT_POINT_RATIO': 0,
                            'UNIT_DENSITY': 0}
    state = getOneHotState(observation[3]['feature_screen'],
                           input_screen_feature)
    state = np.reshape(state, (1, -1))
    state = np.array(state)

    ### REWARD PREPARATION AND TERMINATION ###
    from s2clientprotocol import sc2api_pb2 as sc_pb
    response = self.sc2_env._controllers[0]._client.send(
        observation=sc_pb.RequestObservation())
    # If a unit type occurs more than once, the last unit seen wins.
    health_by_type = {
        unit.unit_type: unit.health
        for unit in response.observation.raw_data.units
    }

    roach_reward = health_by_type.get(1922, 0)
    zergling_reward = health_by_type.get(1923, 0)
    damage_by_roach = health_by_type.get(1924, 0)
    damage_by_zergling = health_by_type.get(1925, 0)
    damage_to_roach = health_by_type.get(1926, 0)
    damage_to_zergling = health_by_type.get(1927, 0)

    reward = roach_reward + zergling_reward - 4
    self.reward = reward
    self.rewards.append(reward)

    if self.last_reward != self.reward:
        done = True
    dead = self.last_reward > self.reward
    self.last_reward = self.reward

    self.decomposed_rewards.append([
        roach_reward - 2,
        zergling_reward - 2,
        damage_by_roach - 2,
        damage_by_zergling - 2,
        damage_to_roach - 2,
        damage_to_zergling - 2,
    ])

    return state, reward, done, dead, info
def step(self, action):
    """Apply ``action``, advance one step, and compute rewards from raw units.

    The health of units with ``unit_type`` 1922-1927 is read as, in order:
    roach reward, zergling reward, damage by roach, damage by zergling,
    damage to roach, damage to zergling. A counter whose unit is missing from
    the observation counts as 0 (the two reward counters used to raise
    ``UnboundLocalError`` in that case).

    Args:
        action: 0-3 attack one of the four screen corners, 4 is a no-op;
            any other value is reported as invalid and replaced by a no-op.
            The choice is honoured only when ``self.actions_taken == 0`` and
            ``check_action(self.current_obs, 12)`` is true.

    Returns:
        ``(state, reward, done, dead, info)``. ``state`` is
        ``np.array(int_map_to_onehot(...))`` of the unpacked state;
        ``reward`` is ``roach_reward + zergling_reward - 4``; ``done`` is
        true when the reward changed since the last call and ``dead`` when
        it decreased.
    """
    done = False

    ### ACTION TAKING ###
    if self.actions_taken == 0 and self.check_action(self.current_obs, 12):
        if action == 0:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 0])
        elif action == 1:
            action = actions.FUNCTIONS.Attack_screen("now", [83, 0])
        elif action == 2:
            action = actions.FUNCTIONS.Attack_screen("now", [0, 83])
        elif action == 3:
            action = actions.FUNCTIONS.Attack_screen("now", [83, 83])
        elif action == 4:
            action = actions.FUNCTIONS.no_op()
        else:
            print("Invalid action: check final layer of network")
            action = actions.FUNCTIONS.no_op()
    else:
        action = actions.FUNCTIONS.no_op()

    ### STATE PREPARATION ###
    self.last_timestep = self.sc2_env.step([action])[0]
    observation, raw_state, _, _, info = self.unpack_timestep(
        self.last_timestep)
    self.current_obs = observation
    state = np.array(self.int_map_to_onehot(raw_state))

    ### REWARD PREPARATION AND TERMINATION ###
    from s2clientprotocol import sc2api_pb2 as sc_pb
    response = self.sc2_env._controllers[0]._client.send(
        observation=sc_pb.RequestObservation())
    # If a unit type occurs more than once, the last unit seen wins.
    health_by_type = {
        unit.unit_type: unit.health
        for unit in response.observation.raw_data.units
    }

    roach_reward = health_by_type.get(1922, 0)
    zergling_reward = health_by_type.get(1923, 0)
    damage_by_roach = health_by_type.get(1924, 0)
    damage_by_zergling = health_by_type.get(1925, 0)
    damage_to_roach = health_by_type.get(1926, 0)
    damage_to_zergling = health_by_type.get(1927, 0)

    reward = roach_reward + zergling_reward - 4
    self.reward = reward
    self.rewards.append(reward)

    if self.last_reward != self.reward:
        done = True
    dead = self.last_reward > self.reward
    self.last_reward = self.reward

    self.decomposed_rewards.append([
        roach_reward - 2,
        zergling_reward - 2,
        damage_by_roach - 2,
        damage_by_zergling - 2,
        damage_to_roach - 2,
        damage_to_zergling - 2,
    ])

    return state, reward, done, dead, info
def reset(self):
    """Reset the environment and return the flattened one-hot screen state.

    Returns:
        The result of ``getOneHotState`` on the ``feature_screen`` layers of
        the first observation, reshaped to ``(1, -1)``.
    """
    self.decomposed_rewards = []
    self.rewards = []
    self.last_timestep = self.sc2_env.reset()

    # Moving the camera (in any direction) runs the ResetEpisode trigger
    # built into the map.
    camera_move = actions.FUNCTIONS.move_camera([0, 0])
    self.last_timestep = self.sc2_env.step([camera_move])[0]
    observation, _, _, _, _ = self.unpack_timestep(self.last_timestep)
    self.current_obs = observation
    self.actions_taken = 0

    # Both requests are still sent; nothing from their responses is needed
    # for the returned state.
    from s2clientprotocol import sc2api_pb2 as sc_pb
    client = self.sc2_env._controllers[0]._client
    client.send(observation=sc_pb.RequestObservation())
    client.send(action=sc_pb.RequestAction())

    input_screen_feature = {
        'PLAYER_RELATIVE': [1, 3, 16],
        'UNIT_TYPE': SIMPLE_SC2_UNITS,
        'HIT_POINT': 0,
        'HIT_POINT_RATIO': 0,
        'UNIT_DENSITY': 0
    }
    state = getOneHotState(observation[3]['feature_screen'],
                           input_screen_feature)
    return np.reshape(state, (1, -1))