def reset(self):
    """Start a new episode and return its first processed observation.

    A fresh trainer is created with ``self.env.train(self.agents)`` and reset;
    the raw observation of the first controlled player is then converted to
    the format selected by ``self.obs_representation``.

    Returns:
        The converted observation: the SMM frame for ``"smm"``, the first
        frame repeated and concatenated on the last axis for
        ``"stacked_smm"``, the Simple115 vector for ``"float115"``, or the
        first element returned by ``OBSParser.parse`` for ``"raw"``.

    Raises:
        NotImplementedError: For ``"pixels"``, which has no conversion here.
        ValueError: For any other, unrecognised representation.
    """
    # Train our agent against the baseline
    # See https://github.com/Kaggle/kaggle-environments#Training
    self.trainer = self.env.train(self.agents)
    raw_obs = self.trainer.reset()

    # Raw observations
    # (See https://github.com/google-research/football/blob/master/gfootball/doc/observation.md)
    raw_obs = raw_obs['players_raw'][0]

    representation = self.obs_representation
    if representation == "smm":
        return observation_preprocessing.generate_smm([raw_obs])[0]

    if representation == "stacked_smm":
        frame = observation_preprocessing.generate_smm([raw_obs])[0]
        # A new episode must not see frames left over from the previous one,
        # so the stack is emptied before it is primed with the first frame.
        self.stacked_obs.clear()
        self.stacked_obs.extend([frame] * 4)
        return np.concatenate(list(self.stacked_obs), axis=-1)

    if representation == "float115":
        return Simple115StateWrapper.convert_observation([raw_obs], True)[0]

    if representation == "raw":
        # Parse the raw observation itself; the previous code passed the
        # not-yet-assigned ``obs`` and failed with UnboundLocalError.
        parsed_obs, _ = OBSParser.parse(raw_obs)
        return parsed_obs

    if representation == "pixels":
        # Previously this branch fell through to ``return obs`` with ``obs``
        # unbound; fail with an explicit, descriptive error instead.
        raise NotImplementedError(
            "obs_representation 'pixels' is not implemented")

    raise ValueError(
        "Unknown obs_representation: {!r}".format(representation))
def step(self, action):
    """Advance the environment by one action and return a gym-style tuple.

    Args:
        action: Action for the controlled player; forwarded to the
            environment as ``[action]``.

    Returns:
        ``(obs, reward, done, info)`` where ``obs`` is the observation in the
        format selected by ``self.obs_representation``, ``reward`` includes
        the optional ball-possession shaping, and ``info`` additionally holds
        ``'l_score'`` and ``'r_score'`` taken from ``OBSParser.parse``.

    Raises:
        NotImplementedError: For the ``"pixels"`` representation.
        ValueError: For any other, unrecognised representation.
    """
    # Step through the environment
    # NOTE(review): reset() drives ``self.trainer`` while this method steps
    # ``self.env`` directly -- confirm this is the intended object.
    raw_obs, reward, done, info = self.env.step([action])

    # Obtain raw observation of the first controlled player
    raw_obs = raw_obs[0]

    # Reward Shaping (If applicable)
    if "ball_possession" in self.rewards:
        # Reward winning ball possession and penalize losing ball possession.
        # The current owner is read from the observation just received; the
        # previous code read ``self.raw_obs``, which this method never sets.
        prev_ball_owned_team = self.ball_owned_team
        cur_ball_owned_team = raw_obs['ball_owned_team']
        if prev_ball_owned_team == 1 and cur_ball_owned_team == 0:
            # Win ball possession
            reward += 0.1
        elif prev_ball_owned_team == 0 and cur_ball_owned_team == 1:
            # Lose ball possession
            reward -= 0.1
        self.ball_owned_team = cur_ball_owned_team

    representation = self.obs_representation
    parsed = None
    if representation == "smm":
        obs = observation_preprocessing.generate_smm([raw_obs])[0]
    elif representation == "stacked_smm":
        frame = observation_preprocessing.generate_smm([raw_obs])[0]
        if not self.stacked_obs:
            # First frame seen: prime the stack with copies of it.
            self.stacked_obs.extend([frame] * 4)
        else:
            self.stacked_obs.append(frame)
        obs = np.concatenate(list(self.stacked_obs), axis=-1)
    elif representation == "float115":
        obs = Simple115StateWrapper.convert_observation([raw_obs], True)[0]
    elif representation == "raw":
        # Parse the raw observation (previously the unbound ``obs`` was
        # passed) and keep the result so it is not parsed a second time.
        parsed = OBSParser.parse(raw_obs)
        obs = parsed[0]
    elif representation == "pixels":
        # Previously fell through and failed on the unbound ``obs``.
        raise NotImplementedError(
            "obs_representation 'pixels' is not implemented")
    else:
        raise ValueError(
            "Unknown obs_representation: {!r}".format(representation))

    # Extract MetaInfo like scoring from raw_obs
    if parsed is None:
        parsed = OBSParser.parse(raw_obs)
    _, (l_score, r_score, _) = parsed
    info['l_score'] = l_score
    info['r_score'] = r_score

    return obs, reward, done, info
def _side_to_df(self, get_s115: bool = True, get_smm: bool = True, get_raw: bool = True) -> Tuple[Union[None, np.ndarray], Union[None, np.ndarray], Union[None, np.ndarray], np.ndarray]:
    """Collect per-step observations and actions for the selected side.

    Steps ``1 .. self._expected_steps`` of ``self.data['steps']`` are read for
    the agent at index ``self.side.value``.

    Args:
        get_s115: Build the Simple115 observations.
        get_smm: Build the SMM observations.
        get_raw: Build the ``RawObs`` observations (cast to float32).

    Returns:
        ``(s115_obs, smm_obs, raw_obs, actions)``. Each observation entry is
        the per-step results concatenated on axis 0, or ``None`` when its flag
        is off. ``actions`` is a uint8 array with a trailing axis of size 1.
    """
    s115_frames, smm_frames, raw_frames, taken_actions = [], [], [], []

    for step in np.arange(1, self._expected_steps + 1):
        agent_step = self.data['steps'][step][self.side.value]
        players_raw = agent_step['observation']['players_raw']

        if get_s115:
            s115_frames.append(
                Simple115StateWrapper.convert_observation(
                    players_raw, fixed_positions=True))
        if get_smm:
            # Only the first controlled player is turned into a minimap.
            smm_frames.append(
                observation_preprocessing.generate_smm([players_raw[0]]))
        if get_raw:
            raw_frames.append(RawObs.convert_observation(players_raw))

        taken_actions.append(agent_step['action'][0])

    s115_obs = None
    if get_s115:
        s115_obs = np.concatenate(s115_frames, axis=0)

    smm_obs = None
    if get_smm:
        smm_obs = np.concatenate(smm_frames, axis=0)

    raw_obs = None
    if get_raw:
        raw_obs = np.concatenate(raw_frames, axis=0).astype(np.float32)

    action_column = np.array(taken_actions, dtype=np.uint8)
    return s115_obs, smm_obs, raw_obs, np.expand_dims(action_column, axis=1)
def agent(obs):
    """Choose an action for the single controlled player.

    The player's raw observation is converted to a Super Mini Map (SMM)
    frame, pushed onto the module-level ``observations`` stack, bit-packed the
    way SEED does, and passed to the module-level ``policy``.

    Args:
        obs: Observation dict containing a ``'players_raw'`` list.

    Returns:
        A one-element list holding the chosen action as ``int``.
    """
    # Only these two module-level names are rebound here; ``observations`` and
    # ``policy`` are merely read/mutated and need no ``global`` declaration.
    global previous_action, state

    # Get observations for the first (and only one) player we control.
    player_obs = obs['players_raw'][0]

    # Agent we trained uses Super Mini Map (SMM) representation.
    # See https://github.com/google-research/seed_rl/blob/master/football/env.py for details.
    smm_frame = observation_preprocessing.generate_smm([player_obs])[0]
    print(smm_frame.shape)

    if observations:
        observations.append(smm_frame)
    else:
        # First call: prime the stack with copies of the first frame.
        observations.extend([smm_frame] * 4)

    # SEED packs observations to reduce transfer times.
    # See PackedBitsObservation in https://github.com/google-research/seed_rl/blob/master/football/observation.py
    stacked = np.concatenate(list(observations), axis=-1)
    packed = np.packbits(stacked, axis=-1)
    if packed.shape[-1] % 2 == 1:
        # Pad the last axis to an even byte count so it can be viewed as uint16.
        pad_width = [(0, 0)] * (packed.ndim - 1) + [(0, 1)]
        packed = np.pad(packed, pad_width, 'constant')
    packed = packed.view(np.uint16)

    # Execute our agent to obtain action to take.
    agent_input = prepare_agent_input(packed, previous_action, state)
    agent_output, state = policy.get_action(*agent_input)
    previous_action = agent_output.action[0]
    return [int(previous_action)]
def process_obs(
        obs: Union[Dict[str, Any], List[Any]]) -> Tuple[np.ndarray, np.ndarray]:
    """Build the SMM and flattened Simple115 observations from a raw observation.

    ``obs`` may come from the gym env or from the Kaggle runner. The list of
    per-player raw dicts is located in:
        - Kaggle obs: ``obs_kag_env['players_raw']``
        - Gym obs: ``obs_gym_env`` itself

    Each per-player dict is expected to carry these keys:
        dict_keys(['left_team_tired_factor', 'left_team_yellow_card',
        'right_team_tired_factor', 'left_team', 'ball_owned_player',
        'right_team_yellow_card', 'ball_rotation', 'ball_owned_team', 'ball',
        'right_team_roles', 'right_team_active', 'steps_left', 'score',
        'right_team', 'left_team_roles', 'ball_direction', 'left_team_active',
        'left_team_direction', 'right_team_direction', 'game_mode',
        'designated', 'active', 'sticky_actions'])

    Returns:
        ``(smm_obs, simple_obs)`` -- note the order: SMM first, then the
        Simple115 observation flattened to one dimension.
    """
    players_raw = obs['players_raw'] if isinstance(obs, dict) else obs

    # This can return multiple rows when env has:
    # number_of_left_players_agent_controls=1 and
    # number_of_right_players_agent_controls=1
    simple_matrix = Simple115StateWrapper.convert_observation(
        players_raw, fixed_positions=False)
    simple_obs = simple_matrix.reshape(-1)

    # The minimap is built from the first player's observation only.
    first_player = players_raw[0]
    smm_obs = observation_preprocessing.generate_smm([first_player])
    return smm_obs, simple_obs
def take_action(self, observation):
    """Return the policy's action for a single controlled player.

    Args:
        observation: Sequence of raw observations; must have length 1.

    Returns:
        A one-element list with the value at ``[0][0]`` of the policy step
        result. It is returned as produced by the policy, i.e. it is not
        mapped through ``football_action_set``.
    """
    assert len(observation) == 1, 'Multiple players control is not supported'
    smm_frames = observation_preprocessing.generate_smm(observation)
    stacked_frames = self._stacker.get(smm_frames)
    step_result = self._policy.step(stacked_frames)
    chosen = step_result[0][0]
    return [chosen]
def add_raw_observation(self, observation):
    """Convert a raw observation to SMM and push it onto the frame stack.

    The stack ``self._data`` always holds ``self._stacked_size`` entries: the
    very first frame is repeated to fill it, later frames push out the oldest.

    Args:
        observation: Raw observation(s) accepted by ``generate_smm``.

    Raises:
        AssertionError: If ``self._observation_kind`` does not contain
            ``'extracted'``.
    """
    if 'extracted' not in self._observation_kind:
        # Raised explicitly instead of ``assert False`` so the check is not
        # stripped under ``python -O`` (which would silently stack the
        # unconverted observation). Type and message are unchanged.
        raise AssertionError('Unsupported observation kind!')

    frame = observation_preprocessing.generate_smm(observation)
    if self._data:
        # Build a new list (no in-place mutation) and keep the newest frames.
        self._data = (self._data + [frame])[-self._stacked_size:]
    else:
        self._data = [frame] * self._stacked_size
def __getitem__(self, idx):
    """Return the stacked SMM and Float115_v2 representation of sample ``idx``.

    Reference:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md#Observation%20Wrappers

    Args:
        idx: Row label in ``self.df``.

    Returns:
        ``((smm_frame, float115_frame), action)`` when ``self.train`` is true,
        otherwise just ``(smm_frame, float115_frame)``. ``smm_frame`` is
        ``self.stack_frames`` SMM frames (scaled by 1/255) concatenated on the
        last axis.
    """

    def load_frame(name):
        """Load one pickled raw observation and its scaled SMM frame."""
        # NOTE: pickle must only be used on trusted, locally produced frames.
        with open(join(obs_frames_path, name), 'rb') as pkl_file:
            raw = pickle.load(pkl_file)
        smm = observation_preprocessing.generate_smm([raw])[0] / 255.0
        return raw, smm

    # For Frame Stack
    stacked_obs = collections.deque([], maxlen=self.stack_frames)
    frame_name = self.df.loc[idx, 'frame_name']
    frame_step = int(frame_name.split('_')[1])

    if frame_step >= 5 and idx >= 5:
        # Enough history: stack the frames ending at ``idx``. Slicing the
        # range object avoids materialising a list of ``idx + 1`` integers.
        # NOTE(review): the threshold 5 is hard-coded while the stack depth
        # is ``self.stack_frames`` -- confirm they are meant to be independent.
        for frame_idx in range(idx + 1)[-self.stack_frames:]:
            raw_obs, smm_obs = load_frame(self.df.loc[frame_idx, 'frame_name'])
            stacked_obs.append(smm_obs)
    else:
        # Not enough history: repeat the current frame to fill the stack.
        raw_obs, smm_obs = load_frame(frame_name)
        stacked_obs.extend([smm_obs] * self.stack_frames)

    smm_frame = np.concatenate(list(stacked_obs), axis=-1)

    # Float115 Obs (from the raw observation of the frame at ``idx``)
    float115_frame = Simple115StateWrapper.convert_observation([raw_obs], True)[0]

    if self.train:
        # The label is looked up only in training mode so that inference data
        # without an 'action' column does not raise KeyError.
        return (smm_frame, float115_frame), int(self.df.loc[idx, 'action'])
    return (smm_frame, float115_frame)
def agent(obs):
    """Pick an action from four stacked SMM frames of the controlled player.

    Uses the module-level ``stacked_obs`` buffer and ``policy``.

    Args:
        obs: Observation dict containing a ``'players_raw'`` list.

    Returns:
        A one-element list holding the predicted action as ``int``.
    """
    # Obs for first player (i.e. player we control)
    player_obs = obs['players_raw'][0]
    frame = observation_preprocessing.generate_smm([player_obs])[0]

    if stacked_obs:
        stacked_obs.append(frame)
    else:
        # First call: prime the buffer with copies of the first frame.
        stacked_obs.extend([frame] * 4)

    model_input = np.concatenate(list(stacked_obs), axis=-1)
    predicted = policy.predict(model_input)
    return [int(predicted)]
def run_agent(self, obs, config, reward, info):
    """Run the model on the minimap observation and return the greedy action.

    Args:
        obs: Observation object exposing ``players_raw``.
        config, reward, info: Accepted but not used by this implementation.

    Returns:
        A one-element list with the argmax of the model output as ``int``.
    """
    print('About to start the agent')
    players_raw = obs.players_raw

    # Simple115 observation (computed but not fed to the model).
    simple115_obs = wrappers.Simple115StateWrapper.convert_observation(
        players_raw, True)

    # Or minimap observation.
    ## TODO: this should not be a batch dimension.
    minimap = observation_preprocessing.generate_smm(players_raw)

    print("Calling the model")
    model_output = self._model(minimap)
    action = np.argmax(model_output)
    print("Done")

    # you have to cast it back to int (from numpy.int64)
    return [int(action)]
def take_action(self, observations):
    """Return the football action chosen by the policy for one player.

    Args:
        observations: Sequence of raw observations; must have length 1.

    Returns:
        A one-element list with the entry of
        ``football_action_set.ACTION_SET_DICT[self._action_set]`` selected by
        the policy output.
    """
    assert len(observations) == 1, 'Multiple players control is not supported'

    smm_frames = observation_preprocessing.generate_smm(observations)
    stacked_frames = self._stacker.get(smm_frames)
    action = self._policy.step(stacked_frames)[0][0]
    print(action)

    # Only the default action set is supported by this player.
    assert self._action_set == 'default', self._action_set
    available_actions = football_action_set.ACTION_SET_DICT[self._action_set]
    return [available_actions[action]]
def run_agent(self, obs, config, reward, info):
    """Run the single-player model and map its action back to all players.

    Args:
        obs: Observation object exposing ``players_raw``.
        config, reward, info: Accepted but not used by this implementation.

    Returns:
        The result of ``MultiAgentToSingleAgent.get_action`` for the greedy
        (argmax) action and the original multi-agent observations.
    """
    print('About to start the agent')

    # As we train a simple, single player agent we want to control a single
    # player at a time, so we use MultiAgentToSingleAgent wrapper to modify
    # multi-agent scenario observations.
    single_obs = wrappers.MultiAgentToSingleAgent.get_observation(
        obs.players_raw)

    # Then we can apply additional wrappers to use different observation
    # format for the agent. For more details see
    # https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
    #
    # The model input is now built from ``single_obs``; previously the
    # unconverted ``obs.players_raw`` was used and ``single_obs`` was never
    # read, contradicting the comment above.
    # A Simple115 observation could be obtained the same way with
    # ``wrappers.Simple115StateWrapper.convert_observation(single_obs, True)``;
    # it is not computed here because the model only consumes the minimap.

    # Minimap observation:
    ## TODO: this should not be a batch dimension.
    minimap = observation_preprocessing.generate_smm(single_obs)

    print("Calling the model")
    action = np.argmax(self._model(minimap))
    print("Done")

    # get_action receives the original multi-agent observations so the single
    # action can be mapped back onto them.
    return wrappers.MultiAgentToSingleAgent.get_action(
        action, obs.players_raw)
def obs_convert(obs):
    """Return the first player's SMM frame with 84x84 channel dimensions.

    Args:
        obs: Observation dict containing a ``'players_raw'`` list.
    """
    first_player = obs['players_raw'][0]
    smm_batch = observation_preprocessing.generate_smm(
        [first_player], channel_dimensions=(84, 84))
    return smm_batch[0]
def observation(self, obs):
    """Return ``obs`` converted by ``observation_preprocessing.generate_smm``."""
    smm_obs = observation_preprocessing.generate_smm(obs)
    return smm_obs
def obs_convert(obs):
    """Return the SMM frame of the first player in ``obs['players_raw']``.

    Args:
        obs: Observation dict containing a ``'players_raw'`` list.
    """
    first_player = obs['players_raw'][0]
    smm_batch = observation_preprocessing.generate_smm([first_player])
    return smm_batch[0]
def take_action(self, observation):
    """Run the session's ArgMax tensor on the stacked SMM observation.

    Args:
        observation: Raw observation(s) accepted by ``generate_smm``.

    Returns:
        A one-element list with the first entry of the fetched result as
        ``int``.
    """
    smm_frames = observation_preprocessing.generate_smm(observation)
    stacked_frames = self._stacker.get(smm_frames)

    # Tensors are addressed by name in the session's graph.
    feed = {"player_0/ppo2_model/Ob:0": stacked_frames}
    fetched = self._sess.run("player_0/ppo2_model/ArgMax:0", feed_dict=feed)
    return [int(fetched[0])]
def main(_):
    """Play games with a PPO2 player and dump (observation, action) pairs.

    Runs ``n_timesteps`` environment steps. For every step the stacked SMM
    observation and the index of the player's action in ``full_action_set``
    are recorded; each time the environment reports ``done`` the recorded data
    is pickled to ``observations{game_i}.pkl`` / ``actions{game_i}.pkl`` and
    the buffers are cleared. Steps recorded after the last ``done`` are not
    written.

    Args:
        _: Unused positional argument (the name is rebound inside the loop).
    """
    # Both sides are driven by the same checkpoint.
    left_player = 'ppo2_cnn:left_players=1,policy=gfootball_impala_cnn,checkpoint=/Users/stephen/Documents/football/checkpoints/11_vs_11_easy_stochastic_v2'
    right_player = 'ppo2_cnn:right_players=1,policy=gfootball_impala_cnn,checkpoint=/Users/stephen/Documents/football/checkpoints/11_vs_11_easy_stochastic_v2'
    players = [left_player, right_player]
    env_config_values = {
        'dump_full_episodes': False,
        'dump_scores': False,
        'players': players,
        'level': '11_vs_11_easy_stochastic',
        'tracesdir': '/Users/stephen/Documents/football/logs',  # logdir
        'write_video': False
    }
    env_config = config.Config(env_config_values)
    env = football_env.FootballEnv(env_config)
    env.reset()

    # Build the config for a separate Player instance whose actions get logged.
    player_config = {'index': 2}
    name, definition = config.parse_player_definition(left_player)
    config_name = 'player_{}'.format(name)
    # player_config only holds 'index' at this point, so this normally takes
    # the else branch and starts the counter at 0.
    if config_name in player_config:
        player_config[config_name] += 1
    else:
        player_config[config_name] = 0
    player_config.update(definition)
    player_config['stacked'] = True
    player = Player(player_config, env_config)
    stacker = ObservationStacker(4)

    n_timesteps = 30000  # 10 games
    game_i = 0
    observations = []
    actions = []
    for i in range(n_timesteps):
        # An empty action list is passed; presumably the env's own configured
        # players act -- TODO confirm.
        obs, _, done, _ = env.step([])
        obs_processed = observation_preprocessing.generate_smm([obs])
        obs_processed = stacker.get(obs_processed)
        observations.append(obs_processed)
        act = player.take_action([obs])[0]
        # Store the position of the action within full_action_set.
        actions.append(full_action_set.index(act))
        if done:
            # Episode finished: reset env and frame stack, then flush to disk.
            env.reset()
            stacker.reset()
            # NOTE(review): the original comment read "should not be shape
            # (3000, 72, 96, 16)"; presumably "should now be" -- confirm.
            observations = np.squeeze(np.vstack(
                observations))
            actions = np.array(actions)  # should be shape (n_samples,)
            with open(
                    f'/Users/stephen/Documents/football/data/observations{game_i}.pkl',
                    'wb') as f:
                pickle.dump(observations, f)
            with open(
                    f'/Users/stephen/Documents/football/data/actions{game_i}.pkl',
                    'wb') as f:
                pickle.dump(actions, f)
            game_i += 1
            # Start fresh buffers for the next game.
            observations = []
            actions = []
    print('Done :)')
def observation(self, obs):
    """Return the SMM encoding of ``obs`` using this wrapper's settings.

    The channel dimensions come from ``self._channel_dimensions`` and the
    config from the unwrapped environment's ``_config``.
    """
    channel_dimensions = self._channel_dimensions
    env_config = self.env.unwrapped._config
    return observation_preprocessing.generate_smm(
        obs,
        channel_dimensions=channel_dimensions,
        config=env_config)
from kaggle_environments import make env = make("football", configuration={ "save_video": True, "scenario_name": "11_vs_11_kaggle" }) # This is the observation that is passed on agent function. obs_kag_env = env.state[0]['observation'] print(obs_kag_env.keys()) simple_obs_ = Simple115StateWrapper.convert_observation( obs_kag_env['players_raw'], fixed_positions=False) smm_obs_ = observation_preprocessing.generate_smm( [obs_kag_env['players_raw'][0]]) base_env = gym.make("GFootball-11_vs_11_kaggle-SMM-v0").unwrapped obs_gym_env = base_env.reset() wrapped_env = SimpleAndSMMObsWrapper(base_env.unwrapped) wrapped_env.reset() SimpleAndSMMObsWrapper.process_obs(obs_kag_env) SimpleAndSMMObsWrapper.process_obs(obs_gym_env) buff_wrapped_env = SMMFrameProcessWrapper(wrapped_env) buff_obs = buff_wrapped_env.reset() buff_obs = buff_wrapped_env.step(1) buffed_smm = SMMFrameProcessWrapper(