def __init__(self, args):
    super(GFootballEnv, self).__init__()
    self.representation = args.state.split('_')[0]
    self.stacked = 'stacked' in args.state
    # Wrap the original environment:
    # https://github.com/google-research/football/blob/master/gfootball/env/__init__.py
    # This mirrors the online evaluation environment.
    self.raw_env = football_env.create_environment(
        env_name=args.level,
        stacked=False,  # set to False to align with online evaluation
        representation="raw",  # set to raw to align with online evaluation
        rewards=args.reward_experiment)
    self.obs_stack = deque([], maxlen=4)
    # This is the training environment.
    self.real_env = football_env.create_environment(
        env_name=args.level,
        stacked=self.stacked,  # stacking as requested via args.state
        representation=self.representation,
        rewards=args.reward_experiment)
    self.action_space = self.real_env.action_space
    self.observation_space = self.real_env.observation_space
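# A hedged sketch (an assumption, not part of the original class) of the
# step() that typically accompanies the dual-environment setup above: the
# same action is forwarded to both environments, and the raw observation is
# pushed onto obs_stack. Since the two instances roll out independently, they
# can diverge in stochastic scenarios; this only illustrates the pairing.
def step(self, action):
    raw_obs, _, _, _ = self.raw_env.step(action)  # online-style raw obs
    self.obs_stack.append(raw_obs)                # keep the last 4 raw frames
    obs, reward, done, info = self.real_env.step(action)
    return obs, reward, done, info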
def random():
    # Optionally render the environment during the random rollout.
    display_train = False
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels',
                                              render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115',
                                              render=False)
    total_rewards = 0.0
    total_steps = 0
    obs = env.reset()  # start an episode
    while True:
        action = env.action_space.sample()  # choose a random action
        obs, reward, done, _ = env.step(action)  # obtain the reward
        total_rewards += reward
        total_steps += 1
        if done:
            break
    print("episode done in %d steps, total reward %.2f" %
          (total_steps, total_rewards))
def __init__(self, args):
    self.num_agents = args.num_agents
    self.scenario_name = args.scenario_name
    # make env
    if not (args.use_render and args.save_videos):
        self.env = football_env.create_environment(
            env_name=args.scenario_name,
            stacked=args.use_stacked_frames,
            representation=args.representation,
            rewards=args.rewards,
            number_of_left_players_agent_controls=args.num_agents,
            number_of_right_players_agent_controls=0,
            channel_dimensions=(args.smm_width, args.smm_height),
            render=(args.use_render and args.save_gifs))
    else:
        # render env and save videos
        self.env = football_env.create_environment(
            env_name=args.scenario_name,
            stacked=args.use_stacked_frames,
            representation=args.representation,
            rewards=args.rewards,
            number_of_left_players_agent_controls=args.num_agents,
            number_of_right_players_agent_controls=0,
            channel_dimensions=(args.smm_width, args.smm_height),
            # video-related params
            write_full_episode_dumps=True,
            render=True,
            write_video=True,
            dump_frequency=1,
            logdir=args.video_dir)
    self.max_steps = self.env.unwrapped.observation()[0]["steps_left"]
    self.remove_redundancy = args.remove_redundancy
    self.zero_feature = args.zero_feature
    self.share_reward = args.share_reward
    self.action_space = []
    self.observation_space = []
    self.share_observation_space = []
    if self.num_agents == 1:
        self.action_space.append(self.env.action_space)
        self.observation_space.append(self.env.observation_space)
        self.share_observation_space.append(self.env.observation_space)
    else:
        for idx in range(self.num_agents):
            self.action_space.append(
                spaces.Discrete(n=self.env.action_space[idx].n))
            self.observation_space.append(
                spaces.Box(low=self.env.observation_space.low[idx],
                           high=self.env.observation_space.high[idx],
                           shape=self.env.observation_space.shape[1:],
                           dtype=self.env.observation_space.dtype))
            self.share_observation_space.append(
                spaces.Box(low=self.env.observation_space.low[idx],
                           high=self.env.observation_space.high[idx],
                           shape=self.env.observation_space.shape[1:],
                           dtype=self.env.observation_space.dtype))
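# A hedged sketch (assumption) of how the share_reward flag stored above is
# usually applied in the corresponding step(): when enabled, every agent
# receives the summed team reward instead of its individual reward.
def step(self, actions):
    obs, reward, done, info = self.env.step(actions)
    if self.share_reward:
        global_reward = np.sum(reward)                # team-level reward
        reward = [[global_reward]] * self.num_agents  # same signal per agent
    return obs, reward, done, info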
def main():
    display_train = False  # whether to render the environment
    if display_train:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='pixels',
                                              render=True)
    else:
        env = football_env.create_environment(env_name='academy_empty_goal',
                                              representation='simple115',
                                              render=False)
    print("action space =", env.action_space)
    print("observation space =", env.observation_space)
def __init__(self,
             env_name='academy_empty_goal_close',
             representation='simple115',
             rewards='scoring,checkpoints',
             stacked=False,
             dump_path=None,
             solved_at=1,
             **kwargs):
    if football_env is None:
        raise ImportError('Could not import gfootball! '
                          'HINT: Perform the setup instructions here: '
                          'https://github.com/google-research/football')
    self._solved_at = solved_at
    self._env = football_env.create_environment(
        env_name=env_name,
        representation=representation,
        rewards=rewards,
        stacked=stacked,
        write_full_episode_dumps=dump_path is not None,
        write_goal_dumps=False,
        logdir=dump_path or '',
        **kwargs)
    self.action_space = self._env.action_space
    self.observation_space = self._env.observation_space
def watch(self, *, env, matches, weights, record):
    environment = SubprocVecEnv([
        lambda: football.create_environment(
            env_name="11_vs_11_easy_stochastic",
            representation=self.configs[0]["representation"],
            rewards=self.configs[0]["rewards"],
            enable_goal_videos=False,
            enable_full_episode_videos=True,
            render=True,
            write_video=record,
            dump_frequency=1,
            logdir="/home/charlie/Projects/Python/Football/videos/",
            extra_players=self.configs[0]["extra_players"],
            number_of_left_players_agent_controls=self.configs[0][
                "number_of_left_players_agent_controls"],
            number_of_right_players_agent_controls=self.configs[0][
                "number_of_right_players_agent_controls"],
            enable_sides_swap=self.configs[0]["enable_sides_swap"])
        for _ in range(1)
    ])
    # self.model.set_env(environment)
    watch = PPO2.load(weights, env=environment)
    for match in range(matches):
        watch.learn(total_timesteps=3100)
def create_gfootball_multiagent_env(save_dir, dump_freq, render):
    num_agents = 3  # for the 'academy_3_vs_1_with_keeper' scenario
    env = football_env.create_environment(
        env_name='academy_3_vs_1_with_keeper',
        stacked=False,
        representation='simple115',
        rewards='scoring',
        logdir=str(save_dir / 'football_dumps'),
        enable_goal_videos=False,
        enable_full_episode_videos=bool(dump_freq),
        render=render,
        write_video=True,
        dump_frequency=dump_freq,
        number_of_left_players_agent_controls=3,
        number_of_right_players_agent_controls=0,
        enable_sides_swap=False,
        channel_dimensions=(observation_preprocessing.SMM_WIDTH,
                            observation_preprocessing.SMM_HEIGHT))
    action_spaces = [
        gym.spaces.Discrete(env.action_space.nvec[1])
        for _ in range(num_agents)
    ]
    observation_spaces = [
        gym.spaces.Box(low=env.observation_space.low[0],
                       high=env.observation_space.high[0],
                       dtype=env.observation_space.dtype)
        for _ in range(num_agents)
    ]
    return env, action_spaces, observation_spaces
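# Usage sketch (assumption; the save_dir path is arbitrary): with three
# controlled left players the underlying action space is MultiDiscrete, so a
# step takes one integer action per agent and returns per-agent rewards.
from pathlib import Path

env, action_spaces, observation_spaces = create_gfootball_multiagent_env(
    save_dir=Path('/tmp/exp'), dump_freq=0, render=False)
obs = env.reset()                                      # per-agent observations
actions = [space.sample() for space in action_spaces]  # one int per agent
obs, rewards, done, info = env.step(actions)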
def get_env_info(env_name, state, reward_experiment, num_left_agents,
                 num_right_agents, representation, render, seed, num_agents):
    is_football = '11' in env_name or 'academy' in env_name
    if is_football:
        env = football_env.create_environment(
            representation=representation,
            env_name=env_name,
            stacked=('stacked' in state),
            rewards=reward_experiment,
            logdir=None,
            render=render and (seed == 0),
            dump_frequency=50 if render and seed == 0 else 0)
    else:
        env = create_atari_mjc_env(env_name)
        if num_agents == 1:
            from a2c_ppo_acktr.envs import ObsUnsqueezeWrapper
            env = ObsUnsqueezeWrapper(env)
    env.reset()
    num_left_player = (
        env.unwrapped._cached_observation[0]['left_team'].shape[0]
        if is_football else 1)
    num_right_player = (
        env.unwrapped._cached_observation[0]['right_team'].shape[0]
        if is_football else 0)
    observation_space = copy.deepcopy(env.observation_space)
    action_space = copy.deepcopy(env.action_space)
    env.close()
    return num_left_player, num_right_player, observation_space, action_space
def _create_single_football_env(
    level,
    stacked,
    representation,
    reward_experiment,
    write_goal_dumps,
    write_full_episode_dumps,
    write_video,
    dump_frequency,
    render,
    process_number=0,
):
    """Creates a gfootball environment.

    The meaning of each argument is documented in
    football/gfootball/examples/run_ppo2.py.
    """
    env = create_environment(
        env_name=level,
        stacked=stacked,
        representation=representation,
        rewards=reward_experiment,
        logdir=logger.get_dir(),
        write_goal_dumps=write_goal_dumps and (process_number == 0),
        write_full_episode_dumps=write_full_episode_dumps and
        (process_number == 0),
        write_video=write_video,
        render=render and (process_number == 0),
        dump_frequency=dump_frequency if render and process_number == 0 else 0)
    env = monitor.Monitor(
        env,
        logger.get_dir() and
        os.path.join(logger.get_dir(), str(process_number)))
    return env
def main(args):
    print(args.early_stop)
    print(args.disable_actions)
    set_seed(args.seed)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # initialize environment
    n_players = 3
    env = football_env.create_environment(
        env_name="academy_3_vs_1_with_keeper",
        representation="simple115",
        number_of_left_players_agent_controls=n_players,
        stacked=False,
        logdir="/tmp/football",
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)
    # state and action space
    state_space_size = env.observation_space.shape[1]  # we are using the simple115 representation
    action_space_size = env.action_space.nvec.tolist()[0]  # all three players share the same action space
    action_list = list(range(action_space_size))
    # state[98:100] encodes the three controlled players
    if args.disable_actions:
        action_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        action_space_size = len(action_list)
    # model
    print("loading models")
    actors = [Actor(state_space_size=state_space_size,
                    action_space_size=action_space_size)
              for _ in range(n_players)]
    critic = Critic(state_space_size=state_space_size)
    # maa2c
    maa2c = MAA2C(args=args,
                  env=env,
                  actors=actors,
                  critic=critic,
                  action_list=action_list,
                  device=device)
    print("learn")
    maa2c.learn()
def main(args):
    set_seed(args.seed)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # initialize environment
    n_players = 3
    env = football_env.create_environment(
        env_name="academy_3_vs_1_with_keeper",
        representation="simple115",
        number_of_left_players_agent_controls=n_players,
        stacked=False,
        logdir="/tmp/football",
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)
    # state and action space
    state_space_size = env.observation_space.shape[1]  # we are using the simple115 representation
    action_space_size = env.action_space.nvec.tolist()[0]  # all three players share the same action space
    # state[98:100] encodes the three controlled players
    # model
    print("loading models")
    actors = [
        Actor(state_space_size=state_space_size,
              action_space_size=action_space_size) for _ in range(n_players)
    ]
    critics = [
        Critic(state_space_size=state_space_size,
               action_space_size=action_space_size,
               n_players=n_players) for _ in range(n_players)
    ]
    old_actors = [
        Actor(state_space_size=state_space_size,
              action_space_size=action_space_size) for _ in range(n_players)
    ]
    old_critics = [
        Critic(state_space_size=state_space_size,
               action_space_size=action_space_size,
               n_players=n_players) for _ in range(n_players)
    ]
    for old_actor, actor in zip(old_actors, actors):
        old_actor.load_state_dict(actor.state_dict())
    for old_critic, critic in zip(old_critics, critics):
        old_critic.load_state_dict(critic.state_dict())
    # maddpg
    maddpg = MADDPG(env=env,
                    action_list=list(range(action_space_size)),
                    actors=actors,
                    critics=critics,
                    old_actors=old_actors,
                    old_critics=old_critics,
                    args=args,
                    device=device)
    print("learn")
    maddpg.learn()
def __init__(self, env_name="11_vs_11_easy_stochastic", obs_representation="stacked_smm", rewards="scoring,checkpoints", logdir="/tmp/football", env_id=0): super(FootballEnvWrapper, self).__init__() print("Env: " + env_name) self.env = football_env.create_environment( env_name=env_name, stacked=False, representation='raw', rewards=rewards, logdir=logdir, write_goal_dumps=False and (env_id == 0), write_full_episode_dumps=False and (env_id == 0), write_video=False and (env_id == 0), render=False, dump_frequency=30) self.obs_representation = obs_representation # For Frame Stack self.stacked_obs = collections.deque([], maxlen=4) # Define observation space and action space # They must be gym.spaces objects self.action_space = gym.spaces.Discrete(19) # 19 actions if obs_representation == "smm": self.observation_space = gym.spaces.Box(low=0, high=255, shape=(72, 96, 4), dtype=np.uint8) elif obs_representation == "stacked_smm": self.observation_space = gym.spaces.Box(low=0, high=255, shape=(72, 96, 16), dtype=np.uint8) elif obs_representation == "float115": self.observation_space = gym.spaces.Box(low=-30.0, high=30.0, shape=(115, ), dtype=np.float32) elif obs_representation == "pixels": pass elif obs_representation == "raw": # Use OBSParser self.observation_space = gym.spaces.Box(low=-30.0, high=30.0, shape=(207, ), dtype=np.float32) self.ball_owned_team = -1 self.rewards = rewards
def __init__(self, seed_dir, dump_freq, representation, render):
    self.nagents = 3  # only using the 'academy_3_vs_1_with_keeper' level for the moment
    self.representation = representation
    if representation == "simple37":
        env_representation = "simple115"  # we convert the 115-dim observation to 37 dims ourselves
    else:
        env_representation = representation
    # Instantiate environment
    self.env = football_env.create_environment(
        env_name='academy_3_vs_1_with_keeper',
        stacked=False,
        representation=env_representation,
        rewards='scoring',
        logdir=str(seed_dir / 'football_dumps'),
        enable_goal_videos=False,
        enable_full_episode_videos=bool(dump_freq),
        render=render,
        write_video=True,
        dump_frequency=dump_freq,
        number_of_left_players_agent_controls=3,
        number_of_right_players_agent_controls=0,
        enable_sides_swap=False,
        channel_dimensions=(observation_preprocessing.SMM_WIDTH,
                            observation_preprocessing.SMM_HEIGHT))
    obs_space_low = self.env.observation_space.low[0]
    obs_space_high = self.env.observation_space.high[0]
    # Adapt obs_space properties if we transform simple115 to simple37
    if self.representation == "simple37":
        obs_space_low = self.convert_simple115_to_simple37(
            simple115_vectors=np.expand_dims(obs_space_low, axis=0))[0]
        obs_space_high = self.convert_simple115_to_simple37(
            simple115_vectors=np.expand_dims(obs_space_high, axis=0))[0]
    # Define some useful attributes
    self.action_space = [
        gym.spaces.Discrete(self.env.action_space.nvec[1])
        for _ in range(self.nagents)
    ]
    self.observation_space = [
        gym.spaces.Box(low=obs_space_low,
                       high=obs_space_high,
                       dtype=self.env.observation_space.dtype)
        for _ in range(self.nagents)
    ]
    self.agent_types = ['agent' for _ in range(self.nagents)]
    cm = plt.cm.get_cmap('tab20')
    self.agent_colors = [
        np.array(cm(float(i) / float(self.nagents))[:3])
        for i in range(self.nagents)
    ]
def _thunk(): if env_id.startswith("dm"): _, domain, task = env_id.split('.') env = dm_control2gym.make(domain_name=domain, task_name=task) elif 'academy' in env_id or '11' in env_id: # gfootball environments #env = football_env.create_environment( # env_name=env_id, stacked=('stacked' in state), # with_checkpoints=('with_checkpoints' in reward_experiment), # logdir=log_dir, # enable_goal_videos=dump_scores and (seed == 0), # enable_full_episode_videos=dump_full_episodes and (seed == 0), # render=render and (seed == 0), # dump_frequency=50 if render and seed == 0 else 0, # representation='extracted') env = football_env.create_environment( env_name=env_id, stacked=('stacked' in state), rewards=reward_experiment, logdir=log_dir, render=render and (seed == 0), dump_frequency=50 if render and seed == 0 else 0) env = EpisodeRewardScoreWrapper( env, number_of_left_players_agent_controls=1, number_of_right_players_agent_controls=0) else: env = gym.make(env_id) is_atari = hasattr(gym.envs, 'atari') and isinstance( env.unwrapped, gym.envs.atari.atari_env.AtariEnv) if is_atari: env = make_atari(env_id) env.seed(seed + rank) obs_shape = env.observation_space.shape if str(env.__class__.__name__).find('TimeLimit') >= 0: env = TimeLimitMask(env) if is_atari: if len(env.observation_space.shape) == 3: env = wrap_deepmind(env, frame_stack=True) elif len(env.observation_space.shape ) == 3 and 'academy' not in env_id and '11' not in env_id: raise NotImplementedError( "CNN models work only for atari,\n" "please use a custom wrapper for a custom pixel input env.\n" "See wrap_deepmind for an example.") # If the input has shape (W,H,3), wrap for PyTorch convolutions obs_shape = env.observation_space.shape if len(obs_shape) == 3 and obs_shape[2] in [1, 3]: env = TransposeImage(env, op=[2, 0, 1]) return env
def __init__(self, version, envs, hours=0, verbose=False, weights=None):
    self.version = version
    self.name = "football-ppo{}".format(version) + "-e{}"
    self.path = "models/football-ppo-{}/".format(version)
    self.defaults = {
        "env_name": "",
        "representation": "simple115",
        "rewards": "scoring",
        "render": False,
        "write_video": False,
        "dump_frequency": 1,
        "extra_players": None,
        "number_of_left_players_agent_controls": 1,
        "number_of_right_players_agent_controls": 0,
        "enable_sides_swap": False,
        "parallel": 1
    }
    # Merge each env config over the defaults (only keys present in
    # self.defaults are kept).
    self.configs = [{k: env.get(k, v) for k, v in self.defaults.items()}
                    for env in envs]

    def make_env_fn(config):
        return lambda: football.create_environment(
            env_name=config["env_name"],
            representation=config["representation"],
            rewards=config["rewards"],
            render=config["render"],
            write_video=config["write_video"],
            dump_frequency=config["dump_frequency"],
            extra_players=config["extra_players"],
            number_of_left_players_agent_controls=config[
                "number_of_left_players_agent_controls"],
            number_of_right_players_agent_controls=config[
                "number_of_right_players_agent_controls"],
            enable_sides_swap=config["enable_sides_swap"])

    self.training = SubprocVecEnv([
        make_env_fn(config)
        for config in self.configs
        for _ in range(config["parallel"])
    ])
    self.inputs = self.training.get_attr("observation_space")[0].shape[0]
    self.outputs = self.training.get_attr("action_space")[0].n
    self.verbose = verbose
    if not verbose:
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
        deprecation._PRINT_DEPRECATION_WARNINGS = False
        logger = logging.getLogger()
        logger.setLevel(logging.ERROR)
    if weights is None:
        self.model = PPO2(policy=MlpPolicy, env=self.training,
                          verbose=int(self.verbose))
    else:
        self.model = PPO2.load(weights, env=self.training,
                               learning_rate=0.002)
    self.experience = hours * 60
def create_single_football_env(args):
    env = football_env.create_environment(
        env_name=args.env_name,
        stacked=True,
        logdir='/tmp/rllib_test',
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=args.render,
        dump_frequency=0,
        number_of_left_players_agent_controls=args.left_agent,
        number_of_right_players_agent_controls=args.right_agent,
        channel_dimensions=(42, 42))
    return env
def init_env(): env = football_env.create_environment( env_name=config["academy_scenario"], rewards=config["scoring"], render=config["render_mode"], number_of_left_players_agent_controls=config["num_to_control"], representation='simple115v2') env.seed(seed + rank * 1000) np.random.seed(seed + rank * 1000) return env
def create_single_football_env(iprocess):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=FLAGS.level,
        stacked=('stacked' in FLAGS.state),
        rewards=FLAGS.reward_experiment,
        logdir=logger.get_dir(),
        write_goal_dumps=FLAGS.dump_scores and (iprocess == 0),
        write_full_episode_dumps=FLAGS.dump_full_episodes and (iprocess == 0),
        render=FLAGS.render and (iprocess == 0),
        dump_frequency=50 if FLAGS.render and iprocess == 0 else 0)
    env = monitor.Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir()))
    return env
def __init__(self, num_agents):
    self.env = football_env.create_environment(
        env_name='11_vs_11_easy_stochastic',
        stacked=True,
        logdir='logs',
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False,
        dump_frequency=0,
        number_of_left_players_agent_controls=1,
        number_of_right_players_agent_controls=0,
        rewards='scoring,checkpoints')
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
    self.num_agents = 1
def get_next(self):
    scenario = self.training_plan.popleft()
    self.current_scenario_name = scenario
    env = football_env.create_environment(
        env_name=scenario,
        stacked=False,
        logdir='/tmp/football',
        representation=self.representation,
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)
    return env
def __init__(self, config): self.mode = config["mode"] if self.mode == 'test': print( "\n++++++++++++++++++++RUNNING EVALUATION MODE++++++++++++++++++++++\n" ) # Testing mode self.env = football_env.create_environment( env_name='11_vs_11_easy_stochastic', stacked=True, logdir='/tmp/rllib_test', write_goal_dumps=False, write_full_episode_dumps=False, render=False, dump_frequency=0, number_of_left_players_agent_controls=1, number_of_right_players_agent_controls=0, rewards='scoring,checkpoints') self.num_agents = 1 else: # Training mode self.env = football_env.create_environment( env_name='11_vs_11_easy_stochastic', stacked=True, logdir='/tmp/rllib_test', write_goal_dumps=False, write_full_episode_dumps=False, render=False, dump_frequency=0, number_of_left_players_agent_controls=1, number_of_right_players_agent_controls=1, rewards='scoring,checkpoints') self.num_agents = 2 self.observation_space = gym.spaces.Box(0, 255, [72, 96, 16], dtype=np.float32) self.action_space = gym.spaces.Discrete(19)
def init_env():
    # (** EDITED **) Google Football env.
    # Changed the Gym env setup function.
    env = football_env.create_environment(
        env_name=config["academy_scenario"],
        rewards=config["scoring"],
        render=config["render_mode"],
        number_of_left_players_agent_controls=config["num_to_control"],
        representation='raw')
    env.seed(seed + rank * 1000)
    np.random.seed(seed + rank * 1000)
    return env
def create_single_football_env(seed): """Creates gfootball environment.""" env = football_env.create_environment( env_name=args.level, stacked=('stacked' in args.state), rewards=args.reward_experiment, # logdir=logger.get_dir(), # enable_goal_videos=args.dump_scores and (seed == 0), # enable_full_episode_videos=args.dump_full_episodes and (seed == 0), render=args.render and (seed == 0), dump_frequency=50 if args.render and seed == 0 else 0) # env = monitor.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), # str(seed))) return env
def run(config):
    env = football_env.create_environment(
        env_name=config["academy_scenario"],
        rewards=config["scoring"],
        render=config["render_mode"],
        number_of_left_players_agent_controls=config["num_to_control"],
        representation='raw')
    model = AttentionSAC.init_from_save(
        "./models/football/MAAC3/run2/model.pt", True)
    # (** EDITED **) Set up the replay buffer by iterating over the shapes of
    # env.action_space and env.observation_space.
    for ep_i in range(0, config["n_episodes"], config["n_rollout_threads"]):
        obs = env.reset()
        obs = make_state(obs)
        model.prep_rollouts(device='cpu')
        for et_i in range(config["episode_length"]):
            print("episode : {} | step : {}".format(ep_i, et_i), end='\r')
            # Rearrange observations to be per agent, and convert to torch Variable.
            torch_obs = [
                Variable(torch.Tensor(np.vstack(obs[:, i])),
                         requires_grad=False) for i in range(model.nagents)
            ]
            # Get actions as torch Variables.
            torch_agent_actions = model.step(torch_obs, explore=True)
            # Convert actions to numpy arrays.
            agent_actions = [ac.data.numpy() for ac in torch_agent_actions]
            # Rearrange actions to be per environment.
            actions = [[ac[i] for ac in agent_actions]
                       for i in range(config["n_rollout_threads"])]
            # Reform the actions list to fit the football env: the Google
            # Football environment takes integer actions, not one-hot encoded
            # action lists.
            actions_list = [[np.argmax(b) for b in a] for a in actions]
            # Step
            next_obs, rewards, dones, infos = env.step(actions_list)
            next_obs = make_state(next_obs)
            # Prevention of divergence: without this offset, training
            # diverges and produces NaNs.
            rewards = rewards - 0.000001
            # Reform the done-flag list to fit the replay buffer.
            obs = next_obs
    env.close()
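# make_state is not defined in the snippet above. A hypothetical sketch of
# what it might do is below: flatten each agent's raw observation dict into a
# feature vector, batched per rollout thread so that the obs[:, i] indexing
# works. Every field choice here is an assumption for illustration only.
def make_state(raw_obs):
    states = []
    for o in raw_obs:  # one raw observation dict per controlled agent
        feats = np.concatenate([o['ball'],                   # ball (x, y, z)
                                o['left_team'].flatten(),    # left-team positions
                                o['right_team'].flatten()])  # right-team positions
        states.append(feats)
    return np.array([states])  # shape (n_rollout_threads, n_agents, dim)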
def __init__(self, num_agents):
    self.env = football_env.create_environment(
        env_name='test_example_multiagent',
        stacked=False,
        logdir=os.path.join(tempfile.gettempdir(), 'rllib_test'),
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=True,
        dump_frequency=0,
        number_of_left_players_agent_controls=num_agents,
        channel_dimensions=(42, 42))
    self.action_space = gym.spaces.Discrete(self.env.action_space.nvec[1])
    self.observation_space = gym.spaces.Box(
        low=self.env.observation_space.low[0],
        high=self.env.observation_space.high[0],
        dtype=self.env.observation_space.dtype)
    self.num_agents = num_agents
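# A hedged sketch (assumption) of the MultiAgentEnv-style reset/step that
# usually accompanies this constructor in the RLlib examples: the vectorised
# gfootball observations are split into one entry per agent id.
def reset(self):
    obs = self.env.reset()
    return {'agent_%d' % i: obs[i] for i in range(self.num_agents)}

def step(self, action_dict):
    actions = [action_dict['agent_%d' % i] for i in range(self.num_agents)]
    obs, rewards, done, info = self.env.step(actions)
    return ({'agent_%d' % i: obs[i] for i in range(self.num_agents)},
            {'agent_%d' % i: rewards[i] for i in range(self.num_agents)},
            {'__all__': done},
            {})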
def create_single_football_env(seed, level='academy_empty_goal_close'):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=level,
        stacked=('stacked' in args.state),
        rewards=args.reward_experiment,
        # logdir="/home/sahar/downloads",
        # enable_goal_videos=args.dump_scores and (seed == 0),
        # enable_full_episode_videos=args.dump_full_episodes and (seed == 0),
        render=args.render and (seed == 0),
        dump_frequency=50 if args.render and seed == 0 else 0)
    print("Creating env: {}".format(level))
    # env = monitor.Monitor(env, logger.get_dir() and
    #                       os.path.join(logger.get_dir(), str(seed)))
    return env
def create_single_football_env(seed, level):
    """Creates gfootball environment."""
    env = football_env.create_environment(
        env_name=level,
        stacked=('stacked' in 'extracted_stacked'),  # always evaluates to True
        rewards='scoring',
        logdir=logger.get_dir(),
        enable_goal_videos=False,
        enable_full_episode_videos=False,
        render=False,
        dump_frequency=0)
    env = monitor.Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(seed)))
    return env
def create_single_football_env(seed): """Creates gfootball environment.""" env = football_env.create_environment( env_name=FLAGS.level, stacked=('stacked' in FLAGS.state), with_checkpoints=('with_checkpoints' in FLAGS.reward_experiment), logdir=logger.get_dir(), enable_goal_videos=FLAGS.dump_scores and (seed == 0), enable_full_episode_videos=FLAGS.dump_full_episodes and (seed == 0), render=FLAGS.render and (seed == 0), dump_frequency=50 if FLAGS.render and seed == 0 else 0) env = monitor.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(seed))) return env
def make_football(env_id, max_episode_steps=None):
    """Modified make_atari to work with the football env."""
    env = football_env.create_environment(
        env_name=env_id,
        stacked=False,
        representation='extracted',
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        rewards='scoring,checkpoints',
        channel_dimensions=(84, 84),
        render=False)
    # env = NoopResetEnv(env, noop_max=30)
    # env = MaxAndSkipEnv(env, skip=4)
    # TODO: does this make sense?
    assert max_episode_steps is None
    return env
def __init__(self, num_agents):
    self.env = football_env.create_environment(
        env_name='test_example_multiagent',
        stacked=False,
        rewards='scoring',
        # logdir='/tmp/rllib_test',
        enable_goal_videos=False,
        enable_full_episode_videos=False,
        render=True,
        dump_frequency=0,
        number_of_left_players_agent_controls=num_agents,
        channel_dimensions=(42, 42))
    # self.actions_are_logits = env_config.get("actions_are_logits", False)
    self.action_space = gym.spaces.Discrete(self.env.action_space.nvec[1])
    self.observation_space = gym.spaces.Box(
        low=self.env.observation_space.low[0],
        high=self.env.observation_space.high[0],
        dtype=self.env.observation_space.dtype)
    self.num_agents = num_agents