def __init__(self):
    """Start an HFO server, then connect this agent as the goalie for
    team base_right and build its observation/action spaces.
    """
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    print(self.hfo_path)
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(team_name="base_right",
                             play_goalie=True,
                             config_dir=hfo_py.get_config_path(),
                             server_port=self.server_port)
    print("Shape =", self.env.getStateSize())
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),),
                                        dtype=np.float32)
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # One (low, high) bound pair per parameterized action.
    parameter_bounds = [
        ([0, -180], [100, 180]),
        ([-180], [180]),
        ([0, -180], [100, 180]),
        ([0], [100]),
    ]
    parameter_spaces = [
        spaces.Box(low=np.array(lo, dtype=np.float32),
                   high=np.array(hi, dtype=np.float32),
                   dtype=np.float32)
        for lo, hi in parameter_bounds
    ]
    self.action_space = spaces.Tuple((spaces.Discrete(5), *parameter_spaces))
    self.status = hfo_py.IN_GAME
    self._seed = -1
def __init__(self):
    """Start an HFO server (1 offense agent + 1 offense NPC) on a free port
    and connect this agent with the high-level feature set.
    """
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    # Reserve an unused port; the bound socket local keeps the port claimed
    # while the server is configured.
    # NOTE(review): the socket is never explicitly closed here — confirm
    # bind_unused_port's contract covers release.
    sock, port2use = self.bind_unused_port()
    num_offense_agents = 1
    num_offense_agents_npcs = 1
    self._configure_environment(port2use, num_offense_agents,
                                num_offense_agents_npcs)
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(feature_set=hfo_py.HIGH_LEVEL_FEATURE_SET,
                             config_dir=hfo_py.get_config_path(),
                             server_port=port2use)
    # BUG FIX: shape must be a tuple — (n) is just the int n, which makes
    # spaces.Box raise a TypeError.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),))
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # BUG FIX: shape=1 -> shape=(1,) for the same tuple requirement.
    self.action_space = spaces.Tuple((spaces.Discrete(3),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,))))
    self.status = hfo_py.IN_GAME
def __init__(self):
    """Start an HFO server (optionally with a scripted static keeper in a
    separate process) and connect this agent.
    """
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self._configure_environment()
    # Optionally run the scripted "static" keeper alongside the server.
    if hasattr(self, '_static_keeper') and self._static_keeper:
        print('\n\nstatic\n\n')
        self._static_keeper_process = Process(
            target=self._start_static_keeper, args=[self.server_port])
        self._static_keeper_process.start()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path(),
                             server_port=self.server_port)
    # BUG FIX: shape must be a tuple — (n) is just the int n, which makes
    # spaces.Box raise a TypeError.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),))
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # BUG FIX: shape=1 -> shape=(1,) for the same tuple requirement.
    self.action_space = spaces.Tuple((spaces.Discrete(3),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,))))
    self.status = hfo_py.IN_GAME
def __init__(self):
    """Start an HFO server and connect, using an action space that combines
    each discrete action's parameters into a single Box.
    """
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path())
    # BUG FIX: shape must be a tuple — (n) is just the int n, which makes
    # spaces.Box raise a TypeError.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),))
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # The parameters of each discrete action are combined into one Box:
    # presumably Dash(power, direction), Turn(direction),
    # Kick(power, direction) — confirm against the agent's step().
    self.action_space = spaces.Tuple(
        (spaces.Discrete(3),
         spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0])),
         spaces.Box(low=np.array([-180.0]), high=np.array([180.0])),
         spaces.Box(low=np.array([0.0, -180.0]), high=np.array([100.0, 180.0]))))
    self.status = hfo_py.IN_GAME
def __init__(self, config, port):
    """Connect one agent to an already-running HFO server on *port*.

    config may supply 'feature_set' to override the server default.
    """
    print("single agent", port)
    self.server_port = port
    self.hfo_path = hfo_py.get_hfo_path()
    self.env = hfo_py.HFOEnvironment()
    # Build the connection arguments once; feature_set is optional.
    connect_kwargs = {
        "config_dir": hfo_py.get_config_path(),
        "server_port": self.server_port,
    }
    if "feature_set" in config:
        connect_kwargs["feature_set"] = config["feature_set"]
    self.env.connectToServer(**connect_kwargs)
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),),
                                        dtype=np.float32)
    print("single agent init", self.observation_space)
    self.action_space = spaces.Discrete(14)
    self.status = hfo_py.IN_GAME
    self._seed = -1
    # Bookkeeping from the previous step (used for reward shaping).
    self.old_ball_prox = 0
    self.old_kickable = 0
    self.old_ball_dist_goal = 0
    self.got_kickable_reward = False
    self.first_step = True
    # Uniform number (identifier) of our lone agent.
    self.unum = self.env.getUnum()
def __init__(self, config):
    """Start an HFO server per *config* and connect a single agent with a
    4-way discrete action space.
    """
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    print(self.hfo_path)
    self._configure_environment(config)
    self.env = hfo_py.HFOEnvironment()
    # feature_set is optional; only forward it when the config provides one.
    connect_kwargs = {
        "config_dir": hfo_py.get_config_path(),
        "server_port": self.server_port,
    }
    if "feature_set" in config:
        connect_kwargs["feature_set"] = config["feature_set"]
    self.env.connectToServer(**connect_kwargs)
    print("Shape =", self.env.getStateSize())
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),),
                                        dtype=np.float32)
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # The four discrete actions are:
    #   Go To Ball(), Move(), Shoot(), Dribble()
    self.action_space = spaces.Discrete(4)
    self.status = hfo_py.IN_GAME
    self._seed = -1
def __init__(self):
    """Configure and connect to an HFO server with low-level features."""
    # BUG FIX: the original read `super(hfoENV, self)`, which builds the
    # super proxy but never invokes it — the base-class initializer was
    # silently skipped. Call __init__ on it.
    super(hfoENV, self).__init__()
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self.configure()
    self.env = hfo_py.HFOEnvironment()
    # NOTE(review): LOW_LEVEL_FEATURE_SET is used unqualified — presumably
    # imported from hfo_py at module level; verify against the file's imports.
    self.env.connectToServer(feature_set=LOW_LEVEL_FEATURE_SET,
                             config_dir=hfo_py.get_config_path())
    self.game_info = GameInfo(1)
def __init__(self):
    """Start an HFO server and connect, exposing a single flat Box action
    space of discrete + continuous components.
    """
    self.discrete_action_size = 6
    self.continuous_action_size = 7
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path())
    # dtype made explicit for consistency with the other constructors in
    # this file.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),),
                                        dtype=np.float32)
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # One flat vector covering both discrete and continuous action components.
    action_dim = self.continuous_action_size + self.discrete_action_size
    self.action_space = spaces.Box(low=0, high=1, shape=(action_dim,),
                                   dtype=np.float32)
    self.status = hfo_py.IN_GAME
def __init__(self):
    """Start an HFO server and connect with the default feature set."""
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path())
    # BUG FIX: shape must be a tuple — (n) is just the int n, which makes
    # spaces.Box raise a TypeError.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),))
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # BUG FIX: shape=1 -> shape=(1,) for the same tuple requirement.
    self.action_space = spaces.Tuple((spaces.Discrete(3),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,)),
                                      spaces.Box(low=0, high=100, shape=(1,)),
                                      spaces.Box(low=-180, high=180, shape=(1,))))
    self.status = hfo_py.IN_GAME
def __init__(self):
    """Start an HFO server and connect, with a parameterized 3-action space."""
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path())
    # getStateSize() returns an int; spaces.Box requires shape to be a tuple,
    # so wrap it as (size,) to avoid a TypeError.
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),))
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # Per-action parameter bounds (cf.
    # https://github.com/cycraig/gym-soccer/blob/master/gym_soccer/envs/soccer_env.py).
    # Unused low3/high3 removed — the Tuple below only takes three Boxes.
    low0 = np.array([0, -180], dtype=np.float32)
    high0 = np.array([100, 180], dtype=np.float32)
    low1 = np.array([-180], dtype=np.float32)
    high1 = np.array([180], dtype=np.float32)
    low2 = np.array([0, -180], dtype=np.float32)
    high2 = np.array([100, 180], dtype=np.float32)
    self.action_space = spaces.Tuple(
        (spaces.Discrete(3),
         spaces.Box(low=low0, high=high0, dtype=np.float32),
         spaces.Box(low=low1, high=high1, dtype=np.float32),
         spaces.Box(low=low2, high=high2, dtype=np.float32)))
    self.status = hfo_py.IN_GAME
def __init__(self, config):
    """Multi-agent wrapper: start the HFO server and spawn one remote
    per-agent environment for each offense agent in the server config.
    """
    self.viewer = None
    self.server_process = None
    self.rcspid = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    #print(self.hfo_path)
    self._configure_environment(config)
    self.env = hfo_py.HFOEnvironment()
    self.one_hot_state_encoding = config.get("one_hot_state_encoding", False)
    # num = config.pop("num_agents", 1)
    # Number of sub-environments to create, one per offense agent.
    self.num = config["server_config"]["offense_agents"]
    self.agents = []
    for i in range(self.num):
        # Spawn a remote (actor-style) per-agent environment that connects
        # back to the server just started above.
        self.agents.append(env_name_or_creator.remote(config, self.server_port))
        # NOTE(review): the collapsed source formatting made this ambiguous —
        # the sleep is placed inside the loop on the assumption each remote
        # agent needs time to connect before the next starts; confirm against
        # the original file.
        time.sleep(2)
    # Set of agent ids that have finished their episode.
    self.dones = set()
def __init__(self):
    """Start an HFO server and connect, with a parameterized 3-action space."""
    self.viewer = None
    self.server_process = None
    self.server_port = None
    self.hfo_path = hfo_py.get_hfo_path()
    print(self.hfo_path)
    self._configure_environment()
    self.env = hfo_py.HFOEnvironment()
    self.env.connectToServer(config_dir=hfo_py.get_config_path(),
                             server_port=self.server_port)
    print("Shape =", self.env.getStateSize())
    self.observation_space = spaces.Box(low=-1, high=1,
                                        shape=(self.env.getStateSize(),),
                                        dtype=np.float32)
    # Action space omits the Tackle/Catch actions, which are useful on defense.
    # Each Box holds one discrete action's parameters; e.g. action 0 has two
    # parameters ranging over [0, 100] and [-180, 180], so its bounds are
    # low=[0, -180] and high=[100, 180].
    # Unused low3/high3 removed — the Tuple below only takes three Boxes.
    low0 = np.array([0, -180], dtype=np.float32)
    high0 = np.array([100, 180], dtype=np.float32)
    low1 = np.array([-180], dtype=np.float32)
    high1 = np.array([180], dtype=np.float32)
    low2 = np.array([0, -180], dtype=np.float32)
    high2 = np.array([100, 180], dtype=np.float32)
    self.action_space = spaces.Tuple(
        (spaces.Discrete(3),
         spaces.Box(low=low0, high=high0, dtype=np.float32),
         spaces.Box(low=low1, high=high1, dtype=np.float32),
         spaces.Box(low=low2, high=high2, dtype=np.float32)))
    self.status = hfo_py.IN_GAME
    self._seed = -1
def configure_agent(player, team, port=6000, feature='low', address='localhost', record_dir=None):
    """Build the keyword-argument dict for connecting an HFO agent.

    Args:
        player: role name; 'goalie' makes this agent play goalie.
        team: name of the team to join.
        port: server port to connect to.
        feature: 'low' selects the low-level feature set; anything else
            selects the high-level one.
        address: address of the server.
        record_dir: if given, record the agent's states/actions/rewards
            to this directory.

    Returns:
        dict of connection arguments for hfo_py.
    """
    # Idiom fix: `True if x else False` collapsed to the boolean expression.
    is_goalie = player == 'goalie'
    feature_set = (hfo_py.LOW_LEVEL_FEATURE_SET if feature == 'low'
                   else hfo_py.HIGH_LEVEL_FEATURE_SET)
    agent_args = {
        # Formations config lives relative to the HFO binary path
        # (binary path minus its trailing 'HFO').
        'config_dir': hfo_py.get_hfo_path()[:-3] + 'teams/base/config/formations-dt',
        'feature_set': feature_set,   # high- or low-level state features
        'server_port': port,          # port to connect to server on
        'server_addr': address,       # address of server
        'team_name': team,            # name of the team to join
        'play_goalie': is_goalie,     # is this player the goalie
    }
    if record_dir:
        agent_args['record_dir'] = record_dir
    return agent_args
def main(args):
    """Entry point: train or evaluate a GAIL/BC/TRPO policy on the soccer env.

    Builds the env and expert dataset, optionally pretrains with behavior
    cloning, then runs TRPO-based adversarial training or evaluation
    depending on args.algo / args.task.
    """
    from gailtf.baselines.ppo1 import mlp_policy
    U.make_session(num_cpu=args.num_cpu).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        # Two-hidden-layer (64-unit) MLP policy factory used by BC and TRPO.
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space,
                                    ac_space=ac_space, reuse=reuse,
                                    hid_size=64, num_hid_layers=2)

    env = bench.Monitor(
        env, logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    # Per-task subdirectories for checkpoints and logs.
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    # NOTE(review): machine-specific absolute path below; the command is
    # printed but not executed (os.system is commented out).
    cmd = hfo_py.get_hfo_path(
    ) + ' --offense-npcs=1 --defense-npcs=1 --log-dir /home/yupeng/Desktop/workspace/src2/GAMIL-tf0/gail-tf/log/soccer_data/ --record --frames=200'
    print(cmd)
    # os.system(cmd)
    dataset = Mujoco_Dset(expert_data_path=args.expert_data_path,
                          ret_threshold=args.ret_threshold,
                          traj_limitation=args.traj_limitation)
    # previous: dataset = Mujoco_Dset(expert_path=args.expert_path, ret_threshold=args.ret_threshold, traj_limitation=args.traj_limitation)
    pretrained_weight = None
    if (args.pretrained and args.task == 'train') or args.algo == 'bc':
        # Pretrain with behavior cloning
        from gailtf.algo import behavior_clone
        if args.algo == 'bc' and args.task == 'evaluate':
            behavior_clone.evaluate(env, policy_fn,
                                    args.load_model_path_high,
                                    args.load_model_path_low,
                                    stochastic_policy=args.stochastic_policy)
            sys.exit()
        if args.task == 'train' and args.action_space_level == 'high':
            print("training high level policy")
            # NOTE(review): pretrained_weight_high/low are assigned but never
            # copied into pretrained_weight, which stays None when passed to
            # trpo_mpi.learn below — looks like a latent bug; confirm intent.
            pretrained_weight_high = behavior_clone.learn(
                env, policy_fn, dataset, max_iters=args.BC_max_iter,
                pretrained=args.pretrained,
                ckpt_dir=args.checkpoint_dir + '/high_level',
                log_dir=args.log_dir + '/high_level',
                task_name=task_name, high_level=True)
        if args.task == 'train' and args.action_space_level == 'low':
            print("training low level policy")
            pretrained_weight_low = behavior_clone.learn(
                env, policy_fn, dataset, max_iters=args.BC_max_iter,
                pretrained=args.pretrained,
                ckpt_dir=args.checkpoint_dir + '/low_level',
                log_dir=args.log_dir + '/low_level',
                task_name=task_name, high_level=False)
        if args.algo == 'bc':
            sys.exit()
    from gailtf.network.adversary import TransitionClassifier
    # discriminator
    discriminator = TransitionClassifier(env, args.adversary_hidden_size,
                                         entcoeff=args.adversary_entcoeff)
    if args.algo == 'trpo':
        # Set up for MPI seed
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            # Only rank 0 logs; silence the others.
            logger.set_level(logger.DISABLED)
        workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        from gailtf.algo import trpo_mpi
        if args.task == 'train':
            trpo_mpi.learn(env, policy_fn, discriminator, dataset,
                           pretrained=args.pretrained,
                           pretrained_weight=pretrained_weight,
                           g_step=args.g_step, d_step=args.d_step,
                           timesteps_per_batch=1024, max_kl=args.max_kl,
                           cg_iters=10, cg_damping=0.1,
                           max_timesteps=args.num_timesteps,
                           entcoeff=args.policy_entcoeff,
                           gamma=0.995, lam=0.97,
                           vf_iters=5, vf_stepsize=1e-3,
                           ckpt_dir=args.checkpoint_dir,
                           log_dir=args.log_dir,
                           save_per_iter=args.save_per_iter,
                           load_model_path=args.load_model_path,
                           task_name=task_name)
        elif args.task == 'evaluate':
            trpo_mpi.evaluate(env, policy_fn, args.load_model_path,
                              timesteps_per_batch=1024, number_trajs=10,
                              stochastic_policy=args.stochastic_policy)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError
    env.close()
def __init__(self):
    """Record the path to the HFO binary and the default server port (6000)."""
    self.viewer = self.server_process = None
    self.server_port = 6000
    self.hfo_path = hfo_py.get_hfo_path()
    print('HFO path: ', self.hfo_path)