예제 #1
0
    def __init__(self):
        """Launch the HFO backend, connect as the base_right goalie, and
        build the observation/action spaces."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        print(self.hfo_path)
        self._configure_environment()
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(team_name="base_right",
                                 play_goalie=True,
                                 config_dir=hfo_py.get_config_path(),
                                 server_port=self.server_port)
        print("Shape =", self.env.getStateSize())
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.env.getStateSize(),),
                                            dtype=np.float32)
        # Action space omits the Tackle/Catch actions, which are useful on
        # defense.  One (low, high) bound pair per parameterized action.
        bounds = [
            (np.array([0, -180], dtype=np.float32),
             np.array([100, 180], dtype=np.float32)),
            (np.array([-180], dtype=np.float32),
             np.array([180], dtype=np.float32)),
            (np.array([0, -180], dtype=np.float32),
             np.array([100, 180], dtype=np.float32)),
            (np.array([0], dtype=np.float32),
             np.array([100], dtype=np.float32)),
        ]
        param_boxes = [spaces.Box(low=lo, high=hi, dtype=np.float32)
                       for lo, hi in bounds]
        self.action_space = spaces.Tuple((spaces.Discrete(5), *param_boxes))

        self.status = hfo_py.IN_GAME
        self._seed = -1
예제 #2
0
    def __init__(self):
        """Bind a free port, launch the HFO server on it, connect with the
        high-level feature set, and define observation/action spaces."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        # need to check unused port; keep `sock` alive so the port stays
        # reserved until the server grabs it
        sock, port2use = self.bind_unused_port()
        num_offense_agents = 1
        num_offense_agents_npcs = 1
        self._configure_environment(port2use, num_offense_agents,
                                    num_offense_agents_npcs)
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(feature_set=hfo_py.HIGH_LEVEL_FEATURE_SET,
                                 config_dir=hfo_py.get_config_path(),
                                 server_port=port2use)
        # BUG FIX: spaces.Box requires `shape` to be a tuple; getStateSize()
        # returns a bare int, which makes Box.__init__ raise TypeError.
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.env.getStateSize(),))
        # Action space omits the Tackle/Catch actions, which are useful on defense
        # BUG FIX: shape=1 -> shape=(1,) for every parameter Box (same reason).
        self.action_space = spaces.Tuple(
            (spaces.Discrete(3),
             spaces.Box(low=0, high=100, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,)),
             spaces.Box(low=0, high=100, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,))))

        self.status = hfo_py.IN_GAME
예제 #3
0
    def __init__(self):
        """Launch the HFO server (plus an optional scripted keeper process)
        and connect, then define observation/action spaces."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        self._configure_environment()
        # Optionally run a scripted ("static") keeper in its own process.
        if hasattr(self, '_static_keeper') and self._static_keeper:
            print('\n\nstatic\n\n')
            self._static_keeper_process = Process(
                target=self._start_static_keeper, args=[self.server_port])
            self._static_keeper_process.start()
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(config_dir=hfo_py.get_config_path(),
                                 server_port=self.server_port)

        # BUG FIX: spaces.Box requires `shape` to be a tuple; getStateSize()
        # returns a bare int, which makes Box.__init__ raise TypeError.
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.env.getStateSize(),))
        # Action space omits the Tackle/Catch actions, which are useful on defense
        # BUG FIX: shape=1 -> shape=(1,) for every parameter Box (same reason).
        self.action_space = spaces.Tuple(
            (spaces.Discrete(3),
             spaces.Box(low=0, high=100, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,)),
             spaces.Box(low=0, high=100, shape=(1,)),
             spaces.Box(low=-180, high=180, shape=(1,))))
        self.status = hfo_py.IN_GAME
예제 #4
0
    def __init__(self):
        """Launch the HFO server, connect, and define combined-parameter
        observation/action spaces."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        self._configure_environment()
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(config_dir=hfo_py.get_config_path())
        # BUG FIX: spaces.Box requires `shape` to be a tuple; getStateSize()
        # returns a bare int, which makes Box.__init__ raise TypeError.
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.env.getStateSize(),))
        # Action space omits the Tackle/Catch actions, which are useful on defense.
        # Modified action space: the parameters belonging to the same discrete
        # action are combined into a single Box per action.
        self.action_space = spaces.Tuple(
            (spaces.Discrete(3),
             spaces.Box(low=np.array([0.0, -180.0]),
                        high=np.array([100.0, 180.0])),
             spaces.Box(low=np.array([-180.0]), high=np.array([180.0])),
             spaces.Box(low=np.array([0.0, -180.0]),
                        high=np.array([100.0, 180.0]))))

        self.status = hfo_py.IN_GAME
예제 #5
0
    def __init__(self, config, port):
        """Connect one agent to an already-running HFO server on `port`.

        `config` may optionally carry a 'feature_set' entry that is passed
        through to connectToServer.
        """
        print("single agent", port)
        self.server_port = port
        self.hfo_path = hfo_py.get_hfo_path()
        self.env = hfo_py.HFOEnvironment()
        connect_kwargs = {
            'config_dir': hfo_py.get_config_path(),
            'server_port': self.server_port,
        }
        if "feature_set" in config:
            connect_kwargs['feature_set'] = config['feature_set']
        self.env.connectToServer(**connect_kwargs)
        obs_dim = self.env.getStateSize()
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(obs_dim,),
                                            dtype=np.float32)
        print("single agent init", self.observation_space)
        self.action_space = spaces.Discrete(14)

        self.status = hfo_py.IN_GAME
        self._seed = -1
        # Bookkeeping carried between steps (presumably for reward shaping —
        # the step logic is outside this view).
        self.old_ball_prox = 0
        self.old_kickable = 0
        self.old_ball_dist_goal = 0
        self.got_kickable_reward = False
        self.first_step = True
        # uniform number (identifier) of our lone agent
        self.unum = self.env.getUnum()
예제 #6
0
    def __init__(self, config):
        """Start the HFO backend per `config`, connect, and expose a small
        discrete action space."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        print(self.hfo_path)
        self._configure_environment(config)
        self.env = hfo_py.HFOEnvironment()
        connect_kwargs = {
            'config_dir': hfo_py.get_config_path(),
            'server_port': self.server_port,
        }
        if "feature_set" in config:
            connect_kwargs['feature_set'] = config['feature_set']
        self.env.connectToServer(**connect_kwargs)
        print("Shape =", self.env.getStateSize())
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(self.env.getStateSize(),),
                                            dtype=np.float32)
        # Action space omits the Tackle/Catch actions, which are useful on
        # defense.  The four discrete actions (per the original note):
        # Go To Ball, Move, Shoot, Dribble.
        self.action_space = spaces.Discrete(4)

        self.status = hfo_py.IN_GAME
        self._seed = -1
예제 #7
0
파일: Utils.py 프로젝트: yx3110/hfo_ac_v2
 def __init__(self):
     """Connect this env to a running HFO server using low-level features."""
     # BUG FIX: `super(hfoENV, self)` alone only builds the proxy object and
     # never runs the parent initializer — call __init__() on it.
     super(hfoENV, self).__init__()
     self.viewer = None
     self.server_process = None
     self.server_port = None
     self.hfo_path = hfo_py.get_hfo_path()
     self.configure()
     self.env = hfo_py.HFOEnvironment()
     self.env.connectToServer(feature_set=LOW_LEVEL_FEATURE_SET,
                              config_dir=hfo_py.get_config_path())
     self.game_info = GameInfo(1)
    def __init__(self):
        """Launch the HFO server, connect, and expose one flat action Box.

        The action vector packs 6 discrete-action entries and 7 continuous
        parameters into a single [0, 1] Box of length 13.
        """
        self.discrete_action_size = 6
        self.continuous_action_size = 7

        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        self._configure_environment()
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(config_dir=hfo_py.get_config_path())
        state_size = self.env.getStateSize()
        self.observation_space = spaces.Box(low=-1, high=1,
                                            shape=(state_size,))
        # Action space omits the Tackle/Catch actions, which are useful on
        # defense.
        total_dim = self.discrete_action_size + self.continuous_action_size
        self.action_space = spaces.Box(low=0, high=1, shape=(total_dim,))
        self.status = hfo_py.IN_GAME
예제 #9
0
 def __init__(self):
     """Launch the HFO server, connect, and define observation/action spaces."""
     self.viewer = None
     self.server_process = None
     self.server_port = None
     self.hfo_path = hfo_py.get_hfo_path()
     self._configure_environment()
     self.env = hfo_py.HFOEnvironment()
     self.env.connectToServer(config_dir=hfo_py.get_config_path())
     # BUG FIX: spaces.Box requires `shape` to be a tuple; getStateSize()
     # returns a bare int, which makes Box.__init__ raise TypeError.
     self.observation_space = spaces.Box(low=-1, high=1,
                                         shape=(self.env.getStateSize(),))
     # Action space omits the Tackle/Catch actions, which are useful on defense
     # BUG FIX: shape=1 -> shape=(1,) for every parameter Box (same reason).
     self.action_space = spaces.Tuple((spaces.Discrete(3),
                                       spaces.Box(low=0, high=100, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,)),
                                       spaces.Box(low=0, high=100, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,))))
     self.status = hfo_py.IN_GAME
예제 #10
0
파일: soccer_env.py 프로젝트: machinaut/gym
 def __init__(self):
     """Launch the HFO server, connect, and define observation/action spaces."""
     self.viewer = None
     self.server_process = None
     self.server_port = None
     self.hfo_path = hfo_py.get_hfo_path()
     self._configure_environment()
     self.env = hfo_py.HFOEnvironment()
     self.env.connectToServer(config_dir=hfo_py.get_config_path())
     # BUG FIX: spaces.Box requires `shape` to be a tuple; getStateSize()
     # returns a bare int, which makes Box.__init__ raise TypeError.
     self.observation_space = spaces.Box(low=-1, high=1,
                                         shape=(self.env.getStateSize(),))
     # Action space omits the Tackle/Catch actions, which are useful on defense
     # BUG FIX: shape=1 -> shape=(1,) for every parameter Box (same reason).
     self.action_space = spaces.Tuple((spaces.Discrete(3),
                                       spaces.Box(low=0, high=100, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,)),
                                       spaces.Box(low=0, high=100, shape=(1,)),
                                       spaces.Box(low=-180, high=180, shape=(1,))))
     self.status = hfo_py.IN_GAME
예제 #11
0
 def __init__(self):
     """Launch the HFO server, connect, and build the state/action spaces.

     getStateSize() returns an int, so it is wrapped in a 1-tuple before
     being handed to spaces.Box as `shape` (a bare int raises TypeError);
     the parameterized actions likewise use explicit low/high arrays
     (see https://github.com/cycraig/gym-soccer/blob/master/gym_soccer/envs/soccer_env.py).
     """
     self.viewer = None
     self.server_process = None
     self.server_port = None
     self.hfo_path = hfo_py.get_hfo_path()
     self._configure_environment()
     self.env = hfo_py.HFOEnvironment()
     self.env.connectToServer(config_dir=hfo_py.get_config_path())
     self.observation_space = spaces.Box(low=-1,
                                         high=1,
                                         shape=(self.env.getStateSize(),))
     # Action space omits the Tackle/Catch actions, which are useful on
     # defense.  One (low, high) bound pair per parameterized action.
     act0 = (np.array([0, -180], dtype=np.float32),
             np.array([100, 180], dtype=np.float32))
     act1 = (np.array([-180], dtype=np.float32),
             np.array([180], dtype=np.float32))
     act2 = (np.array([0, -180], dtype=np.float32),
             np.array([100, 180], dtype=np.float32))
     param_boxes = [spaces.Box(low=lo, high=hi, dtype=np.float32)
                    for lo, hi in (act0, act1, act2)]
     self.action_space = spaces.Tuple((spaces.Discrete(3), *param_boxes))
     self.status = hfo_py.IN_GAME
예제 #12
0
 def __init__(self, config):
     """Start the HFO backend and spawn one remote agent env per offense
     agent declared in config["server_config"]."""
     self.viewer = None
     self.server_process = None
     self.rcspid = None
     self.server_port = None
     self.hfo_path = hfo_py.get_hfo_path()
     self._configure_environment(config)
     self.env = hfo_py.HFOEnvironment()
     self.one_hot_state_encoding = config.get("one_hot_state_encoding",
                                              False)
     self.num = config["server_config"]["offense_agents"]

     self.agents = []
     for _ in range(self.num):
         worker = env_name_or_creator.remote(config, self.server_port)
         self.agents.append(worker)
         # NOTE(review): the sleep presumably staggers connections so the
         # server can accept each agent in turn — confirm against the server.
         time.sleep(2)
     self.dones = set()
예제 #13
0
    def __init__(self):
        """Launch the HFO server, connect, and build the state/action spaces."""
        self.viewer = None
        self.server_process = None
        self.server_port = None
        self.hfo_path = hfo_py.get_hfo_path()
        print(self.hfo_path)
        self._configure_environment()
        self.env = hfo_py.HFOEnvironment()
        self.env.connectToServer(config_dir=hfo_py.get_config_path(),
                                 server_port=self.server_port)
        print("Shape =", self.env.getStateSize())
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.env.getStateSize(),),
                                            dtype=np.float32)
        # Action space omits the Tackle/Catch actions, which are useful on
        # defense.  Each discrete action gets one Box for its parameters:
        # e.g. an action with a power in [0, 100] and a direction in
        # [-180, 180] gets low=[0, -180] and high=[100, 180].
        act0 = (np.array([0, -180], dtype=np.float32),
                np.array([100, 180], dtype=np.float32))
        act1 = (np.array([-180], dtype=np.float32),
                np.array([180], dtype=np.float32))
        act2 = (np.array([0, -180], dtype=np.float32),
                np.array([100, 180], dtype=np.float32))
        param_boxes = [spaces.Box(low=lo, high=hi, dtype=np.float32)
                       for lo, hi in (act0, act1, act2)]
        self.action_space = spaces.Tuple((spaces.Discrete(3), *param_boxes))

        self.status = hfo_py.IN_GAME
        self._seed = -1
예제 #14
0
def configure_agent(player,
                    team,
                    port=6000,
                    feature='low',
                    address='localhost',
                    record_dir=None):
    """Build the keyword-argument dict for an HFO agent connection.

    player: 'goalie' marks the agent as the goalkeeper; anything else is a
        field player.
    team: name of the team to join.
    port/address: where the HFO server is listening.
    feature: 'low' selects the low-level feature set, anything else the
        high-level one.
    record_dir: if truthy, the agent records states/actions/rewards there.
    """
    is_goalie = (player == 'goalie')

    if feature == 'low':
        feature_set = hfo_py.LOW_LEVEL_FEATURE_SET
    else:
        feature_set = hfo_py.HIGH_LEVEL_FEATURE_SET

    # NOTE(review): the [:-3] slice strips the binary name off the HFO path
    # to reach the install root — confirm it matches hfo_py's layout.
    agent_args = {
        'config_dir':
        hfo_py.get_hfo_path()[:-3] + 'teams/base/config/formations-dt',
        'feature_set': feature_set,
        'server_port': port,
        'server_addr': address,
        'team_name': team,
        'play_goalie': is_goalie,
    }
    if record_dir:
        agent_args['record_dir'] = record_dir
    return agent_args
예제 #15
0
def main(args):
    """Train or evaluate imitation-learning policies on the env ``args.env_id``.

    Depending on ``args.task`` / ``args.algo``, this optionally pretrains
    with behavior cloning, then runs TRPO-based adversarial imitation
    learning (via gailtf) or evaluates a saved policy.

    NOTE(review): the hard-coded --log-dir path in ``cmd`` below is
    machine-specific; the command is only printed, not executed.
    """
    from gailtf.baselines.ppo1 import mlp_policy
    U.make_session(num_cpu=args.num_cpu).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)

    # Policy factory handed to the learning algorithms: a 2-layer, 64-unit MLP.
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space,
                                    reuse=reuse,
                                    hid_size=64,
                                    num_hid_layers=2)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    # Namespace checkpoints/logs by task so runs don't overwrite each other.
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    # Command line that would launch the HFO server for recording expert data;
    # currently only printed (the os.system call below is commented out).
    cmd = hfo_py.get_hfo_path(
    ) + ' --offense-npcs=1 --defense-npcs=1 --log-dir /home/yupeng/Desktop/workspace/src2/GAMIL-tf0/gail-tf/log/soccer_data/ --record --frames=200'
    print(cmd)
    # os.system(cmd)

    # Expert demonstrations, used by behavior cloning and the discriminator.
    dataset = Mujoco_Dset(expert_data_path=args.expert_data_path,
                          ret_threshold=args.ret_threshold,
                          traj_limitation=args.traj_limitation)

    # previous: dataset = Mujoco_Dset(expert_path=args.expert_path, ret_threshold=args.ret_threshold, traj_limitation=args.traj_limitation)
    pretrained_weight = None

    if (args.pretrained and args.task == 'train') or args.algo == 'bc':
        # Pretrain with behavior cloning
        from gailtf.algo import behavior_clone
        if args.algo == 'bc' and args.task == 'evaluate':
            # Pure BC evaluation path: evaluate and exit without training.
            behavior_clone.evaluate(env,
                                    policy_fn,
                                    args.load_model_path_high,
                                    args.load_model_path_low,
                                    stochastic_policy=args.stochastic_policy)
            sys.exit()
        if args.task == 'train' and args.action_space_level == 'high':
            print("training high level policy")
            pretrained_weight_high = behavior_clone.learn(
                env,
                policy_fn,
                dataset,
                max_iters=args.BC_max_iter,
                pretrained=args.pretrained,
                ckpt_dir=args.checkpoint_dir + '/high_level',
                log_dir=args.log_dir + '/high_level',
                task_name=task_name,
                high_level=True)
        if args.task == 'train' and args.action_space_level == 'low':
            print("training low level policy")
            pretrained_weight_low = behavior_clone.learn(
                env,
                policy_fn,
                dataset,
                max_iters=args.BC_max_iter,
                pretrained=args.pretrained,
                ckpt_dir=args.checkpoint_dir + '/low_level',
                log_dir=args.log_dir + '/low_level',
                task_name=task_name,
                high_level=False)
        if args.algo == 'bc':
            # BC-only run: stop before the adversarial stage.
            sys.exit()

    from gailtf.network.adversary import TransitionClassifier
    # discriminator
    discriminator = TransitionClassifier(env,
                                         args.adversary_hidden_size,
                                         entcoeff=args.adversary_entcoeff)
    if args.algo == 'trpo':
        # Set up for MPI seed: only rank 0 logs; every rank gets its own seed.
        from mpi4py import MPI
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        from gailtf.algo import trpo_mpi
        if args.task == 'train':
            trpo_mpi.learn(env,
                           policy_fn,
                           discriminator,
                           dataset,
                           pretrained=args.pretrained,
                           pretrained_weight=pretrained_weight,
                           g_step=args.g_step,
                           d_step=args.d_step,
                           timesteps_per_batch=1024,
                           max_kl=args.max_kl,
                           cg_iters=10,
                           cg_damping=0.1,
                           max_timesteps=args.num_timesteps,
                           entcoeff=args.policy_entcoeff,
                           gamma=0.995,
                           lam=0.97,
                           vf_iters=5,
                           vf_stepsize=1e-3,
                           ckpt_dir=args.checkpoint_dir,
                           log_dir=args.log_dir,
                           save_per_iter=args.save_per_iter,
                           load_model_path=args.load_model_path,
                           task_name=task_name)
        elif args.task == 'evaluate':
            trpo_mpi.evaluate(env,
                              policy_fn,
                              args.load_model_path,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy)
        else:
            raise NotImplementedError
    else:
        raise NotImplementedError

    env.close()
예제 #16
0
 def __init__(self):
     self.viewer = None
     self.server_process = None
     self.server_port = 6000
     self.hfo_path = hfo_py.get_hfo_path()
     print('HFO path: ', self.hfo_path)