Beispiel #1
0
    def _initialize(self, env):
        '''
        Inspect `env` and cache its observation and action specification on `self`.

        Sets `vector_dims`, `visual_dims`, `obs_type`, `vector_info_type`,
        `visual_info_type`, `action_type`, `_is_continuous`, `a_dim`,
        `discrete_action_list` (non-continuous actions only),
        `reward_threshold` and `EnvSpec`.

        Args:
            env: environment exposing `observation_space`, `action_space`
                and `env.spec.reward_threshold`.

        Raises:
            AssertionError: if the observation space is not Box/Discrete, the
                action space is not Box/Discrete/Tuple, a Box action space is
                not one-dimensional, or a Tuple action space contains a
                non-Discrete item.
        '''
        obs_space = env.observation_space
        act_space = env.action_space
        # Tuple is accepted for actions so that the Tuple(Discrete) branch
        # below is actually reachable.
        assert isinstance(obs_space, (Box, Discrete)) and isinstance(act_space, (Box, Discrete, Tuple)), 'action_space and observation_space must be one of available_type'

        # process observation
        if isinstance(obs_space, Box):
            # A Box that is not 1-D contributes a vector dimension of 0.
            self.vector_dims = [obs_space.shape[0] if len(obs_space.shape) == 1 else 0]
        else:
            self.vector_dims = [int(obs_space.n)]
        if len(obs_space.shape) == 3:
            self.obs_type = 'visual'
            self.visual_dims = [list(obs_space.shape)]
        else:
            self.obs_type = 'vector'
            self.visual_dims = []

        self.vector_info_type = NamedTupleStaticClass.generate_obs_namedtuple(
            n_agents=self.n,
            item_nums=1 if self.obs_type == 'vector' else 0,
            name='vector')
        # The visual namedtuple is named 'visual' (it was previously created
        # with the name 'vector', duplicating the vector type's name).
        self.visual_info_type = NamedTupleStaticClass.generate_obs_namedtuple(
            n_agents=self.n,
            item_nums=1 if self.obs_type == 'visual' else 0,
            name='visual')

        # process action
        if isinstance(act_space, Box):
            assert len(act_space.shape) == 1, 'if action space is continuous, the shape length of action must equal to 1'
            self.action_type = 'continuous'
            self._is_continuous = True
            self.a_dim = act_space.shape[0]
        elif isinstance(act_space, Tuple):
            assert all(isinstance(i, Discrete) for i in act_space), 'if action space is Tuple, each item in it must have type Discrete'
            self.action_type = 'Tuple(Discrete)'
            self._is_continuous = False
            discrete_action_dim_list = [i.n for i in act_space]
            # The joint action count is the product of all branch sizes.
            self.a_dim = int(np.asarray(discrete_action_dim_list).prod())
        else:
            self.action_type = 'discrete'
            self._is_continuous = False
            self.a_dim = act_space.n
            discrete_action_dim_list = [act_space.n]
        if not self._is_continuous:
            self.discrete_action_list = get_discrete_action_list(discrete_action_dim_list)

        self.reward_threshold = env.env.spec.reward_threshold  # reward threshold refer to solved
        self.EnvSpec = SingleAgentEnvArgs(
            obs_spec=ObsSpec(vector_dims=self.vector_dims,
                             visual_dims=self.visual_dims),
            a_dim=self.a_dim,
            is_continuous=self._is_continuous,
            n_agents=self.n
        )
Beispiel #2
0
    def initialize_environment(self):
        '''
        Initialize the environment and collect the necessary information,
        such as the observation and action dimensions of every behavior.
        '''

        self.behavior_names = list(self.env.behavior_specs.keys())
        self.is_multi_agents = len(self.behavior_names) > 1
        self.first_bn = self.behavior_names[0]
        self.first_fbn = self.first_bn.replace('?', '_')

        # Per-behavior bookkeeping, keyed by behavior name.
        self.behavior_agents = defaultdict(int)
        self.behavior_ids = defaultdict(dict)
        self.vector_idxs = defaultdict(list)
        self.vector_dims = defaultdict(list)
        self.visual_idxs = defaultdict(list)
        self.visual_dims = defaultdict(list)
        self.a_dim = defaultdict(int)
        self.discrete_action_lists = {}
        self.is_continuous = {}
        self.empty_actiontuples = {}

        self.vector_info_type = {}
        self.visual_info_type = {}

        # Reset before querying steps for each behavior.
        self.env.reset()
        for bn, spec in self.env.behavior_specs.items():
            decision_steps, _ = self.env.get_steps(bn)
            n_agents = len(decision_steps)
            self.behavior_agents[bn] = n_agents
            self.behavior_ids[bn] = decision_steps.agent_id_to_index

            # Sort each observation into vector (rank 1) or visual (rank 3).
            for obs_index, obs_shape in enumerate(spec.observation_shapes):
                rank = len(obs_shape)
                if rank == 1:
                    self.vector_idxs[bn].append(obs_index)
                    self.vector_dims[bn].append(obs_shape[0])
                    continue
                if rank == 3:
                    self.visual_idxs[bn].append(obs_index)
                    self.visual_dims[bn].append(list(obs_shape))
                    continue
                raise ValueError(
                    "shape of observation cannot be understood.")

            make_obs_type = NamedTupleStaticClass.generate_obs_namedtuple
            self.vector_info_type[bn] = make_obs_type(
                n_agents=n_agents,
                item_nums=len(self.vector_idxs[bn]),
                name='vector')
            self.visual_info_type[bn] = make_obs_type(
                n_agents=n_agents,
                item_nums=len(self.visual_idxs[bn]),
                name='visual')

            act_spec = spec.action_spec
            if act_spec.is_continuous():
                self.a_dim[bn] = act_spec.continuous_size
                self.discrete_action_lists[bn] = None
                self.is_continuous[bn] = True
            elif act_spec.is_discrete():
                branches = act_spec.discrete_branches
                # The joint action count is the product of all branch sizes.
                self.a_dim[bn] = int(np.asarray(branches).prod())
                self.discrete_action_lists[bn] = get_discrete_action_list(
                    branches)
                self.is_continuous[bn] = False
            else:
                raise NotImplementedError(
                    "doesn't support continuous and discrete actions simultaneously for now."
                )

            self.empty_actiontuples[bn] = act_spec.empty_action(
                n_agents=n_agents)

        if not self.is_multi_agents:
            return

        # The integer before '#' in each behavior name is stored as that
        # behavior's control count.
        self.behavior_controls = defaultdict(int)
        for name in self.behavior_names:
            prefix = name.split('#')[0]
            self.behavior_controls[name] = int(prefix)
        first = self.first_bn
        self.env_copys = self.behavior_agents[first] // self.behavior_controls[first]