예제 #1
0
    def __init__(self, env, device='cpu', use_td3=True, gamma=0.99):
        """Set up the agent for *env*, picking the algorithm by action-space type.

        Args:
            env: a gym-style environment; its action and observation spaces
                are inspected here to configure the algorithm.
            device: device string ('cpu'/'cuda') forwarded to the algorithm
                and planner.
            use_td3: forwarded to the underlying algorithm and planner.
            gamma: discount factor. Previously hard-coded; the default 0.99
                preserves the original behavior for existing callers.
        """
        self.env = env
        self.device = device
        # Discrete action spaces use the DQN-based algo; continuous use DDPG.
        self.is_discrete_action = isinstance(env.action_space, gym.spaces.discrete.Discrete)
        # NOTE(review): assumes a flat 1-D observation space — confirm upstream.
        self.obs_dim = env.observation_space.shape[0]

        self.gamma = gamma

        if self.is_discrete_action:
            self.num_act = env.action_space.n
            self.algo = DQNWithRewardAlgo(self.obs_dim, self.num_act, gamma, use_td3=use_td3, device=device)
        else:
            self.act_dim = env.action_space.shape[0]
            self.algo = DDPGAlgo(self.obs_dim, self.act_dim, gamma, use_td3=use_td3, device=device)

        self.replay_buffer = ReplayBuffer()

        # The planner rolls forward using this agent's transition function.
        self.planner = Planner(trans_fn=self.planner_trans_fn, use_td3=use_td3, gamma=gamma, device=device)

        self.estimate_std()
예제 #2
0
    def __init__(self, config_module=None):
        """Initialize memory, world state, the action set and the planner.

        :param config_module: optional scenario/robot specific setup module
                providing:
                    get_all_conditions(memory) -> list of conditions
                    get_all_actions(memory) -> list of actions
        """
        self.memory = Memory()
        self.worldstate = WorldState()
        self.actions = set()

        # Register everything the configuration module knows about.
        if config_module is not None:
            for cond in config_module.get_all_conditions(self.memory):
                Condition.add(cond)
            self.actions.update(config_module.get_all_actions(self.memory))

        self.planner = Planner(self.actions, self.worldstate, None)

        self._last_goal = None
        self._preempt_requested = False  # preemption mechanism
예제 #3
0
def test_planner():
    """A default-constructed Planner should be truthy."""
    planner = Planner()
    assert planner