def __init__(self, env, device='cpu', use_td3=True):
    self.env = env
    self.device = device
    # Discrete vs. continuous action space decides which algorithm is used.
    self.is_discrete_action = isinstance(env.action_space, gym.spaces.Discrete)
    self.obs_dim = env.observation_space.shape[0]
    gamma = 0.99
    self.gamma = gamma
    if self.is_discrete_action:
        # Value-based control for discrete actions.
        self.num_act = env.action_space.n
        self.algo = DQNWithRewardAlgo(self.obs_dim, self.num_act, gamma,
                                      use_td3=use_td3, device=device)
    else:
        # Actor-critic control for continuous actions.
        self.act_dim = env.action_space.shape[0]
        self.algo = DDPGAlgo(self.obs_dim, self.act_dim, gamma,
                             use_td3=use_td3, device=device)
    self.replay_buffer = ReplayBuffer()
    # planner_trans_fn and estimate_std are methods defined elsewhere on this class.
    self.planner = Planner(trans_fn=self.planner_trans_fn, use_td3=use_td3,
                           gamma=gamma, device=device)
    self.estimate_std()
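# A minimal usage sketch (not from the original source). The enclosing class
# is not shown in the snippet above, so `Agent` is a hypothetical stand-in
# name. A Discrete action space routes to DQNWithRewardAlgo, a continuous
# (Box) action space to DDPGAlgo.
import gym

discrete_agent = Agent(gym.make('CartPole-v1'))    # Discrete space -> DQN branch
continuous_agent = Agent(gym.make('Pendulum-v1'))  # Box space -> DDPG branch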
def __init__(self, config_module=None):
    """
    :param config_module: a scenario/robot specific module used to prepare
        the setup; it must provide the following members:
            get_all_conditions(memory) -> returns a list of conditions
            get_all_actions(memory) -> returns a list of actions
    """
    self.memory = Memory()
    self.worldstate = WorldState()
    self.actions = set()

    if config_module is not None:
        for condition in config_module.get_all_conditions(self.memory):
            Condition.add(condition)
        for action in config_module.get_all_actions(self.memory):
            self.actions.add(action)

    self.planner = Planner(self.actions, self.worldstate, None)

    self._last_goal = None
    self._preempt_requested = False  # preemption mechanism
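# A minimal config_module sketch matching the protocol in the docstring above
# (not from the original source). MyCondition and MyAction are hypothetical
# placeholders for a scenario's Condition/Action subclasses; only the two
# member functions are prescribed by the docstring.
#
# e.g. my_robot_config.py:

def get_all_conditions(memory):
    # Conditions the planner can evaluate, bound to the shared memory.
    return [MyCondition(memory, 'robot.at_goal')]

def get_all_actions(memory):
    # Actions available to the planner in this scenario.
    return [MyAction(memory)]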
def test_planner():
    # Smoke test: constructing a Planner should succeed and be truthy.
    assert Planner()
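# A hedged follow-up sketch (not from the original source): if the Planner
# under test is the one constructed in the second snippet, which takes
# (actions, worldstate, goal), a zero-argument Planner() would raise a
# TypeError, and a construction test might instead look like this:
def test_planner_with_args():
    planner = Planner(set(), WorldState(), None)
    assert planner is not None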