    def __init__(self, iter_num, iter_size, state_dim, action_dim, max_action,
                 batch_size, discount, tau, expl_noise, policy_noise,
                 noise_clip, policy_freq, max_iter, start_time_steps,
                 total_iter, terrain_var, her_var):

        self.iter_num = iter_num
        self.iter_size = iter_size

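        # TD3 hyperparameters for the manager policy; the policy noise and its
        # clip range are scaled by the action magnitude.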
        kwargs = {
            "state_dim": state_dim,
            "action_dim": action_dim,
            "max_action": max_action,
            "discount": discount,
            "tau": tau,
            "policy_noise": policy_noise * max_action,
            "noise_clip": noise_clip * max_action,
            "policy_freq": policy_freq
        }

        self.action_dim = action_dim

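        # Continuous grid-world environment (standard layout) with the
        # requested terrain setting.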
        self.grid = GridWorldContinuous(grid_mode='standard',
                                        terrain=terrain_var)
        self.grid.reset_env_terrain()

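        # High-level (manager) policy: TD3, paired with a HIRO-style replay buffer.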
        self.manager_policy = TD3(**kwargs)
        self.replay_buffer = utils.ReplayBufferHIRO(state_dim,
                                                    action_dim,
                                                    max_size=int(2e5))

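        # Low-level policy: continuous MLP optimized with TRPO.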
        self.policy = ContinuousMLP(state_dim,
                                    action_dim,
                                    hidden_sizes=[64, 64, 64],
                                    activation=F.relu,
                                    is_disc_action=False)
        self.optimizer = TRPO(policy=self.policy,
                              use_gpu=False,
                              max_kl=5e-4,
                              damping=5e-3,
                              use_fim=False,
                              discount=0.99,
                              imp_weight=False)

        self.manager = Manager()

        self.batch_size = batch_size
        self.expl_noise = expl_noise
        self.expl_noise_start = expl_noise
        self.max_action = max_action

        self.total_steps = 0
        self.max_iter = max_iter
        self.start_time_steps = start_time_steps

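        # Manager time scale (low-level steps between manager decisions in
        # this HIRO-style setup).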
        self.manager_time_scale = 2

        self.current_iter = 0
        self.total_iter = total_iter

        self.her_var = her_var

    def __init__(self,
                 env_mode,
                 op_mode,
                 state_dim,
                 action_dim,
                 config,
                 save_path=None
                 ):
        self.config = config
        self.save_path = save_path
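        # Custom maze environment loaded from saved layout/object arrays,
        # with a waypoint and a fixed goal.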
        self.grid = GridWorldContinuous(grid_mode='custom',
                                        layout=np.load('maze1_layout.npy'),
                                        objects=np.load('maze1_objects.npy'),
                                        waypoint=True,
                                        terrain=self.config.terrain_var)
        self.grid.goal = (13., 13.)
        self.grid.reset_env_terrain()

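        # Continuous MLP policy optimized with TRPO, sized and tuned from the config.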
        self.policy = ContinuousMLP(state_dim,
                                    action_dim,
                                    hidden_sizes=config.policy_hidden_sizes,
                                    activation=F.relu,
                                    is_disc_action=False)
        self.optimizer = TRPO(policy=self.policy,
                              use_gpu=False,
                              max_kl=config.policy_max_kl,
                              damping=config.policy_damp_val,
                              use_fim=False,
                              discount=config.discount_factor,
                              imp_weight=False)

        self.env_mode = env_mode
        self.op_mode = op_mode
        self.state_dim = state_dim
        self.episode_steps = 0

    def __init__(self,
                 env_mode,
                 op_mode,
                 state_dim,
                 action_dim,
                 config,
                 save_path=None):
        self.config = config
        self.save_path = save_path
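        # Custom maze environment loaded from saved layout/object arrays
        # (no waypoint in this variant).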
        self.grid = GridWorldContinuous(grid_mode='custom',
                                        layout=np.load('maze1_layout.npy'),
                                        objects=np.load('maze1_objects.npy'),
                                        terrain=self.config.terrain_var)
        self.grid.reset_env_terrain()

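        # Low-level continuous MLP policy optimized with TRPO.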
        self.policy = ContinuousMLP(state_dim,
                                    action_dim,
                                    hidden_sizes=config.policy_hidden_sizes,
                                    activation=F.relu,
                                    is_disc_action=False)
        self.optimizer = TRPO(policy=self.policy,
                              use_gpu=False,
                              max_kl=config.policy_max_kl,
                              damping=config.policy_damp_val,
                              use_fim=False,
                              discount=config.discount_factor,
                              imp_weight=False)
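        # MVProp value-propagation network with a target copy, trained by
        # TD(0) updates from a VIN-style replay buffer.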
        self.mvprop = MVPROPFAT(k=config.mvprop_k).cuda()
        self.mvprop_target = copy.deepcopy(self.mvprop).cuda()
        self.memory = ReplayBufferVIN(2, 1, self.grid.x_size,
                                      config.mvprop_buffer_size)
        self.mvprop_optimizer = MVPROPOptimizerTD0(
            self.mvprop, self.mvprop_target, self.memory,
            config.discount_factor, config.mvprop_batch_size, config.mvprop_lr,
            config.mvprop_k)

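        # Mode flags, step counters, initial epsilon, and the high-level time scale.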
        self.env_mode = env_mode
        self.op_mode = op_mode
        self.state_dim = state_dim
        self.dqn_steps = 0
        self.eps = 1.0
        self.time_scale = 2
        self.episode_steps = 0

    def __init__(self,
                 env_mode,
                 op_mode,
                 state_dim,
                 action_dim,
                 hidden_sizes,
                 max_kl,
                 damping,
                 batch_size,
                 inner_episodes,
                 max_iter,
                 use_fim=False,
                 use_gpu=False,
                 terrain_var=False
                 ):

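        # Standard grid world with a flat continuous MLP policy optimized by TRPO.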
        self.grid = GridWorldContinuous(grid_mode='standard', terrain=terrain_var)
        self.policy = ContinuousMLP(state_dim,
                                    action_dim,
                                    hidden_sizes=hidden_sizes,
                                    activation=F.relu,
                                    is_disc_action=False)
        self.optimizer = TRPO(policy=self.policy,
                              use_gpu=use_gpu,
                              max_kl=max_kl,
                              damping=damping,
                              use_fim=use_fim,
                              discount=0.99,
                              imp_weight=False)

        self.env_mode = env_mode
        self.op_mode = op_mode

        self.batch_size = batch_size
        self.inner_episodes = inner_episodes
        self.max_iter = max_iter
        self.state_dim = state_dim