Code example #1
    def __init__(self, config, ob_space, ac_space, actor, critic):
        super().__init__(config, ob_space)

        self._ac_space = ac_space  # e.g. ActionSpace(shape=OrderedDict([('default', 8)]), minimum=-1.0, maximum=1.0)

        # build up networks
        self._actor = actor(config, ob_space, ac_space,
                            config.tanh_policy)  # actor model (MLP)
        self._old_actor = actor(config, ob_space, ac_space, config.tanh_policy)
        self._critic = critic(config, ob_space)  # critic model (MLP)
        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(),
                                       lr=config.lr_actor)
        self._critic_optim = optim.Adam(self._critic.parameters(),
                                        lr=config.lr_critic)

        sampler = RandomSampler()
        buffer_keys = [
            'ob', 'ac', 'done', 'rew', 'ret', 'adv', 'ac_before_activation'
        ]
        self._buffer = ReplayBuffer(buffer_keys, config.buffer_size,
                                    sampler.sample_func)

        if config.is_chef:
            logger.info('Creating a PPO agent')
            logger.info('The actor has %d parameters',
                        count_parameters(self._actor))
            logger.info('The critic has %d parameters',
                        count_parameters(self._critic))
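The _old_actor built above is the frozen copy of the policy against which the PPO probability ratio is computed. A minimal sketch of how it is typically synchronized before each update round; the method name below is illustrative and not taken from this project:

    def _sync_old_actor(self):
        # Snapshot the current policy so that pi_old in the PPO ratio
        # pi_new(a|s) / pi_old(a|s) stays fixed while _actor is updated.
        self._old_actor.load_state_dict(self._actor.state_dict())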
Code example #2
File: ddpg_agent.py  Project: junjungoal/rlgarage
    def __init__(self, config, ob_space, ac_space,
                 actor, critic):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space

        self._log_alpha = [torch.zeros(1, requires_grad=True, device=config.device)]
        self._alpha_optim = [optim.Adam([self._log_alpha[0]], lr=config.lr_actor)]

        self._actor = actor(self._config, self._ob_space,
                            self._ac_space, self._config.tanh_policy, deterministic=True)
        self._actor_target = actor(self._config, self._ob_space,
                                   self._ac_space, self._config.tanh_policy, deterministic=True)
        self._actor_target.load_state_dict(self._actor.state_dict())
        self._critic = critic(config, ob_space, ac_space)
        self._critic_target = critic(config, ob_space, ac_space)
        self._critic_target.load_state_dict(self._critic.state_dict())

        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor)
        self._critic_optim = optim.Adam(self._critic.parameters(), lr=config.lr_critic)

        sampler = RandomSampler()  # uniform sampling over stored transitions
        self._buffer = ReplayBuffer(config,
                                    sampler.sample_func,
                                    ob_space,
                                    ac_space)

        self._ounoise = OUNoise(action_size(ac_space))

        self._log_creation()
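The target networks copied above are normally kept close to the online networks with Polyak averaging rather than repeated hard copies. A minimal sketch, assuming a small interpolation coefficient tau that is not shown in this snippet:

    def _soft_update(self, target, source, tau):
        # target <- (1 - tau) * target + tau * source, parameter by parameter
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.copy_((1.0 - tau) * t_param.data + tau * s_param.data)

    # e.g. self._soft_update(self._critic_target, self._critic, tau=0.005)
    #      self._soft_update(self._actor_target, self._actor, tau=0.005)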
Code example #3
    def __init__(self, config, ob_space, ac_space, actor, critic):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space

        self._target_entropy = -ac_space.size
        self._log_alpha = torch.zeros(1, requires_grad=True, device=config.device)
        self._alpha_optim = optim.Adam([self._log_alpha], lr=config.lr_actor)

        # build up networks
        self._build_actor(actor)
        self._critic1 = critic(config, ob_space, ac_space)
        self._critic2 = critic(config, ob_space, ac_space)

        # build up target networks
        self._critic1_target = critic(config, ob_space, ac_space)
        self._critic2_target = critic(config, ob_space, ac_space)
        self._critic1_target.load_state_dict(self._critic1.state_dict())
        self._critic2_target.load_state_dict(self._critic2.state_dict())
        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor)
        self._critic1_optim = optim.Adam(self._critic1.parameters(), lr=config.lr_critic)
        self._critic2_optim = optim.Adam(self._critic2.parameters(), lr=config.lr_critic)

        sampler = RandomSampler()
        buffer_keys = ['ob', 'ac', 'done', 'rew']
        self._buffer = ReplayBuffer(buffer_keys,
                                    config.buffer_size,
                                    sampler.sample_func)

        self._log_creation()
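_log_alpha and _target_entropy above implement SAC's automatic entropy-temperature tuning. A minimal sketch of the corresponding update step, assuming log_pi holds the log-probabilities of actions freshly sampled from _actor (variable names are illustrative):

        alpha = self._log_alpha.exp()
        # Push the policy entropy toward the target entropy (-|A|) by adjusting alpha.
        alpha_loss = -(self._log_alpha * (log_pi + self._target_entropy).detach()).mean()
        self._alpha_optim.zero_grad()
        alpha_loss.backward()
        self._alpha_optim.step()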
Code example #4
    def __init__(self, config, ob_space, ac_space, dqn):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space
        # build up networks
        self._dqn = dqn(config, ob_space, ac_space)
        self._network_cuda(config.device)

        self._dqn_optim = optim.Adam(self._dqn.parameters(),
                                     lr=config.lr_actor)
        sampler = RandomSampler()
        self._buffer = ReplayBuffer(config.buffer_size, sampler.sample_func,
                                    ob_space, ac_space)
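This DQN snippet builds a single value network and no separate target network, so the TD target is bootstrapped from _dqn itself. A minimal sketch of one update step, assuming _dqn(ob) returns one Q-value per discrete action, batched tensors ob, ac, rew, done, ob_next, a discount factor stored in config, and F referring to torch.nn.functional (all of these names are illustrative):

        q = self._dqn(ob).gather(1, ac.long())  # Q(s, a) of the actions actually taken
        with torch.no_grad():
            q_next = self._dqn(ob_next).max(dim=1, keepdim=True)[0]
            target = rew + (1 - done) * config.discount_factor * q_next
        loss = F.mse_loss(q, target)
        self._dqn_optim.zero_grad()
        loss.backward()
        self._dqn_optim.step()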
Code example #5
File: sac_agent.py  Project: junjungoal/rlgarage
    def __init__(self, config, ob_space, ac_space, actor, critic):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space

        self._log_alpha = torch.tensor(np.log(config.alpha),
                                       requires_grad=True,
                                       device=config.device)
        self._alpha_optim = optim.Adam([self._log_alpha], lr=config.lr_actor)

        # build up networks
        self._actor = actor(config, ob_space, ac_space, config.tanh_policy)
        self._critic1 = critic(config, ob_space, ac_space)
        self._critic2 = critic(config, ob_space, ac_space)

        self._target_entropy = -action_size(self._actor._ac_space)

        # build up target networks
        self._critic1_target = critic(config, ob_space, ac_space)
        self._critic2_target = critic(config, ob_space, ac_space)
        self._critic1_target.load_state_dict(self._critic1.state_dict())
        self._critic2_target.load_state_dict(self._critic2.state_dict())

        if config.policy == 'cnn':
            self._critic2.base.copy_conv_weights_from(self._critic1.base)
            self._actor.base.copy_conv_weights_from(self._critic1.base)

            if config.unsup_algo == 'curl':
                self._curl = CURL(config, ob_space, ac_space, self._critic1,
                                  self._critic1_target)
                self._encoder_optim = optim.Adam(
                    self._critic1.base.parameters(), lr=config.lr_encoder)
                self._cpc_optim = optim.Adam(self._curl.parameters(),
                                             lr=config.lr_encoder)

        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(),
                                       lr=config.lr_actor)
        self._critic1_optim = optim.Adam(self._critic1.parameters(),
                                         lr=config.lr_critic)
        self._critic2_optim = optim.Adam(self._critic2.parameters(),
                                         lr=config.lr_critic)

        self._buffer = ReplayBuffer(config, ob_space, ac_space)
Code example #6
    def __init__(self, config, ob_space, ac_space,
                 actor, critic):
        super().__init__(config, ob_space)

        self._ac_space = ac_space

        # build up networks
        self._actor = actor(config, ob_space, ac_space, config.tanh_policy)
        self._old_actor = actor(config, ob_space, ac_space, config.tanh_policy)
        self._critic = critic(config, ob_space)
        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(), lr=config.lr_actor)
        self._critic_optim = optim.Adam(self._critic.parameters(), lr=config.lr_critic)

        sampler = RandomSampler()
        self._buffer = ReplayBuffer(['ob', 'ac', 'done', 'rew', 'ret', 'adv', 'ac_before_activation'],
                                    config.buffer_size,
                                    sampler.sample_func)

        if config.is_chef:
            logger.info('Creating a PPO agent')
            logger.info('The actor has %d parameters', count_parameters(self._actor))
            logger.info('The critic has %d parameters', count_parameters(self._critic))
Code example #7
    def __init__(
        self,
        config,
        ob_space,
        ac_space,
        actor,
        critic,
        non_limited_idx=None,
        ref_joint_pos_indexes=None,
        joint_space=None,
        is_jnt_limited=None,
        jnt_indices=None,
    ):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space
        self._jnt_indices = jnt_indices
        self._ref_joint_pos_indexes = ref_joint_pos_indexes
        self._joint_space = joint_space
        self._is_jnt_limited = is_jnt_limited
        if joint_space is not None:
            self._jnt_minimum = joint_space["default"].low
            self._jnt_maximum = joint_space["default"].high

        self._log_alpha = [
            torch.zeros(1, requires_grad=True, device=config.device)
        ]
        self._alpha_optim = [
            optim.Adam([self._log_alpha[0]], lr=config.lr_actor)
        ]

        self._actor = actor(
            self._config,
            self._ob_space,
            self._ac_space,
            self._config.tanh_policy,
            deterministic=True,
        )
        self._actor_target = actor(
            self._config,
            self._ob_space,
            self._ac_space,
            self._config.tanh_policy,
            deterministic=True,
        )
        self._actor_target.load_state_dict(self._actor.state_dict())
        self._critic1 = critic(config, ob_space, ac_space)
        self._critic2 = critic(config, ob_space, ac_space)
        self._critic1_target = critic(config, ob_space, ac_space)
        self._critic2_target = critic(config, ob_space, ac_space)
        self._critic1_target.load_state_dict(self._critic1.state_dict())
        self._critic2_target.load_state_dict(self._critic2.state_dict())

        self._network_cuda(config.device)

        self._actor_optim = optim.Adam(self._actor.parameters(),
                                       lr=config.lr_actor)
        self._critic1_optim = optim.Adam(self._critic1.parameters(),
                                         lr=config.lr_critic)
        self._critic2_optim = optim.Adam(self._critic2.parameters(),
                                         lr=config.lr_critic)

        self._update_steps = 0

        sampler = RandomSampler()
        buffer_keys = ["ob", "ac", "done", "rew"]
        if config.mopa or config.expand_ac_space:
            buffer_keys.append("intra_steps")
        self._buffer = ReplayBuffer(buffer_keys, config.buffer_size,
                                    sampler.sample_func)

        self._log_creation()

        self._planner = None
        self._is_planner_initialized = False
        if config.mopa:
            self._planner = PlannerAgent(
                config,
                ac_space,
                non_limited_idx,
                planner_type=config.planner_type,
                passive_joint_idx=config.passive_joint_idx,
                ignored_contacts=config.ignored_contact_geom_ids,
                is_simplified=config.is_simplified,
                simplified_duration=config.simplified_duration,
                allow_approximate=config.allow_approximate,
                range_=config.range,
            )
            self._simple_planner = PlannerAgent(
                config,
                ac_space,
                non_limited_idx,
                planner_type=config.simple_planner_type,
                passive_joint_idx=config.passive_joint_idx,
                ignored_contacts=config.ignored_contact_geom_ids,
                goal_bias=1.0,
                allow_approximate=False,
                is_simplified=config.simple_planner_simplified,
                simplified_duration=config.simple_planner_simplified_duration,
                range_=config.simple_planner_range,
            )
            self._omega = config.omega
Code example #8
    def __init__(
        self,
        config,
        ob_space,
        ac_space,
        actor,
        critic,
        non_limited_idx=None,
        ref_joint_pos_indexes=None,
        joint_space=None,
        is_jnt_limited=None,
        jnt_indices=None,
    ):
        super().__init__(config, ob_space)

        self._ob_space = ob_space
        self._ac_space = ac_space
        self._jnt_indices = jnt_indices
        self._ref_joint_pos_indexes = ref_joint_pos_indexes
        self._log_alpha = torch.tensor(np.log(config.alpha),
                                       requires_grad=True,
                                       device=config.device)
        self._alpha_optim = optim.Adam([self._log_alpha], lr=config.lr_actor)
        self._joint_space = joint_space
        self._is_jnt_limited = is_jnt_limited
        if joint_space is not None:
            self._jnt_minimum = joint_space["default"].low
            self._jnt_maximum = joint_space["default"].high

        # build up networks
        self._build_actor(actor)
        self._build_critic(critic)
        self._network_cuda(config.device)

        self._target_entropy = -action_size(self._actor._ac_space)

        self._actor_optim = optim.Adam(self._actor.parameters(),
                                       lr=config.lr_actor)
        self._critic1_optim = optim.Adam(self._critic1.parameters(),
                                         lr=config.lr_critic)
        self._critic2_optim = optim.Adam(self._critic2.parameters(),
                                         lr=config.lr_critic)

        sampler = RandomSampler()
        buffer_keys = ["ob", "ac", "meta_ac", "done", "rew"]
        if config.mopa or config.expand_ac_space:
            buffer_keys.append("intra_steps")
        self._buffer = ReplayBuffer(buffer_keys, config.buffer_size,
                                    sampler.sample_func)

        self._log_creation()

        self._planner = None
        self._is_planner_initialized = False
        if config.mopa:
            self._planner = PlannerAgent(
                config,
                ac_space,
                non_limited_idx,
                planner_type=config.planner_type,
                passive_joint_idx=config.passive_joint_idx,
                ignored_contacts=config.ignored_contact_geom_ids,
                is_simplified=config.is_simplified,
                simplified_duration=config.simplified_duration,
                range_=config.range,
            )
            self._simple_planner = PlannerAgent(
                config,
                ac_space,
                non_limited_idx,
                planner_type=config.simple_planner_type,
                passive_joint_idx=config.passive_joint_idx,
                ignored_contacts=config.ignored_contact_geom_ids,
                goal_bias=1.0,
                is_simplified=config.simple_planner_simplified,
                simplified_duration=config.simple_planner_simplified_duration,
                range_=config.simple_planner_range,
            )
            self._omega = config.omega