Example #1
0
File: a2c.py Project: yyht/RLs
    def __init__(
            self,
            s_dim,
            visual_sources,
            visual_resolution,
            a_dim,
            is_continuous,
            epoch=5,
            beta=1.0e-3,
            actor_lr=5.0e-4,
            critic_lr=1.0e-3,
            hidden_units=None,
            **kwargs):
        """Create the actor and critic networks, their optimizers and the data buffer.

        Args:
            s_dim: Forwarded unchanged to the parent constructor.
            visual_sources: Forwarded unchanged to the parent constructor.
            visual_resolution: Forwarded unchanged to the parent constructor.
            a_dim: Forwarded unchanged to the parent constructor.
            is_continuous: Forwarded unchanged to the parent constructor.
            epoch: Stored as ``self.epoch``.
            beta: Stored as ``self.beta``.
            actor_lr: Passed through ``self.init_lr``; the result is stored as
                ``self.actor_lr`` and used to build ``self.optimizer_actor``.
            critic_lr: Passed through ``self.init_lr``; the result is stored as
                ``self.critic_lr`` and used to build ``self.optimizer_critic``.
            hidden_units: Mapping that provides ``'critic'`` plus either
                ``'actor_continuous'`` or ``'actor_discrete'`` (only the key
                matching ``self.is_continuous`` is read). ``None`` selects
                ``[32, 32]`` for all three keys.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if hidden_units is None:
            # Built per call: a dict literal in the signature would be a single
            # object shared by every instance that relies on the default.
            hidden_units = {
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32],
                'critic': [32, 32]
            }
        super().__init__(s_dim=s_dim,
                         visual_sources=visual_sources,
                         visual_resolution=visual_resolution,
                         a_dim=a_dim,
                         is_continuous=is_continuous,
                         **kwargs)
        self.beta = beta
        self.epoch = epoch

        # self.is_continuous, self.feat_dim, self.a_dim and self.other_tv are
        # not assigned in this method; presumably the parent constructor
        # provides them.
        if self.is_continuous:
            self.actor_net = rls.actor_mu(self.feat_dim, self.a_dim,
                                          hidden_units['actor_continuous'])
            # One trainable value per action dimension, initialised to -0.5.
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_dim, dtype=np.float32),
                trainable=True)
            self.actor_tv = self.actor_net.trainable_variables + [self.log_std]
        else:
            self.actor_net = rls.actor_discrete(self.feat_dim, self.a_dim,
                                                hidden_units['actor_discrete'])
            self.actor_tv = self.actor_net.trainable_variables

        self.critic_net = rls.critic_v(self.feat_dim, hidden_units['critic'])
        # Only the critic's variable list is extended with self.other_tv.
        self.critic_tv = self.critic_net.trainable_variables + self.other_tv

        # Same call order as before: both learning rates, then both optimizers.
        self.actor_lr = self.init_lr(actor_lr)
        self.critic_lr = self.init_lr(critic_lr)
        self.optimizer_actor = self.init_optimizer(self.actor_lr)
        self.optimizer_critic = self.init_optimizer(self.critic_lr)

        self.model_recorder(
            dict(actor=self.actor_net,
                 critic=self.critic_net,
                 optimizer_actor=self.optimizer_actor,
                 optimizer_critic=self.optimizer_critic))

        self.initialize_data_buffer()
Example #2
0
    def __init__(self,
                 s_dim,
                 visual_sources,
                 visual_resolution,
                 a_dim,
                 is_continuous,
                 lr=5.0e-4,
                 epoch=5,
                 hidden_units=None,
                 **kwargs):
        """Create the policy network, its optimizer and the data buffer.

        Args:
            s_dim: Forwarded unchanged to the parent constructor.
            visual_sources: Forwarded unchanged to the parent constructor.
            visual_resolution: Forwarded unchanged to the parent constructor.
            a_dim: Forwarded unchanged to the parent constructor.
            is_continuous: Forwarded unchanged to the parent constructor.
            lr: Passed through ``self.init_lr``; the result is stored as
                ``self.lr`` and used to build ``self.optimizer``.
            epoch: Stored as ``self.epoch``.
            hidden_units: Mapping that provides ``'actor_continuous'`` or
                ``'actor_discrete'`` (only the key matching
                ``self.is_continuous`` is read). ``None`` selects ``[32, 32]``
                for both keys.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if hidden_units is None:
            # Built per call: a dict literal in the signature would be a single
            # object shared by every instance that relies on the default.
            hidden_units = {
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32]
            }
        super().__init__(s_dim=s_dim,
                         visual_sources=visual_sources,
                         visual_resolution=visual_resolution,
                         a_dim=a_dim,
                         is_continuous=is_continuous,
                         **kwargs)
        self.epoch = epoch

        # self.is_continuous, self.feat_dim, self.a_dim and self.other_tv are
        # not assigned in this method; presumably the parent constructor
        # provides them.
        if self.is_continuous:
            self.net = rls.actor_mu(self.feat_dim, self.a_dim,
                                    hidden_units['actor_continuous'])
            # One trainable value per action dimension, initialised to -0.5.
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_dim, dtype=np.float32),
                trainable=True)
            self.net_tv = (self.net.trainable_variables + [self.log_std]
                           + self.other_tv)
        else:
            self.net = rls.actor_discrete(self.feat_dim, self.a_dim,
                                          hidden_units['actor_discrete'])
            self.net_tv = self.net.trainable_variables + self.other_tv

        self.lr = self.init_lr(lr)
        self.optimizer = self.init_optimizer(self.lr)

        self.model_recorder(dict(model=self.net, optimizer=self.optimizer))

        self.initialize_data_buffer()
Example #3
0
    def __init__(
            self,
            s_dim,
            visual_sources,
            visual_resolution,
            a_dim,
            is_continuous,
            beta=1.0e-3,
            lr=5.0e-4,
            delta=0.01,
            lambda_=0.95,
            cg_iters=10,
            train_v_iters=10,
            damping_coeff=0.1,
            backtrack_iters=10,
            backtrack_coeff=0.8,
            epsilon=0.2,
            critic_lr=1e-3,
            hidden_units=None,
            **kwargs):
        """Create the actor and critic networks, the critic optimizer and the data buffer.

        Only the critic gets an optimizer here; no optimizer is created for the
        actor in this constructor.

        Args:
            s_dim: Forwarded unchanged to the parent constructor.
            visual_sources: Forwarded unchanged to the parent constructor.
            visual_resolution: Forwarded unchanged to the parent constructor.
            a_dim: Forwarded unchanged to the parent constructor.
            is_continuous: Forwarded unchanged to the parent constructor.
            beta: Stored as ``self.beta``.
            lr: Accepted for interface compatibility; not read in this
                constructor.
            delta: Stored as ``self.delta``.
            lambda_: Stored as ``self.lambda_``.
            cg_iters: Stored as ``self.cg_iters``.
            train_v_iters: Stored as ``self.train_v_iters``.
            damping_coeff: Stored as ``self.damping_coeff``.
            backtrack_iters: Stored as ``self.backtrack_iters``.
            backtrack_coeff: Stored as ``self.backtrack_coeff``.
            epsilon: Stored as ``self.epsilon``.
            critic_lr: Passed through ``self.init_lr``; the result is stored as
                ``self.critic_lr`` and used to build ``self.optimizer_critic``.
            hidden_units: Mapping that provides ``'critic'`` plus either
                ``'actor_continuous'`` or ``'actor_discrete'`` (only the key
                matching ``self.is_continuous`` is read). ``None`` selects
                ``[32, 32]`` for all three keys.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if hidden_units is None:
            # Built per call: a dict literal in the signature would be a single
            # object shared by every instance that relies on the default.
            hidden_units = {
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32],
                'critic': [32, 32]
            }
        super().__init__(s_dim=s_dim,
                         visual_sources=visual_sources,
                         visual_resolution=visual_resolution,
                         a_dim=a_dim,
                         is_continuous=is_continuous,
                         **kwargs)
        self.beta = beta
        self.delta = delta
        self.lambda_ = lambda_
        self.epsilon = epsilon
        self.cg_iters = cg_iters
        self.damping_coeff = damping_coeff
        self.backtrack_iters = backtrack_iters
        self.backtrack_coeff = backtrack_coeff
        self.train_v_iters = train_v_iters

        # self.is_continuous, self.feat_dim, self.a_dim and self.other_tv are
        # not assigned in this method; presumably the parent constructor
        # provides them.
        if self.is_continuous:
            self.actor_net = rls.actor_mu(self.feat_dim, self.a_dim,
                                          hidden_units['actor_continuous'])
            # One trainable value per action dimension, initialised to -0.5.
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_dim, dtype=np.float32),
                trainable=True)
            self.actor_tv = self.actor_net.trainable_variables + [self.log_std]
        else:
            self.actor_net = rls.actor_discrete(self.feat_dim, self.a_dim,
                                                hidden_units['actor_discrete'])
            self.actor_tv = self.actor_net.trainable_variables

        self.critic_net = rls.critic_v(self.feat_dim, hidden_units['critic'])
        # Only the critic's variable list is extended with self.other_tv.
        self.critic_tv = self.critic_net.trainable_variables + self.other_tv
        self.critic_lr = self.init_lr(critic_lr)
        self.optimizer_critic = self.init_optimizer(self.critic_lr)

        self.model_recorder(
            dict(actor=self.actor_net,
                 critic=self.critic_net,
                 optimizer_critic=self.optimizer_critic))

        # Buffer fields common to both action types, followed by the fields
        # that depend on the action type.
        data_name_list = [
            's', 'visual_s', 'a', 'r', 's_', 'visual_s_', 'done', 'value',
            'log_prob'
        ]
        if self.is_continuous:
            data_name_list += ['old_mu', 'old_log_std']
        else:
            data_name_list += ['old_logp_all']
        self.initialize_data_buffer(data_name_list=data_name_list)
Example #4
0
    def __init__(self,
                 s_dim,
                 visual_sources,
                 visual_resolution,
                 a_dim,
                 is_continuous,

                 policy_epoch=4,
                 value_epoch=4,
                 beta=1.0e-3,
                 lr=5.0e-4,
                 lambda_=0.95,
                 epsilon=0.2,
                 value_epsilon=0.2,
                 share_net=True,
                 actor_lr=3e-4,
                 critic_lr=1e-3,
                 kl_reverse=False,
                 kl_target=0.02,
                 kl_target_cutoff=2,
                 kl_target_earlystop=4,
                 kl_beta=(0.7, 1.3),
                 kl_alpha=1.5,
                 kl_coef=1.0,
                 hidden_units=None,
                 **kwargs):
        """Create either one shared network or separate actor/critic networks.

        With ``share_net`` true, a single network and a single optimizer
        (learning rate ``lr``) are created. Otherwise separate actor and critic
        networks are created, each with its own optimizer (``actor_lr`` and
        ``critic_lr``).

        Args:
            s_dim: Forwarded unchanged to the parent constructor.
            visual_sources: Forwarded unchanged to the parent constructor.
            visual_resolution: Forwarded unchanged to the parent constructor.
            a_dim: Forwarded unchanged to the parent constructor.
            is_continuous: Forwarded unchanged to the parent constructor.
            policy_epoch: Stored as ``self.policy_epoch``.
            value_epoch: Stored as ``self.value_epoch``.
            beta: Stored as ``self.beta``.
            lr: Learning rate input for the shared network; read only when
                ``share_net`` is true.
            lambda_: Stored as ``self.lambda_``.
            epsilon: Stored as ``self.epsilon``.
            value_epsilon: Stored as ``self.value_epsilon``.
            share_net: Stored as ``self.share_net``; selects the branch
                described above.
            actor_lr: Actor learning rate input; read only when ``share_net``
                is false.
            critic_lr: Critic learning rate input; read only when
                ``share_net`` is false.
            kl_reverse: Stored as ``self.kl_reverse``.
            kl_target: Stored as ``self.kl_target`` and used as the base for
                the four derived thresholds below.
            kl_target_cutoff: ``self.kl_cutoff = kl_target * kl_target_cutoff``.
            kl_target_earlystop: ``self.kl_stop = kl_target * kl_target_earlystop``.
            kl_beta: Indexable pair; the first element scales ``kl_target``
                into ``self.kl_low`` and the last into ``self.kl_high``.
            kl_alpha: Stored as ``self.kl_alpha``.
            kl_coef: Stored as a float32 ``tf.constant`` in ``self.kl_coef``.
            hidden_units: Mapping of layer sizes. The shared branch reads
                ``hidden_units['share']['continuous']`` or
                ``hidden_units['share']['discrete']``; the separate branch
                reads ``'critic'`` plus ``'actor_continuous'`` or
                ``'actor_discrete'``. ``None`` selects ``[32, 32]`` everywhere.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if hidden_units is None:
            # Built per call: a dict literal in the signature would be a single
            # object shared by every instance that relies on the default.
            hidden_units = {
                'share': {
                    'continuous': {
                        'share': [32, 32],
                        'mu': [32, 32],
                        'v': [32, 32]
                    },
                    'discrete': {
                        'share': [32, 32],
                        'logits': [32, 32],
                        'v': [32, 32]
                    }
                },
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32],
                'critic': [32, 32]
            }
        super().__init__(
            s_dim=s_dim,
            visual_sources=visual_sources,
            visual_resolution=visual_resolution,
            a_dim=a_dim,
            is_continuous=is_continuous,
            **kwargs)
        self.beta = beta
        self.policy_epoch = policy_epoch
        self.value_epoch = value_epoch
        self.lambda_ = lambda_
        self.epsilon = epsilon
        self.value_epsilon = value_epsilon
        self.share_net = share_net
        self.kl_reverse = kl_reverse
        self.kl_target = kl_target
        self.kl_alpha = kl_alpha
        self.kl_coef = tf.constant(kl_coef, dtype=tf.float32)

        # Thresholds derived from the KL target.
        self.kl_cutoff = kl_target * kl_target_cutoff
        self.kl_stop = kl_target * kl_target_earlystop
        self.kl_low = kl_target * kl_beta[0]
        self.kl_high = kl_target * kl_beta[-1]

        # self.is_continuous, self.feat_dim, self.a_dim and self.other_tv are
        # not assigned in this method; presumably the parent constructor
        # provides them.
        if self.is_continuous:
            # One trainable value per action dimension, initialised to -0.5;
            # created before the networks in both branches.
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_dim, dtype=np.float32),
                trainable=True)

        if self.share_net:
            if self.is_continuous:
                self.net = rls.a_c_v_continuous(
                    self.feat_dim, self.a_dim,
                    hidden_units['share']['continuous'])
                self.net_tv = (self.net.trainable_variables + [self.log_std]
                               + self.other_tv)
            else:
                self.net = rls.a_c_v_discrete(
                    self.feat_dim, self.a_dim,
                    hidden_units['share']['discrete'])
                self.net_tv = self.net.trainable_variables + self.other_tv
            self.lr = self.init_lr(lr)
            self.optimizer = self.init_optimizer(self.lr)
            self.model_recorder(dict(
                model=self.net,
                optimizer=self.optimizer
            ))
        else:
            # NOTE(review): the actor variable list is stored as
            # `actor_net_tv` while the critic one is `critic_tv`; confirm the
            # training code reads these exact names before renaming.
            if self.is_continuous:
                self.actor_net = rls.actor_mu(
                    self.feat_dim, self.a_dim,
                    hidden_units['actor_continuous'])
                self.actor_net_tv = (self.actor_net.trainable_variables
                                     + [self.log_std])
            else:
                self.actor_net = rls.actor_discrete(
                    self.feat_dim, self.a_dim,
                    hidden_units['actor_discrete'])
                self.actor_net_tv = self.actor_net.trainable_variables
            self.critic_net = rls.critic_v(self.feat_dim,
                                           hidden_units['critic'])
            # Only the critic's variable list is extended with self.other_tv.
            self.critic_tv = self.critic_net.trainable_variables + self.other_tv

            # Same call order as before: both learning rates, then both
            # optimizers.
            self.actor_lr = self.init_lr(actor_lr)
            self.critic_lr = self.init_lr(critic_lr)
            self.optimizer_actor = self.init_optimizer(self.actor_lr)
            self.optimizer_critic = self.init_optimizer(self.critic_lr)
            self.model_recorder(dict(
                actor=self.actor_net,
                critic=self.critic_net,
                optimizer_actor=self.optimizer_actor,
                optimizer_critic=self.optimizer_critic
            ))

        self.initialize_data_buffer(
            data_name_list=['s', 'visual_s', 'a', 'r', 's_', 'visual_s_',
                            'done', 'value', 'log_prob'])