Example #1
    def __init__(self,
                 s_dim,
                 visual_sources,
                 visual_resolution,
                 a_dim_or_list,
                 is_continuous,

                 lr=5.0e-4,
                 epoch=5,
                 hidden_units={
                     'actor_continuous': [32, 32],
                     'actor_discrete': [32, 32]
                 },
                 **kwargs):
        super().__init__(
            s_dim=s_dim,
            visual_sources=visual_sources,
            visual_resolution=visual_resolution,
            a_dim_or_list=a_dim_or_list,
            is_continuous=is_continuous,
            **kwargs)
        self.epoch = epoch
        self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1])
        self.visual_net = Nn.VisualNet('visual_net', self.visual_dim)
        if self.is_continuous:
            # Gaussian policy: the network outputs the mean, while log_std is a
            # separate state-independent trainable variable.
            self.net = Nn.actor_mu(self.s_dim, self.a_counts, 'pg_net', hidden_units['actor_continuous'], visual_net=self.visual_net)
            self.log_std = tf.Variable(initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32), trainable=True)
            self.net.tv += [self.log_std]
        else:
            self.net = Nn.actor_discrete(self.s_dim, self.a_counts, 'pg_net', hidden_units['actor_discrete'], visual_net=self.visual_net)
        self.lr = tf.keras.optimizers.schedules.PolynomialDecay(lr, self.max_episode, 1e-10, power=1.0)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr(self.episode))
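The snippet above appears to be the constructor of a REINFORCE-style policy-gradient agent: it builds a Gaussian actor (`actor_mu` plus a trainable, state-independent `log_std`) for continuous action spaces and a categorical actor (`actor_discrete`) otherwise, with a polynomially decaying learning rate. A minimal instantiation sketch follows; the class name `PG`, the parameter values, and the assumption that `max_episode` arrives through `**kwargs` are all illustrative, not shown in the snippet:

# Hypothetical usage; `PG` and the argument values below are assumptions.
agent = PG(s_dim=8,               # length of the vector observation
           visual_sources=0,      # no camera inputs
           visual_resolution=[],  # unused when visual_sources == 0
           a_dim_or_list=[2],     # 2-dimensional continuous action
           is_continuous=True,    # selects the actor_mu branch
           lr=5.0e-4,
           epoch=5)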
Example #2
    def __init__(
            self,
            s_dim,
            visual_sources,
            visual_resolution,
            a_dim_or_list,
            is_continuous,
            epoch=5,
            beta=1.0e-3,
            actor_lr=5.0e-4,
            critic_lr=1.0e-3,
            hidden_units={
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32],
                'critic': [32, 32]
            },
            **kwargs):
        super().__init__(s_dim=s_dim,
                         visual_sources=visual_sources,
                         visual_resolution=visual_resolution,
                         a_dim_or_list=a_dim_or_list,
                         is_continuous=is_continuous,
                         **kwargs)
        self.beta = beta
        self.epoch = epoch
        self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim,
                                           [self.a_counts], [1])
        if self.is_continuous:
            self.actor_net = Nn.actor_mu(self.s_dim, self.visual_dim,
                                         self.a_counts, 'actor_net',
                                         hidden_units['actor_continuous'])
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
                trainable=True)
        else:
            self.actor_net = Nn.actor_discrete(self.s_dim, self.visual_dim,
                                               self.a_counts, 'actor_net',
                                               hidden_units['actor_discrete'])
        self.critic_net = Nn.critic_v(self.s_dim, self.visual_dim,
                                      'critic_net', hidden_units['critic'])
        self.actor_lr = tf.keras.optimizers.schedules.PolynomialDecay(
            actor_lr, self.max_episode, 1e-10, power=1.0)
        self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(
            critic_lr, self.max_episode, 1e-10, power=1.0)
        self.optimizer_critic = tf.keras.optimizers.Adam(
            learning_rate=self.critic_lr(self.episode))
        self.optimizer_actor = tf.keras.optimizers.Adam(
            learning_rate=self.actor_lr(self.episode))
        self.recorder.logger.info('''
       xx           xxxxx          xxxxxx    
      xxx           xx xxx        xxx  xx    
      xxx           xx xxx        xx    xx   
      x xx             xx         xx         
     xx xx            xxx        xxx         
     xxxxxx           xx         xxx         
    xx   xx          xx           xx    xx   
    xx   xx         xx  x         xxx  xxx   
   xxx  xxxxx      xxxxxx          xxxxxx    
        ''')
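Example #2 is the constructor of an advantage actor-critic agent (the banner it logs appears to spell "A2C"). Actor and critic get separate Adam optimizers, each driven by a `PolynomialDecay` schedule evaluated at the current episode. The sketch below only illustrates how that schedule behaves; the 1000-episode horizon is invented for the illustration:

import tensorflow as tf

# Linear decay (power=1.0) from 5e-4 down to 1e-10 over 1000 steps,
# mirroring the schedules constructed in the example above.
schedule = tf.keras.optimizers.schedules.PolynomialDecay(5.0e-4, 1000, 1e-10, power=1.0)
print(float(schedule(0)))     # 0.0005 at episode 0
print(float(schedule(500)))   # ~0.00025 halfway through
print(float(schedule(1000)))  # ~1e-10 at the final episode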
Example #3
    def __init__(self,
                 s_dim,
                 visual_sources,
                 visual_resolution,
                 a_dim_or_list,
                 is_continuous,

                 beta=1.0e-3,
                 lr=5.0e-4,
                 delta=0.01,
                 lambda_=0.95,
                 cg_iters=10,
                 train_v_iters=10,
                 damping_coeff=0.1,
                 backtrack_iters=10,
                 backtrack_coeff=0.8,
                 share_visual_net=True,
                 epsilon=0.2,
                 critic_lr=1e-3,
                 hidden_units={
                     'actor_continuous': [32, 32],
                     'actor_discrete': [32, 32],
                     'critic': [32, 32]
                 },
                 **kwargs):
        super().__init__(
            s_dim=s_dim,
            visual_sources=visual_sources,
            visual_resolution=visual_resolution,
            a_dim_or_list=a_dim_or_list,
            is_continuous=is_continuous,
            **kwargs)
        self.beta = beta
        self.delta = delta
        self.lambda_ = lambda_
        self.epsilon = epsilon
        self.cg_iters = cg_iters
        self.damping_coeff = damping_coeff
        self.backtrack_iters = backtrack_iters
        self.backtrack_coeff = backtrack_coeff
        self.train_v_iters = train_v_iters

        self.actor_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [1], [1])
        self.critic_TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim, [1])

        self.share_visual_net = share_visual_net
        if self.share_visual_net:
            # Actor and critic reuse a single visual encoder.
            self.actor_visual_net = self.critic_visual_net = Nn.VisualNet('visual_net', self.visual_dim)
        else:
            # Each branch gets its own visual encoder.
            self.actor_visual_net = Nn.VisualNet('actor_visual_net', self.visual_dim)
            self.critic_visual_net = Nn.VisualNet('critic_visual_net', self.visual_dim)

        if self.is_continuous:
            self.actor_net = Nn.actor_mu(self.s_dim, self.a_counts, 'actor_net', hidden_units['actor_continuous'], visual_net=self.actor_visual_net)
            self.log_std = tf.Variable(initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32), trainable=True)
            self.actor_net.tv += [self.log_std]
            self.actor_params = self.actor_net.tv
            self.Hx_TensorSpecs = [tf.TensorSpec(shape=flat_concat(self.actor_params).shape, dtype=tf.float32)] \
                + get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts], [self.a_counts])
        else:
            self.actor_net = Nn.actor_discrete(self.s_dim, self.a_counts, 'actor_net', hidden_units['actor_discrete'], visual_net=self.actor_visual_net)
            self.actor_params = self.actor_net.tv
            self.Hx_TensorSpecs = [tf.TensorSpec(shape=flat_concat(self.actor_params).shape, dtype=tf.float32)] \
                + get_TensorSpecs([self.s_dim], self.visual_dim, [self.a_counts])
        self.critic_net = Nn.critic_v(self.s_dim, 'critic_net', hidden_units['critic'], visual_net=self.critic_visual_net)
        self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(critic_lr, self.max_episode, 1e-10, power=1.0)
        self.optimizer_critic = tf.keras.optimizers.Adam(learning_rate=self.critic_lr(self.episode))
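Example #3 sets up a TRPO-style agent: `delta` is the size of the KL trust region, `cg_iters` and `damping_coeff` configure the conjugate-gradient solve against Hessian-vector products, and `backtrack_iters`/`backtrack_coeff` drive the line search. Only the critic gets an Adam optimizer, since the actor is updated by the natural-gradient step. `flat_concat` is not shown in the snippet; a plausible definition, consistent with how `Hx_TensorSpecs` is built from its flattened shape, is:

import tensorflow as tf

def flat_concat(params):
    # Flatten every trainable variable and join them into a single 1-D
    # tensor, the shape expected by the Hessian-vector-product input spec.
    return tf.concat([tf.reshape(p, [-1]) for p in params], axis=0)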
Example #4
    def __init__(
            self,
            s_dim,
            visual_sources,
            visual_resolution,
            a_dim_or_list,
            is_continuous,
            epoch=5,
            beta=1.0e-3,
            lr=5.0e-4,
            lambda_=0.95,
            epsilon=0.2,
            share_net=True,
            actor_lr=3e-4,
            critic_lr=1e-3,
            hidden_units={
                'share': {
                    'continuous': {
                        'share': [32, 32],
                        'mu': [32, 32],
                        'v': [32, 32]
                    },
                    'discrete': {
                        'share': [32, 32],
                        'logits': [32, 32],
                        'v': [32, 32]
                    }
                },
                'actor_continuous': [32, 32],
                'actor_discrete': [32, 32],
                'critic': [32, 32]
            },
            **kwargs):
        super().__init__(s_dim=s_dim,
                         visual_sources=visual_sources,
                         visual_resolution=visual_resolution,
                         a_dim_or_list=a_dim_or_list,
                         is_continuous=is_continuous,
                         **kwargs)
        self.beta = beta
        self.epoch = epoch
        self.lambda_ = lambda_
        self.epsilon = epsilon
        self.share_net = share_net

        # Create the state-independent trainable log standard deviation once,
        # so that either branch below (shared or split network) can append it
        # to its variable list.
        if self.is_continuous:
            self.log_std = tf.Variable(
                initial_value=-0.5 * np.ones(self.a_counts, dtype=np.float32),
                trainable=True)
        if self.share_net:
            self.TensorSpecs = get_TensorSpecs([self.s_dim], self.visual_dim,
                                               [self.a_counts], [1], [1], [1])
            self.visual_net = Nn.VisualNet('visual_net', self.visual_dim)
            if self.is_continuous:
                self.net = Nn.a_c_v_continuous(
                    self.s_dim,
                    self.a_counts,
                    'ppo_net',
                    hidden_units['share']['continuous'],
                    visual_net=self.visual_net)
                self.net.tv += [self.log_std]
            else:
                self.net = Nn.a_c_v_discrete(self.s_dim,
                                             self.a_counts,
                                             'ppo_net',
                                             hidden_units['share']['discrete'],
                                             visual_net=self.visual_net)
            self.lr = tf.keras.optimizers.schedules.PolynomialDecay(
                lr, self.max_episode, 1e-10, power=1.0)
            self.optimizer = tf.keras.optimizers.Adam(
                learning_rate=self.lr(self.episode))
        else:
            self.actor_TensorSpecs = get_TensorSpecs([self.s_dim],
                                                     self.visual_dim,
                                                     [self.a_counts], [1], [1])
            self.critic_TensorSpecs = get_TensorSpecs([self.s_dim],
                                                      self.visual_dim, [1])
            self.actor_visual_net = Nn.VisualNet('actor_visual_net',
                                                 self.visual_dim)
            self.critic_visual_net = Nn.VisualNet('critic_visual_net',
                                                  self.visual_dim)
            if self.is_continuous:
                self.actor_net = Nn.actor_mu(self.s_dim,
                                             self.a_counts,
                                             'actor_net',
                                             hidden_units['actor_continuous'],
                                             visual_net=self.actor_visual_net)
                self.actor_net.tv += [self.log_std]
            else:
                self.actor_net = Nn.actor_discrete(
                    self.s_dim,
                    self.a_counts,
                    'actor_net',
                    hidden_units['actor_discrete'],
                    visual_net=self.actor_visual_net)
            self.critic_net = Nn.critic_v(self.s_dim,
                                          'critic_net',
                                          hidden_units['critic'],
                                          visual_net=self.critic_visual_net)
            self.actor_lr = tf.keras.optimizers.schedules.PolynomialDecay(
                actor_lr, self.max_episode, 1e-10, power=1.0)
            self.critic_lr = tf.keras.optimizers.schedules.PolynomialDecay(
                critic_lr, self.max_episode, 1e-10, power=1.0)
            self.optimizer_actor = tf.keras.optimizers.Adam(
                learning_rate=self.actor_lr(self.episode))
            self.optimizer_critic = tf.keras.optimizers.Adam(
                learning_rate=self.critic_lr(self.episode))
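Example #4 is a PPO constructor with a `share_net` switch: when sharing, a single `a_c_v_continuous`/`a_c_v_discrete` network produces both the policy head and the value head and one optimizer updates everything; otherwise separate actor/critic networks, visual encoders, learning-rate schedules, and optimizers are built. A hypothetical instantiation of both variants follows; the class name `PPO` and the argument values are assumptions:

# Hypothetical usage; `PPO` is an assumed class name.
shared = PPO(s_dim=8, visual_sources=0, visual_resolution=[],
             a_dim_or_list=[2], is_continuous=True, share_net=True,
             lr=5.0e-4)
split = PPO(s_dim=8, visual_sources=0, visual_resolution=[],
            a_dim_or_list=[2], is_continuous=True, share_net=False,
            actor_lr=3e-4, critic_lr=1e-3)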