コード例 #1
0
 def proba_distribution_from_latent(self,
                                    pi_latent_vector,
                                    vf_latent_vector,
                                    pi_init_scale=1.0,
                                    pi_init_bias=0.0,
                                    pi_init_std=1.0,
                                    vf_init_scale=1.0,
                                    vf_init_bias=0.0):
     """Build the action distribution, its mean and the Q-value head.

     The policy mean is a linear projection of the policy latent; the
     log-std is a single non-trainable variable initialised to
     ``log(pi_init_std)`` and shared across the batch.

     :param pi_latent_vector: latent features feeding the policy head
     :param vf_latent_vector: latent features feeding the Q-value head
     :param pi_init_scale: init scale of the policy mean layer
     :param pi_init_bias: init bias of the policy mean layer
     :param pi_init_std: initial standard deviation of the Gaussian policy
     :param vf_init_scale: init scale of the Q-value layer
     :param vf_init_bias: init bias of the Q-value layer
     :return: (probability distribution, mean tensor, q_values tensor)
     """
     pi_mean = linear(pi_latent_vector, 'pi', self.size,
                      init_scale=pi_init_scale, init_bias=pi_init_bias)
     # Fixed (trainable=False) per-dimension log standard deviation.
     pi_logstd = tf.get_variable(
         name='pi/logstd',
         shape=[1, self.size],
         initializer=tf.constant_initializer(np.log(pi_init_std)),
         trainable=False)
     # `pi_mean * 0.0 + pi_logstd` broadcasts the [1, size] log-std up to
     # the batch dimension before concatenating with the mean.
     flat_params = tf.concat([pi_mean, pi_mean * 0.0 + pi_logstd], axis=1)
     q_values = linear(vf_latent_vector, 'q', self.size,
                       init_scale=vf_init_scale, init_bias=vf_init_bias)
     return self.proba_distribution_from_flat(flat_params), pi_mean, q_values
コード例 #2
0
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None, net_arch=None,
                 act_fun=tf.tanh, cnn_extractor=nature_cnn, feature_extraction="mlp", **kwargs):
        """Build a feed-forward (CNN or MLP) actor-critic policy.

        :param sess: (tf.Session) the current TensorFlow session
        :param ob_space: observation space of the environment
        :param ac_space: action space of the environment
        :param n_env: (int) number of parallel environments
        :param n_steps: (int) number of steps per environment per batch
        :param n_batch: (int) total batch size
        :param reuse: (bool) whether to reuse the model variable scope
        :param layers: (list) DEPRECATED — hidden layer sizes; use ``net_arch``
        :param net_arch: (list) architecture spec for ``mlp_extractor``
        :param act_fun: activation function for the MLP extractor
        :param cnn_extractor: callable building the CNN feature extractor
        :param feature_extraction: ("cnn" or "mlp") which extractor to use
        :param kwargs: extra keyword arguments forwarded to the CNN extractor
        """
        super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse,
                                                scale=(feature_extraction == "cnn"))

        self._pdtype = make_proba_dist_type(ac_space)

        # Reject unexpected **kwargs unless they are CNN extractor options.
        self._kwargs_check(feature_extraction, kwargs)

        if layers is not None:
            warnings.warn("Usage of the `layers` parameter is deprecated! Use net_arch instead "
                          "(it has a different semantics though).", DeprecationWarning)
            if net_arch is not None:
                warnings.warn("The new `net_arch` parameter overrides the deprecated `layers` parameter!",
                              DeprecationWarning)

        # Legacy fallback: translate `layers` into the `net_arch` format.
        if net_arch is None:
            if layers is None:
                layers = [64, 64]
            net_arch = [dict(vf=layers, pi=layers)]

        with tf.variable_scope("model", reuse=reuse):
            if feature_extraction == "cnn":
                # CNN features are shared by the policy and value heads.
                pi_latent = vf_latent = cnn_extractor(self.processed_obs, **kwargs)
            else:
                pi_latent, vf_latent = mlp_extractor(tf.layers.flatten(self.processed_obs), net_arch, act_fun)

            self._value_fn = linear(vf_latent, 'vf', 1)

            # pi_init_std=0.125 gives the Gaussian policy a small initial
            # exploration noise (log-std is non-trainable in this pdtype).
            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent,
                                                           pi_init_scale=1.0, pi_init_bias=0.0, pi_init_std=0.125,
                                                           vf_init_scale=1.0, vf_init_bias=0.0)

        self._setup_init()
コード例 #3
0
def sf_cnn(scaled_images, **kwargs) -> tf.Tensor:
    """Convolutional feature extractor (Nature-DQN style conv stack).

    Three ReLU conv layers (32 filters 8x8/s4, 64 filters 4x4/s2,
    64 filters 3x3/s1) followed by a fully connected ReLU layer of
    ``FEATURE_SIZE`` units, which is the returned feature vector.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keyword parameters for the convolutional layers
    :return: (TensorFlow Tensor) extracted features of shape [batch, FEATURE_SIZE]
    """
    activ = tf.nn.relu
    layer_1 = activ(
        conv(scaled_images,
             'c1',
             n_filters=32,
             filter_size=8,
             stride=4,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_2 = activ(
        conv(layer_1,
             'c2',
             n_filters=64,
             filter_size=4,
             stride=2,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = activ(
        conv(layer_2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    # Flatten the conv output before the fully connected projection.
    layer_3 = conv_to_fc(layer_3)
    return activ(
        linear(layer_3, 'fc1', n_hidden=FEATURE_SIZE, init_scale=np.sqrt(2)))
コード例 #4
0
    def __init__(self,
                 sess: tf.Session,
                 ob_space: ObservSpace,
                 ac_space: ActionSpace,
                 n_env: int,
                 n_steps: int,
                 n_batch: int,
                 reuse=False,
                 layers=None,
                 cnn_extractor=sf_cnn,
                 feature_extraction="cnn",
                 add_action_ph=True,
                 **kwargs):
        """Feed-forward policy with successor-feature and reconstruction heads.

        On top of the shared CNN features (L2-normalised), this builds:
        a reconstruction module conditioned on the action placeholder, a
        successor-feature (SF) estimator, a scalar value head, and the
        action distribution. ``reward_bonus`` is the reciprocal of the
        SF L2 norm, used as an exploration bonus.

        :param sess: (tf.Session) the current TensorFlow session
        :param ob_space: observation space
        :param ac_space: action space (discrete; ``ac_space.n`` is used)
        :param n_env: (int) number of parallel environments
        :param n_steps: (int) steps per environment per batch
        :param n_batch: (int) total batch size
        :param reuse: (bool) whether to reuse the model variable scope
        :param layers: (list) hidden layer sizes for the (unimplemented) MLP path
        :param cnn_extractor: callable building the CNN feature extractor
        :param feature_extraction: only "cnn" is implemented
        :param add_action_ph: (bool) create an action placeholder (needed by
            the reconstruction module)
        :param kwargs: extra keyword arguments forwarded to the CNN extractor
        :raises NotImplementedError: if ``feature_extraction != "cnn"``
        """
        super(FeedForwardPolicy,
              self).__init__(sess,
                             ob_space,
                             ac_space,
                             n_env,
                             n_steps,
                             n_batch,
                             n_lstm=256,
                             reuse=reuse,
                             scale=(feature_extraction == "cnn"),
                             add_action_ph=add_action_ph)
        if layers is None:
            layers = [64, 64]

        with tf.variable_scope("model", reuse=reuse):
            value_fn: tf.Tensor = None
            recons_mod: tf.Tensor = None
            successor_feature: tf.Tensor = None
            extracted_features: tf.Tensor = None
            if feature_extraction == "cnn":
                extracted_features = cnn_extractor(self.processed_x, **kwargs)
                # L2-normalise features so the SF norm (and hence the reward
                # bonus below) is not dominated by raw feature magnitude.
                assert len(extracted_features.shape) == 2
                extracted_features = tf.nn.l2_normalize(extracted_features,
                                                        axis=1)
                # Machado-style reconstruction module, conditioned on action.
                recons_mod = reconstruct(extracted_features,
                                         'reconstruct',
                                         action_ph=self.action_ph,
                                         num_action_space=ac_space.n)
                # Machado-style successor-feature estimator.
                successor_feature = sf_estimator(extracted_features)
                value_fn = linear(extracted_features, 'vf', 1)
                # Policy and value heads share the extracted features.
                pi_latent = extracted_features
                vf_latent = extracted_features
            else:
                raise NotImplementedError(
                    'Not implement reconstruction module yet.')

            # NOTE: this pdtype's proba_distribution_from_latent takes
            # pi_init_scale / vf_init_scale (not `init_scale`); passing
            # init_scale= would raise a TypeError at graph-build time.
            self.proba_distribution, self.policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(
                    pi_latent, vf_latent, pi_init_scale=0.01)

        self.value_fn = value_fn
        self.recons_mod = recons_mod
        self.successor_feature = successor_feature
        # Exploration bonus: inverse of the SF L2 norm per batch element.
        self.reward_bonus = tf.math.reciprocal(
            tf.linalg.norm(self.successor_feature, 2, axis=1))
        self._feature = extracted_features
        self.initial_state = None
        self._setup_init()