Example #1
def sac_cnn_lstm(scaled_images, **kwargs):
    activ = tf.nn.relu
    conv1 = activ(
        conv(scaled_images,
             'c1',
             n_filters=32,
             filter_size=5,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    conv2 = activ(
        conv(conv1,
             'c2',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    conv3 = activ(
        conv(conv2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=2,
             init_scale=np.sqrt(2),
             **kwargs))
    conv3 = conv_to_fc(conv3)
    # try w/o LSTM first
    return activ(linear(conv3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
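For context, here is a minimal, hypothetical sketch of how an extractor like this is plugged into a stable-baselines (v2, TF1) policy; the environment id and timestep count are placeholders, not part of the original example.

from stable_baselines import SAC
from stable_baselines.sac.policies import CnnPolicy

# policy_kwargs forwards cnn_extractor to the policy's feature extractor.
model = SAC(CnnPolicy, 'CarRacing-v0',
            policy_kwargs=dict(cnn_extractor=sac_cnn_lstm))
model.learn(total_timesteps=10000)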
Example #2
def modified_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=64, filter_size=2, stride=1, **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=128, filter_size=2, stride=1, **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=256, filter_size=2, stride=1, **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
Example #3
def Cnn1(image, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(
        conv(image,
             'c1',
             n_filters=32,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_2 = activ(
        conv(layer_1,
             'c2',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = activ(
        conv(layer_2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
Example #4
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(NatureCNN, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse, scale=True)

        with tf.variable_scope("model", reuse=reuse):
            activ = tf.nn.relu
            input = self.processed_obs

            layer_1 = activ(conv(input, 'c1', n_filters=32, filter_size=8, stride=4, init_scale=np.sqrt(2), **kwargs))
            layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=4, stride=2, init_scale=np.sqrt(2), **kwargs))
            layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1, init_scale=np.sqrt(2), **kwargs))
            layer_3 = conv_to_fc(layer_3)
            extracted_features = activ(linear(layer_3, 'fc1', n_hidden=256, init_scale=np.sqrt(2)))

            value_fn = tf.layers.dense(extracted_features, 1, name='vf')

            self.proba_distribution, self.policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(extracted_features, extracted_features, init_scale=0.01)

        self.value_fn = value_fn
        self.initial_state = None
        self._setup_init()

        total = 0
        for v in tf.trainable_variables():
            dims = v.get_shape().as_list()
            num  = int(np.prod(dims))
            total += num
            print('  %s \t\t Num: %d \t\t Shape %s ' % (v.name, num, dims))
        print('\nTotal number of params: %d' % total)
Example #5
def ppo_cnn(scaled_images, **kwargs):
    activ = tf.nn.elu
    conv1 = activ(
        conv(scaled_images,
             'c1',
             n_filters=32,
             filter_size=5,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    conv2 = activ(
        conv(conv1,
             'c2',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    conv3 = activ(
        conv(conv2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=2,
             init_scale=np.sqrt(2),
             **kwargs))
    conv3 = conv_to_fc(conv3)
    return activ(linear(conv3, 'fc1', n_hidden=512, init_scale=0.01))
Example #6
    def proba_distribution_from_latent_infer(self,
                                             infer_latent_vector,
                                             init_scale=1.0,
                                             init_bias=0.0,
                                             std_normal=False,
                                             prior_std=0):
        if std_normal:
            pdparam = tf.concat(
                [tf.zeros([1, self.size]), prior_std * tf.ones([1, self.size])],
                axis=1)
            mean = tf.zeros([1, self.size])
            return self.proba_distribution_from_flat(pdparam), mean
        else:
            mean = linear(infer_latent_vector,
                          'infer',
                          self.size,
                          init_scale=init_scale,
                          init_bias=init_bias)
            logstd = tf.get_variable(name='infer/logstd',
                                     shape=[1, self.size],
                                     initializer=tf.zeros_initializer())
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)

            return self.proba_distribution_from_flat(pdparam), mean
Example #7
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 net_arch=None,
                 act_fun=tf.tanh,
                 feature_extraction="cnn",
                 **kwargs):
        super(RelationalPolicy,
              self).__init__(sess,
                             ob_space,
                             ac_space,
                             n_env,
                             n_steps,
                             n_batch,
                             reuse=reuse,
                             scale=(feature_extraction == "cnn"))
        self._kwargs_check(feature_extraction, kwargs)
        with tf.variable_scope("model", reuse=reuse):
            print('self.processed_obs', self.processed_obs)
            relation_block_output = self.relation_block(self.processed_obs)
            pi_latent = vf_latent = tf.layers.flatten(relation_block_output)
            # original code
            self._value_fn = linear(vf_latent, 'vf', 1)
            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._setup_init()
Example #8
def tic_tac_toe_cnn(scaled_images, **kwargs):
    """
    Custom CNN for Tic Tac Toe env.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.nn.relu
    layer = scaled_images

    # print(kwargs)
    net_arch = kwargs['cnn_arch']
    filter_size = kwargs['filter_size']
    pad = kwargs['pad']

    for i, f in enumerate(net_arch[:-1], start=1):
        # print('c' + str(i), f)
        layer = activ(conv(layer, 'c' + str(i), n_filters=f, filter_size=filter_size,
                           stride=1, pad=pad, data_format='NCHW'))

    layer = conv_to_fc(layer)

    # print('fc1', net_arch[-1])
    # print()
    return activ(linear(layer, 'fc1', n_hidden=net_arch[-1]))
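Because this extractor reads its architecture from kwargs, those keys have to be supplied when the policy is built. A hedged sketch of the wiring with made-up values; the key names come from the function above, while the PPO2 call and env are illustrative:

# 'cnn_arch' lists the conv filter counts plus the final fc width.
policy_kwargs = dict(
    cnn_extractor=tic_tac_toe_cnn,
    cnn_arch=[32, 64, 128],  # convs of 32 and 64 filters, then fc1 with 128 units
    filter_size=3,
    pad='SAME',
)
# env is a previously constructed Gym environment (assumed).
model = PPO2('CnnPolicy', env, policy_kwargs=policy_kwargs)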
Example #9
def cnn_3d(scaled_voxels, n_hidden, filters, filter_sizes, strides, **kwargs):
    """
    CNN in 3D.
    :param scaled_voxels: (TensorFlow Tensor) Voxel input placeholder
    :param n_hidden: (int) Number of nodes in the last linear layer
    :param filters: (array) Filter numbers for the convolutional layers of the CNN
    :param filter_sizes: (array) Filter sizes for the convolutional layers of the CNN
    :param strides: (array) Strides for the convolutional layers of the CNN
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.tanh

    layers = []

    for i, (n_filter, filter_size,
            stride) in enumerate(zip(filters, filter_sizes, strides)):

        input_layer = scaled_voxels if i == 0 else layers[-1]
        label = 'c%d' % (i + 1)
        layer = activ(
            conv3d(input_layer,
                   label,
                   n_filters=n_filter,
                   filter_size=filter_size,
                   stride=stride,
                   init_scale=np.sqrt(2),
                   **kwargs))
        layers.append(layer)
        print('layer_%d' % (i + 1), layer.shape)

    layer = conv_to_fc(layers[-1])

    return tf.tanh(
        linear(layer, 'fc1', n_hidden=n_hidden, init_scale=np.sqrt(2)))
Example #10
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None, net_arch=None,
                 act_fun=tf.tanh, cnn_extractor=nature_cnn, feature_extraction="cnn", **kwargs):
        super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse,
                                                scale=(feature_extraction == "cnn"))

        self._kwargs_check(feature_extraction, kwargs)

        if layers is not None:
            warnings.warn("Usage of the `layers` parameter is deprecated! Use net_arch instead "
                          "(it has a different semantics though).", DeprecationWarning)
            if net_arch is not None:
                warnings.warn("The new `net_arch` parameter overrides the deprecated `layers` parameter!",
                              DeprecationWarning)

        if net_arch is None:
            if layers is None:
                layers = [64, 64]
            net_arch = [dict(vf=layers, pi=layers)]

        with tf.variable_scope("model", reuse=reuse):
            if feature_extraction == "cnn":
                pi_latent = vf_latent = cnn_extractor(self.processed_obs, **kwargs)
            else:
                pi_latent, vf_latent = mlp_extractor(tf.layers.flatten(self.processed_obs), net_arch, act_fun)

            self.value_fn = linear(vf_latent, 'vf', 1)

            self.proba_distribution, self.policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self.initial_state = None
        self._setup_init()
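For reference, net_arch in this API mixes optional shared layers with a pi/vf branching dict; a hedged sketch of equivalent constructions (the widths are illustrative):

# The deprecated layers=[64, 64] default maps onto this net_arch form:
net_arch = [dict(pi=[64, 64], vf=[64, 64])]

# net_arch can also declare shared layers before the pi/vf split:
net_arch = [128, dict(pi=[64], vf=[32])]  # one shared 128-unit layer, then separate heads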
Example #11
def embedding(entities, n_heads, embedding_sizes, scope):
    """
    :param entities: (TensorFlow Tensor) The input entities : [B,N,D]
    :param scope: (str) The TensorFlow variable scope
    :param n_heads: (float) The number of attention heads to use
    :return: (TensorFlow Tensor) [B,n_heads,N,embedding_sizes[i]]
    """
    with tf.variable_scope(scope):
        N = entities.shape[1].value
        channels = entities.shape[2].value
        # total_size Denoted as F, n_heads Denoted as H
        total_size = sum(embedding_sizes) * n_heads
        # [B*N,D]
        entities = tf.reshape(entities, [-1, channels])
        # [B*N,F] F = sum(embedding_sizes) * n_heads
        embedded_entities = linear(entities, "mlp", total_size)
        # [B*N,F] --> [B,N,F]
        embedded_entities = tf.reshape(embedded_entities, [-1, N, total_size])
        # [B,N,F]
        qkv = layerNorm(embedded_entities, "ln")
        # qkv = batchNorm(embedded_entities, "bn")
        # qkv = instanceNorm(embedded_entities, "instacne_n")
        # qkv = FRNorm(embedded_entities, 'FRNorm')
        # # [B,N,F]
        # qkv = tf.reshape(qkv, [-1, N, total_size])
        # [B,N,n_heads,sum(embedding_sizes)]
        qkv = tf.reshape(qkv, [-1, N, n_heads, sum(embedding_sizes)])
        # [B,N,n_heads,sum(embedding_sizes)] -> [B,n_heads,N,sum(embedding_sizes)]
        qkv = tf.transpose(qkv, [0, 2, 1, 3])
        return tf.split(qkv, embedding_sizes, -1)
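To make the shape bookkeeping concrete, here is a small numpy trace of the same reshape/transpose/split sequence; the batch size, entity count and head sizes are made-up numbers, not from the original:

import numpy as np

B, N, H = 2, 5, 3                      # batch, entities, heads (illustrative)
embedding_sizes = [16, 16, 16]         # per-head widths of q, k, v
F = sum(embedding_sizes) * H           # total embedding width: 144

qkv = np.zeros((B * N, F))                          # after the linear layer: [B*N,F]
qkv = qkv.reshape(-1, N, H, sum(embedding_sizes))   # [B,N,H,sum(sizes)]
qkv = qkv.transpose(0, 2, 1, 3)                     # [B,H,N,sum(sizes)]
# np.split takes cut indices where tf.split takes sizes:
q, k, v = np.split(qkv, np.cumsum(embedding_sizes)[:-1], axis=-1)
print(q.shape, k.shape, v.shape)                    # each (2, 3, 5, 16)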
Example #12
def minigrid_extractor_small(scaled_images, **kwargs):
    """
    CNN for MiniGrid environments with variable grid sizes
    """
    activ = tf.nn.relu
    # first layer is just an embedding finder
    layer_1 = conv(scaled_images,
                   'c1',
                   n_filters=32,
                   filter_size=1,
                   stride=1,
                   init_scale=np.sqrt(2),
                   **kwargs)
    layer_2 = activ(
        conv(layer_1,
             'c2',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = activ(
        conv(layer_2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_4 = conv_to_fc(layer_3)
    print(layer_3)
    return activ(linear(layer_4, 'fc1', n_hidden=128, init_scale=np.sqrt(2)))
Example #13
    def proba_distribution_from_latent(self,
                                       pi_latent_vector,
                                       vf_latent_vector,
                                       init_scale=1.0,
                                       init_bias=0.0):
        pdparam = linear(pi_latent_vector,
                         'pi',
                         self.size,
                         init_scale=init_scale,
                         init_bias=init_bias)
        q_values = linear(vf_latent_vector,
                          'q',
                          self.size,
                          init_scale=init_scale,
                          init_bias=init_bias)
        return self.proba_distribution_from_flat(pdparam), pdparam, q_values
Example #14
def nature_cnn(scaled_images, **kwargs):
    """
    CNN from Nature paper.

    :param scaled_images: (TensorFlow Tensor) Image input placeholder
    :param kwargs: (dict) Extra keywords parameters for the convolutional layers of the CNN
    :return: (TensorFlow Tensor) The CNN output layer
    """
    activ = tf.nn.relu
    layer_1 = activ(
        conv(scaled_images,
             'c1',
             n_filters=32,
             filter_size=8,
             stride=4,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_2 = activ(
        conv(layer_1,
             'c2',
             n_filters=64,
             filter_size=4,
             stride=2,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = activ(
        conv(layer_2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
Example #15
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, n_lstm=256, reuse=False, layers=None,
                 net_arch=None, layer_norm=False, feature_extraction="cnn",
                 **kwargs):
        # state_shape = [n_lstm * 2] dim because of the cell and hidden states of the LSTM
        super(RelationalLstmPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch,
                                                   state_shape=(2 * n_lstm, ), reuse=reuse,
                                                   scale=(feature_extraction == "cnn"))

        self._kwargs_check(feature_extraction, kwargs)

        with tf.variable_scope("model", reuse=reuse):
            print('self.processed_obs', self.processed_obs)
            relation_block_output = self.relation_block(self.processed_obs)

            # original code
            input_sequence = batch_to_seq(relation_block_output, self.n_env, n_steps)
            print('input_sequence', input_sequence)
            masks = batch_to_seq(self.dones_ph, self.n_env, n_steps)
            rnn_output, self.snew = lstm(input_sequence, masks, self.states_ph, 'lstm1', n_hidden=n_lstm,
                                         layer_norm=layer_norm)
            rnn_output = seq_to_batch(rnn_output)
            value_fn = linear(rnn_output, 'vf', 1)
            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(rnn_output, rnn_output)

        self._value_fn = value_fn

        self._setup_init()
Example #16
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 **kwargs):
        super(NatureCNN, self).__init__(sess,
                                        ob_space,
                                        ac_space,
                                        n_env,
                                        n_steps,
                                        n_batch,
                                        reuse=reuse,
                                        scale=True)

        with tf.variable_scope("model", reuse=reuse):
            activ = tf.nn.relu

            input = self.processed_obs

            layer_1 = activ(
                conv(input,
                     'c1',
                     n_filters=32,
                     filter_size=8,
                     stride=4,
                     init_scale=np.sqrt(2),
                     **kwargs))
            layer_2 = activ(
                conv(layer_1,
                     'c2',
                     n_filters=64,
                     filter_size=4,
                     stride=2,
                     init_scale=np.sqrt(2),
                     **kwargs))
            layer_3 = activ(
                conv(layer_2,
                     'c3',
                     n_filters=64,
                     filter_size=3,
                     stride=1,
                     init_scale=np.sqrt(2),
                     **kwargs))
            layer_3 = conv_to_fc(layer_3)
            extracted_features = activ(
                linear(layer_3, 'fc1', n_hidden=256, init_scale=np.sqrt(2)))

            value_fn = tf.layers.dense(extracted_features, 1, name='vf')

            self.proba_distribution, self.policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(extracted_features, extracted_features, init_scale=0.01)

        self.value_fn = value_fn
        self.initial_state = None
        self._setup_init()
Example #17
def modified_cnn(unscaled_images, **kwargs):
    import tensorflow as tf
    scaled_images = tf.cast(unscaled_images, tf.float32) / 255.
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=1, stride=1, init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=32, filter_size=2, stride=2, init_scale=np.sqrt(2), **kwargs))
    layer_2 = conv_to_fc(layer_2)
    return activ(linear(layer_2, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
Example #18
def modified_cnn(scaled_images, **kwargs):
    import tensorflow as tf
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=3, stride=1, init_scale=np.sqrt(2), **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3, stride=1, init_scale=np.sqrt(2), **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=64, filter_size=3, stride=1, init_scale=np.sqrt(2), **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
Example #19
def modified_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    # layer 1
    conv1 = conv(scaled_images, "c1", n_filters=128, filter_size=(1, 2), stride=1, init_scale=np.sqrt(2), **kwargs)
    conv2 = conv(scaled_images, "c2", n_filters=128, filter_size=(2, 1), stride=1, init_scale=np.sqrt(2), **kwargs)
    relu1 = activ(conv1)
    relu2 = activ(conv2)
    # layer 2
    conv11 = conv(relu1, "c3", n_filters=128, filter_size=(1, 2), stride=1, init_scale=np.sqrt(2), **kwargs)
    conv12 = conv(relu1, "c4", n_filters=128, filter_size=(2, 1), stride=1, init_scale=np.sqrt(2), **kwargs)
    # distinct scope names ('c5', 'c6') avoid TF variable-name collisions with 'c3'/'c4' above
    conv21 = conv(relu2, "c5", n_filters=128, filter_size=(1, 2), stride=1, init_scale=np.sqrt(2), **kwargs)
    conv22 = conv(relu2, "c6", n_filters=128, filter_size=(2, 1), stride=1, init_scale=np.sqrt(2), **kwargs)
    # layer2 relu activation
    relu11 = tf.nn.relu(conv11)
    relu12 = tf.nn.relu(conv12)
    relu21 = tf.nn.relu(conv21)
    relu22 = tf.nn.relu(conv22)

    # get shapes of all activations
    shape1 = relu1.get_shape().as_list()
    shape2 = relu2.get_shape().as_list()

    shape11 = relu11.get_shape().as_list()
    shape12 = relu12.get_shape().as_list()
    shape21 = relu21.get_shape().as_list()
    shape22 = relu22.get_shape().as_list()

    # expansion
    hidden1 = tf.reshape(relu1, [-1, shape1[1] * shape1[2] * shape1[3]])
    hidden2 = tf.reshape(relu2, [-1, shape2[1] * shape2[2] * shape2[3]])

    hidden11 = tf.reshape(relu11, [-1, shape11[1] * shape11[2] * shape11[3]])
    hidden12 = tf.reshape(relu12, [-1, shape12[1] * shape12[2] * shape12[3]])
    hidden21 = tf.reshape(relu21, [-1, shape21[1] * shape21[2] * shape21[3]])
    hidden22 = tf.reshape(relu22, [-1, shape22[1] * shape22[2] * shape22[3]])

    # concatenation
    hidden = tf.concat([hidden1, hidden2, hidden11, hidden12, hidden21, hidden22], axis=1)

    # linear layer 1
    linear_1 = activ(linear(hidden, scope="fc1", n_hidden=512, init_scale=np.sqrt(2)))

    # linear layer 2
    linear_2 = activ(linear(linear_1, scope="fc2", n_hidden=128, init_scale=np.sqrt(2)))

    return linear_2
Example #20
    def proba_distribution_from_latent(self,
                                       pi_latent_vector,
                                       vf_latent_vector,
                                       init_scale=1.0,
                                       init_bias=0.0,
                                       mult_tensors=None,
                                       policy=None):
        if mult_tensors is not None:
            mean = linear_with_mult(mult_tensors,
                                    pi_latent_vector,
                                    'pi',
                                    self.size,
                                    init_scale=init_scale,
                                    init_bias=init_bias)
            logstd = get_mult_variable(mult_tensors,
                                       name="pi/logstd",
                                       shape=[1, self.size],
                                       initializer=tf.zeros_initializer())

            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            q_values = linear_with_mult(mult_tensors,
                                        vf_latent_vector,
                                        'q',
                                        self.size,
                                        init_scale=init_scale,
                                        init_bias=init_bias)
            return self.proba_distribution_from_flat(pdparam), mean, q_values
        else:
            mean = linear(pi_latent_vector,
                          'pi',
                          self.size,
                          init_scale=init_scale,
                          init_bias=init_bias)
            policy.policy_neurons.append(mean)
            logstd = tf.get_variable(name='pi/logstd',
                                     shape=[1, self.size],
                                     initializer=tf.zeros_initializer())
            policy.policy_neurons.append(logstd)

            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            q_values = linear(vf_latent_vector,
                              'q',
                              self.size,
                              init_scale=init_scale,
                              init_bias=init_bias)
            return self.proba_distribution_from_flat(pdparam), mean, q_values
Example #21
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 **kwargs):
        super(CustomWPPolicy, self).__init__(sess,
                                             ob_space,
                                             ac_space,
                                             n_env,
                                             n_steps,
                                             n_batch,
                                             reuse=reuse,
                                             scale=False)

        with tf.variable_scope("model", reuse=reuse):
            activ = tf.nn.tanh

            measurement_features = tf.expand_dims(self.processed_obs[:, -1],
                                                  axis=1)
            measurement_features_flat = tf.layers.flatten(measurement_features)

            pi_h = activ(
                linear(measurement_features_flat,
                       "pi_vae_fc",
                       64,
                       init_scale=np.sqrt(2)))
            pi_latent = activ(linear(pi_h, "pi_fc", 64, init_scale=np.sqrt(2)))

            vf_h = activ(
                linear(measurement_features_flat,
                       "vf_vae_fc",
                       64,
                       init_scale=np.sqrt(2)))
            vf_latent = activ(linear(vf_h, "vf_fc", 64, init_scale=np.sqrt(2)))

            value_fn = linear(vf_latent, 'vf', 1, init_scale=np.sqrt(2))

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._value_fn = value_fn
        self._setup_init()
Example #22
def vf_builder(vf_arch: str,
               latent: tf.Tensor,
               act_fun: tf.function,
               shared_graph: GraphsTuple = None,
               input_graph: GraphsTuple = None,
               layer_size: int = 64,
               layer_count: int = 3,
               iterations: int = 10) -> tf.Tensor:
    """
    Builds the value function network for
    Args:
        vf_arch: arch to use as a string
        latent: the observation input
        act_fun: activation function
        shared_graph: the gnn output from the policy
        input_graph: GraphTuple before any processing
        iterations: number of iterations of message passing
    Returns:
        A tensor which will hold the value
    """
    if vf_arch == "shared":
        output_globals_vf = tf.reshape(shared_graph.globals, [-1, layer_size])
        latent_vf = output_globals_vf
        latent_vf = act_fun(
            linear(latent_vf, "vf_fc0", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(
            linear(latent_vf, "vf_fc1", 128, init_scale=np.sqrt(2)))
    elif vf_arch == "graph":
        model_vf = DDRGraphNetwork(layer_size=layer_size)
        output_graph_vf = model_vf(input_graph, iterations)
        output_globals_vf = tf.reshape(output_graph_vf.globals,
                                       [-1, layer_size])
        latent_vf = output_globals_vf
    elif vf_arch == "mlp":
        latent_vf = latent
        latent_vf = act_fun(
            linear(latent_vf, "vf_fc0", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(
            linear(latent_vf, "vf_fc1", 128, init_scale=np.sqrt(2)))
        latent_vf = act_fun(
            linear(latent_vf, "vf_fc2", 128, init_scale=np.sqrt(2)))
    else:
        raise ValueError("Unknown vf_arch: %s" % vf_arch)

    return latent_vf
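A minimal, hypothetical invocation of the "mlp" branch, which needs only the latent tensor and an activation; the placeholder shape is illustrative:

# TF1-style sketch; the graph arguments are ignored by the "mlp" branch.
latent = tf.placeholder(tf.float32, [None, 64])
latent_vf = vf_builder("mlp", latent, tf.nn.relu)
# latent_vf is [batch, 128]; a final linear 'vf' head (as in the policies
# above) would map it to a scalar state value.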
Example #23
def my_small_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=3,
                         stride=1, **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=3,
                         stride=1, **kwargs))
    layer_3 = conv_to_fc(layer_2)
    return activ(
        linear(layer_3, 'fc1', n_hidden=32, init_scale=np.sqrt(2)))
Example #24
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 layers=None,
                 net_arch=None,
                 act_fun=tf.tanh,
                 cnn_extractor=None,
                 feature_extraction="cnn",
                 **kwargs):
        super(CustomFeedForwardPolicy, self).__init__(sess,
                                                      ob_space,
                                                      ac_space,
                                                      n_env,
                                                      n_steps,
                                                      n_batch,
                                                      reuse=reuse,
                                                      scale=False)

        self._kwargs_check(feature_extraction, kwargs)

        with tf.variable_scope("model", reuse=reuse):
            if feature_extraction == "cnn":
                pi_latent = vf_latent = cnn_extractor(self.processed_obs,
                                                      **kwargs)
            else:
                raise ValueError("only cnn feature extraction is supported here")

            assert str(
                type(self.pdtype)
            ) == "<class 'stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType'>"

            self._value_fn = linear(vf_latent, 'vf', 1)

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(
                    pi_latent, vf_latent, init_scale=0.01)


        self._setup_init()
Example #25
def attention_cnn(scaled_images, **kwargs):
    """Nature CNN with region-sensitive module"""
    def softmax_2d(tensor):
        b, h, w, c = tensor.shape
        tensor = tf.reshape(tensor, (-1, h * w, c))
        tensor = tf.nn.softmax(tensor, axis=1)
        tensor = tf.reshape(tensor, (-1, h, w, c))
        return tensor

    c1 = tf.nn.relu(
        conv(scaled_images,
             'c1',
             n_filters=32,
             filter_size=8,
             stride=4,
             init_scale=np.sqrt(2),
             **kwargs))
    c2 = tf.nn.relu(
        conv(c1,
             'c2',
             n_filters=64,
             filter_size=4,
             stride=2,
             init_scale=np.sqrt(2),
             **kwargs))
    c3 = tf.nn.relu(
        conv(c2,
             'c3',
             n_filters=64,
             filter_size=3,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    c3 = tf.nn.l2_normalize(c3, axis=-1)

    a1 = tf.nn.elu(
        conv(c3,
             'a1',
             n_filters=512,
             filter_size=1,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    a2 = softmax_2d(
        conv(a1,
             'a2',
             n_filters=2,
             filter_size=1,
             stride=1,
             init_scale=np.sqrt(2),
             **kwargs))
    a2 = tf.identity(a2, name='attn')

    x = c3 * tf.reduce_sum(a2, axis=-1, keepdims=True)

    x = conv_to_fc(x)
    return tf.nn.relu(linear(x, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))
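As a sanity check on the region-sensitive module, here is a small numpy equivalent of softmax_2d showing that each attention channel sums to 1 over the H*W positions; the shapes are illustrative:

import numpy as np

def softmax_2d_np(x):                     # x: [B, H, W, C]
    b, h, w, c = x.shape
    flat = x.reshape(b, h * w, c)
    flat = np.exp(flat - flat.max(axis=1, keepdims=True))  # stabilized softmax
    flat = flat / flat.sum(axis=1, keepdims=True)
    return flat.reshape(b, h, w, c)

attn = softmax_2d_np(np.random.randn(1, 7, 7, 2))
print(attn.sum(axis=(1, 2)))              # ~[[1. 1.]]: each channel is a spatial
                                          # distribution over the 49 positions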
Example #26
    def proba_distribution_from_latent(self,
                                       pi_latent_vector,
                                       vf_latent_vector,
                                       action_mask_vector=None,
                                       init_scale=1.0,
                                       init_bias=0.0):
        pdparam = linear(pi_latent_vector,
                         'pi',
                         sum(self.n_vec),
                         init_scale=init_scale,
                         init_bias=init_bias)
        q_values = linear(vf_latent_vector,
                          'q',
                          sum(self.n_vec),
                          init_scale=init_scale,
                          init_bias=init_bias)
        return self.proba_distribution_from_flat(
            pdparam, action_mask_vector=action_mask_vector), pdparam, q_values
Example #27
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 layers=None,
                 cnn_extractor=nature_cnn,
                 feature_extraction="cnn",
                 obs_phs=None,
                 layer_norm=False,
                 **kwargs):
        super(FeedForwardPolicy,
              self).__init__(sess,
                             ob_space,
                             ac_space,
                             n_env,
                             n_steps,
                             n_batch,
                             n_lstm=256,
                             reuse=reuse,
                             scale=(feature_extraction == "cnn"),
                             obs_phs=obs_phs)
        if layers is None:
            layers = [64, 64]

        with tf.variable_scope("model", reuse=reuse):
            if feature_extraction == "cnn":
                extracted_features = cnn_extractor(self.processed_x, **kwargs)
                pi_latent = extracted_features
            else:
                activ = tf.nn.relu
                processed_x = tf.layers.flatten(self.processed_x)
                pi_h = processed_x
                for i, layer_size in enumerate(layers):
                    pi_h = linear(pi_h,
                                  'pi_fc' + str(i),
                                  n_hidden=layer_size,
                                  init_scale=np.sqrt(2))
                    if layer_norm:
                        pi_h = tf.contrib.layers.layer_norm(pi_h,
                                                            center=True,
                                                            scale=True)
                    pi_h = activ(pi_h)
                pi_latent = pi_h

            self.proba_distribution, self.policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, pi_latent, init_scale=0.01)

        self.value_fn = self.policy
        self.initial_state = None
        self._setup_init()
Example #28
    def cnn_extractor(scaled_images, channels=c, w=w, h=h):
        print(f"========= REAL SHAPE: {scaled_images.shape} ===========")
        original_shape = scaled_images.shape[1]
        print(f"========= SHAPE: {original_shape} ===========")
        scaled_images = tf.reshape(scaled_images, (-1, h, w, channels))
        activ = tf.nn.relu
        layer_1 = activ(conv(scaled_images, 'c1', n_filters=32, filter_size=w, stride=1, init_scale=np.sqrt(2)))
        layer_2 = activ(conv(layer_1, 'c2', n_filters=64, filter_size=1, stride=1, init_scale=np.sqrt(2)))
        layer_3 = activ(conv(layer_2, 'c3', n_filters=128, filter_size=1, stride=1, init_scale=np.sqrt(2)))
        layer_3 = conv_to_fc(layer_3)
        return activ(linear(layer_3, 'fc1', n_hidden=128, init_scale=np.sqrt(2)))
Example #29
    def proba_distribution_from_latent(self,
                                       pi_latent_vector,
                                       vf_latent_vector,
                                       init_scale=1.0,
                                       init_bias=0.0):
        mean = linear(pi_latent_vector,
                      'pi',
                      self.size,
                      init_scale=init_scale,
                      init_bias=init_bias)
        logstd = tf.get_variable(name='pi/logstd',
                                 shape=[1, self.size],
                                 initializer=tf.zeros_initializer())
        pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        q_values = linear(vf_latent_vector,
                          'q',
                          self.size,
                          init_scale=init_scale,
                          init_bias=init_bias)
        return self.proba_distribution_from_flat(pdparam), mean, q_values
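The concat packs the mean and a broadcast log-std side by side, so pdparam has width 2 * size; a tiny numpy illustration with made-up dimensions:

import numpy as np

size = 4
mean = np.zeros((3, size))                  # batch of 3 action means
logstd = np.zeros((1, size))                # a single shared log-std row
# mean * 0.0 + logstd broadcasts logstd across the batch dimension:
pdparam = np.concatenate([mean, mean * 0.0 + logstd], axis=1)
print(pdparam.shape)                        # (3, 8): [mean | log-std] per row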
Example #30
def build_actor_critic_network_actionsadded(x, layers, action_indices,
                                            state_indices, reuse):
    activ = tf.nn.relu
    with tf.variable_scope("actor_critic", reuse=tf.AUTO_REUSE):
        actions = tf.gather(x, action_indices, axis=1)
        actions = tf.reduce_sum(actions, axis=1, keepdims=True)
        state = tf.gather(x, state_indices, axis=1)
        vf_h = tf.layers.flatten(tf.concat([actions, state], axis=1))
        for j, layer_size in enumerate(layers):
            vf_h = activ(
                linear(vf_h,
                       'vf_fc' + str(j),
                       n_hidden=layer_size,
                       init_scale=np.sqrt(2)))
    vf_latent = activ(linear(vf_h, 'vf_head', len(action_indices)))
    value_fn = linear(vf_latent, 'vf', 1)

    pi_latent = build_policy(x, layers, action_indices, state_indices, activ)

    return pi_latent, vf_latent, value_fn