def __init__(self, input_dim, action_dim):
    super(StochasticActor, self).__init__()

    self.mu = tf.keras.Sequential([
        tf.layers.Dense(
            units=64,
            activation='tanh',
            kernel_initializer=tf.orthogonal_initializer(),
            input_shape=(input_dim,)),
        tf.layers.Dense(
            units=64,
            activation='tanh',
            kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(
            units=action_dim,
            activation=None,
            kernel_initializer=tf.orthogonal_initializer(0.01))
    ])

    # We exponentiate the logsig to get sig (hence we don't need softplus).
    self.logsig = tf.get_variable(
        name='logsig',
        shape=[1, action_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=True)
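
A minimal usage sketch (not from the source) of how the actor's `mu` network and `logsig` variable combine into a Gaussian policy; `actor` and `states` are assumed names, and `tf.distributions.Normal` is the TF1 distribution API used elsewhere on this page.

# Hedged sketch: build the action distribution from mu and exp(logsig).
# `actor` is an instance of the class above; `states` is [batch, input_dim].
mean = actor.mu(states)                     # [batch, action_dim]
std = tf.exp(actor.logsig)                  # [1, action_dim], broadcasts over the batch
dist = tf.distributions.Normal(loc=mean, scale=std)
actions = dist.sample()
log_prob = tf.reduce_sum(dist.log_prob(actions), axis=-1)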
 def __init__(self, input_dim):
     super(Critic, self).__init__()
     self.main = tf.keras.Sequential([
         tf.layers.Dense(units=64,
                         input_shape=(input_dim, ),
                         activation='tanh',
                         kernel_initializer=tf.orthogonal_initializer()),
         tf.layers.Dense(units=64,
                         activation='tanh',
                         kernel_initializer=tf.orthogonal_initializer()),
         tf.layers.Dense(units=1,
                         activation=None,
                         kernel_initializer=tf.orthogonal_initializer())
     ])
Example #3
    def dense(self,
              inputs,
              output_size,
              scope="dense",
              use_bias=True,
              activation=None):

        inputs = tf.convert_to_tensor(inputs)
        shape = inputs.get_shape().as_list()
        last_dim = shape[-1]
        rank = len(shape)

        with tf.variable_scope(name_or_scope=scope):
            # Create the kernel and bias under the layer's variable scope so
            # that repeated calls with different `scope` names do not collide.
            kernel = tf.get_variable(shape=(last_dim, output_size),
                                     initializer=tf.orthogonal_initializer(),
                                     name='W')

            bias = tf.get_variable(shape=(output_size, ),
                                   initializer=tf.zeros_initializer(),
                                   name='b')

            if rank > 2:
                # Rank > 2 inputs are contracted against the kernel's first
                # axis with tensordot (a broadcasting matmul).
                outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
            else:
                outputs = tf.matmul(inputs, kernel)
            if use_bias:
                outputs = tf.nn.bias_add(outputs, bias)
            if activation is not None:
                return activation(outputs)  # pylint: disable=not-callable
            return outputs
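
A minimal usage sketch (not from the source) of the `dense` helper above on a rank-3 input, which exercises the tensordot branch; `model` is an assumed object exposing this method.

# Hypothetical call on a [batch, time, features] tensor.
seq = tf.placeholder(tf.float32, shape=[None, 10, 32])
projected = model.dense(seq, output_size=64, scope='proj', activation=tf.nn.relu)
# projected has shape [None, 10, 64].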
Example #4
 def _prediction_network(self, obs):
     """Prediction network used by RND to predict to target network output."""
     with slim.arg_scope(
         [slim.conv2d, slim.fully_connected],
             weights_initializer=tf.orthogonal_initializer(gain=np.sqrt(2)),
             biases_initializer=tf.zeros_initializer()):
         net = slim.conv2d(obs,
                           32, [8, 8],
                           stride=4,
                           activation_fn=tf.nn.leaky_relu)
         net = slim.conv2d(net,
                           64, [4, 4],
                           stride=2,
                           activation_fn=tf.nn.leaky_relu)
         net = slim.conv2d(net,
                           64, [3, 3],
                           stride=1,
                           activation_fn=tf.nn.leaky_relu)
         net = slim.flatten(net)
         net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
         net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
         embedding = slim.fully_connected(net,
                                          self.embedding_size,
                                          activation_fn=None)
     return embedding
Example #5
    def fully_connected(self,
                        inputs,
                        output_size,
                        scope="full_connected",
                        is_activation=None):

        # get feature num
        shape = inputs.get_shape().as_list()
        # convolution layer
        if len(shape) == 4:
            input_size = shape[-1] * shape[-2] * shape[-3]
        # dense layers
        else:
            input_size = shape[1]

        with tf.variable_scope(name_or_scope=scope):
            flat_data = tf.reshape(tensor=inputs,
                                   shape=[-1, input_size],
                                   name='flatten')

            weights = tf.get_variable(shape=(input_size, output_size),
                                      initializer=tf.orthogonal_initializer(),
                                      name='W')

            biases = tf.get_variable('b',
                                     shape=(output_size, ),
                                     initializer=tf.zeros_initializer())

            if is_activation is not None:
                # Fused matmul + bias + ReLU on the flattened inputs.
                return tf.nn.relu_layer(x=flat_data,
                                        weights=weights,
                                        biases=biases)
            else:
                return tf.nn.bias_add(value=tf.matmul(flat_data, weights),
                                      bias=biases)
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_INITIALIZER_GAIN,
                               value=hparams.initializer_gain,
                               hparams=hparams)

  if not tf.executing_eagerly():
    tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  elif hparams.initializer == "xavier":
    return tf.initializers.glorot_uniform()
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
def conv_layer(inputs, filters, kernel_size, strides, gain=1.0):
    return tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=(strides, strides),
        activation=tf.nn.relu,
        kernel_initializer=tf.orthogonal_initializer(gain=gain))
Example #8
def head(endpoints, embedding_dim, is_training):
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'],
        embedding_dim,
        activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(),
        scope='emb')

    return endpoints
def _create_conv2d_initializer(input_shape,
                               output_channels,
                               kernel_shape,
                               dtype=tf.float32):  # pylint: disable=unused-argument
    """Returns a default initializer for the weights of a convolutional module."""
    return {
        'w': tf.orthogonal_initializer(),
        'b': tf.zeros_initializer(dtype=dtype),
    }
    def __init__(self, input_dim):
        """Initializes a critic network.

        Args:
          input_dim: size of the input space
        """
        super(CriticDDPG, self).__init__()

        self.main = tf.keras.Sequential([
            tf.layers.Dense(units=400,
                            input_shape=(input_dim, ),
                            activation='relu',
                            kernel_initializer=tf.orthogonal_initializer()),
            tf.layers.Dense(units=300,
                            activation='relu',
                            kernel_initializer=tf.orthogonal_initializer()),
            tf.layers.Dense(units=1,
                            kernel_initializer=tf.orthogonal_initializer())
        ])
Example #11
 def _target_network(self, obs):
   """Implements the random target network used by RND."""
   with slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=False,
                       weights_initializer=tf.orthogonal_initializer(
                           gain=np.sqrt(2)),
                       biases_initializer=tf.zeros_initializer()):
     net = slim.conv2d(obs, 32, [8, 8], stride=4,
                       activation_fn=tf.nn.leaky_relu)
     net = slim.conv2d(net, 64, [4, 4], stride=2,
                       activation_fn=tf.nn.leaky_relu)
     net = slim.conv2d(net, 64, [3, 3], stride=1,
                       activation_fn=tf.nn.leaky_relu)
     net = slim.flatten(net)
     embedding = slim.fully_connected(net, self.embedding_size,
                                      activation_fn=None)
   return embedding
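
A minimal sketch (not from the source) of how the two RND networks above are typically combined: the intrinsic reward is the prediction error of the trainable network against the frozen target. `agent` and `observations` are assumed names.

target_emb = agent._target_network(observations)       # frozen (trainable=False)
pred_emb = agent._prediction_network(observations)
# Per-observation intrinsic reward and the loss that trains only the predictor.
intrinsic_reward = tf.reduce_mean(
    tf.square(pred_emb - tf.stop_gradient(target_emb)), axis=-1)
rnd_loss = tf.reduce_mean(intrinsic_reward)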
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'],
        1024,
        normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })

    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'],
        embedding_dim,
        activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(),
        scope='emb')

    return endpoints
def _create_linear_initializer(input_size, output_size, dtype=tf.float32):  # pylint: disable=unused-argument
    """Returns a default initializer for the weights of a linear module."""
    return {
        'w': tf.orthogonal_initializer(),
        'b': tf.zeros_initializer(dtype=dtype),
    }
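
A minimal usage sketch (assumption: the initializer dicts feed plain `tf.get_variable` calls; in their original context they are usually passed to Sonnet-style modules that accept an `initializers` dict).

init = _create_linear_initializer(input_size=128, output_size=64)
w = tf.get_variable('w', shape=[128, 64], initializer=init['w'])
b = tf.get_variable('b', shape=[64], initializer=init['b'])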
Example #14
    def _build_seperate(self, hp):
        # Input, target output, and cost mask
        # Shape: [Time, Batch, Num_units]
        n_input = hp['n_input']
        n_rnn = hp['n_rnn']
        n_output = hp['n_output']

        self.x = tf.placeholder("float", [None, None, n_input])
        self.y = tf.placeholder("float", [None, None, n_output])
        self.c_mask = tf.placeholder("float", [None, n_output])

        sensory_inputs, rule_inputs = tf.split(
            self.x, [hp['rule_start'], hp['n_rule']], axis=-1)

        sensory_rnn_inputs = tf.layers.dense(sensory_inputs,
                                             n_rnn,
                                             name='sen_input')

        if 'mix_rule' in hp and hp['mix_rule'] is True:
            # rotate rule matrix
            kernel_initializer = tf.orthogonal_initializer()
            rule_inputs = tf.layers.dense(
                rule_inputs,
                hp['n_rule'],
                name='mix_rule',
                use_bias=False,
                trainable=False,
                kernel_initializer=kernel_initializer)

        rule_rnn_inputs = tf.layers.dense(rule_inputs,
                                          n_rnn,
                                          name='rule_input',
                                          use_bias=False)

        rnn_inputs = sensory_rnn_inputs + rule_rnn_inputs

        # Recurrent activity
        cell = LeakyRNNCellSeparateInput(n_rnn,
                                         hp['alpha'],
                                         sigma_rec=hp['sigma_rec'],
                                         activation=hp['activation'],
                                         w_rec_init=hp['w_rec_init'],
                                         rng=self.rng)

        # Dynamic rnn with time major
        self.h, states = rnn.dynamic_rnn(cell,
                                         rnn_inputs,
                                         dtype=tf.float32,
                                         time_major=True)

        # Output
        h_shaped = tf.reshape(self.h, (-1, n_rnn))
        y_shaped = tf.reshape(self.y, (-1, n_output))
        # y_hat shape (n_time*n_batch, n_unit)
        y_hat = tf.layers.dense(h_shaped,
                                n_output,
                                activation=tf.nn.sigmoid,
                                name='output')
        # Least-square loss
        self.cost_lsq = tf.reduce_mean(
            tf.square((y_shaped - y_hat) * self.c_mask))

        self.y_hat = tf.reshape(y_hat, (-1, tf.shape(self.h)[1], n_output))
        y_hat_fix, y_hat_ring = tf.split(self.y_hat, [1, n_output - 1],
                                         axis=-1)
        self.y_hat_loc = tf_popvec(y_hat_ring)
Example #15
def dcgan_discriminator(x, flags, scope=None, reuse=None, return_acts=False):
    """DCGAN-style discriminator network."""
    nonlinearity = nonlinearity_fn(flags.nonlinearity_d, True)
    ds_fs = flags.downsample_conv_filt_size
    x_fs = flags.extra_conv_filt_size

    acts = []
    with tf.variable_scope(scope, reuse=reuse):
        if not flags.norm_d:
            normalizer = None
        elif flags.algorithm == 'vanilla':
            normalizer = contrib_slim.batch_norm
        else:
            normalizer = contrib_slim.layer_norm

        if flags.initializer_d == 'xavier':
            initializer = contrib_layers.xavier_initializer()
        elif flags.initializer_d == 'orth_gain2':
            initializer = tf.orthogonal_initializer(gain=2.)
        elif flags.initializer_d == 'he':
            initializer = contrib_layers.variance_scaling_initializer()
        elif flags.initializer_d == 'he_uniform':
            initializer = contrib_layers.variance_scaling_initializer(
                uniform=True)
        else:
            raise ValueError(
                'Unknown initializer_d: %s' % flags.initializer_d)

        out = contrib_slim.conv2d(x,
                                  flags.dim_d,
                                  ds_fs,
                                  scope='conv1',
                                  stride=2,
                                  activation_fn=nonlinearity,
                                  weights_initializer=initializer)
        acts.append(out)

        for i in range(flags.extra_depth_d):
            out = contrib_slim.conv2d(out,
                                      flags.dim_d,
                                      x_fs,
                                      scope='extraconv1.{}'.format(i),
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = contrib_slim.conv2d(out,
                                  2 * flags.dim_d,
                                  ds_fs,
                                  scope='conv2',
                                  stride=2,
                                  activation_fn=nonlinearity,
                                  normalizer_fn=normalizer,
                                  weights_initializer=initializer)
        acts.append(out)

        for i in range(flags.extra_depth_d):
            out = contrib_slim.conv2d(out,
                                      2 * flags.dim_d,
                                      x_fs,
                                      scope='extraconv2.{}'.format(i),
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = contrib_slim.conv2d(out,
                                  4 * flags.dim_d,
                                  ds_fs,
                                  scope='conv3',
                                  stride=2,
                                  activation_fn=nonlinearity,
                                  normalizer_fn=normalizer,
                                  weights_initializer=initializer)
        acts.append(out)

        if flags.extra_top_conv:
            out = contrib_slim.conv2d(out,
                                      4 * flags.dim_d,
                                      x_fs,
                                      scope='extratopconv',
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = tf.reshape(out, [-1, 4 * 4 * (4 * flags.dim_d)])
        out = contrib_slim.fully_connected(out,
                                           1,
                                           scope='fc',
                                           activation_fn=None)
        acts.append(out)

        if return_acts:
            return out, acts
        else:
            return out
Example #16
def get_rnn_cell(mode,
                 hps,
                 input_dim,
                 num_units,
                 num_layers=1,
                 dropout=0.,
                 mem_input=None,
                 use_beam=False,
                 cell_type="lstm",
                 reuse=None):
  """Construct RNN cells.

  Args:
    mode: train or eval. Keys from tf.estimator.ModeKeys.
    hps: Hyperparameters.
    input_dim: input size.
    num_units: hidden state size.
    num_layers: number of RNN layers.
    dropout: drop rate of RNN dropout.
    mem_input: memory input passed to the HyperLSTM cell.
    use_beam: Use beam search or not.
    cell_type: one of [`lstm`, `gru`, `hyper_lstm`].
    reuse: Reuse option.

  Returns:
    RNN cell.
  """

  cells = []
  for i in xrange(num_layers):
    input_size = input_dim if i == 0 else num_units
    scale = 1.
    if cell_type == "lstm":
      cell = tf.contrib.rnn.LSTMCell(
          num_units=num_units,
          initializer=tf.orthogonal_initializer(scale),
          reuse=reuse)
    elif cell_type == "gru":
      cell = tf.contrib.rnn.GRUCell(
          num_units=num_units,
          kernel_initializer=tf.orthogonal_initializer(scale),
          reuse=reuse)
    elif cell_type == "hyper_lstm":
      cell = HyperLSTMCell(
          num_units=num_units,
          mem_input=mem_input,
          use_beam=use_beam,
          initializer=tf.orthogonal_initializer(scale),
          hps=hps,
          reuse=reuse)
    else:
      assert False
    if mode == tf_estimator.ModeKeys.TRAIN and dropout > 0.:
      cell = tf.nn.rnn_cell.DropoutWrapper(
          cell,
          input_size=input_size,
          output_keep_prob=1.0 - dropout,
          variational_recurrent=True,
          dtype=tf.float32)
    if hps.use_residual and num_layers > 1:
      cell = tf.nn.rnn_cell.ResidualWrapper(cell=cell)
    cells.append(cell)

  cell = tf.nn.rnn_cell.MultiRNNCell(cells)
  return cell
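
A minimal usage sketch (assumptions: `hps` provides `use_residual`, and `inputs` is a batch-major [batch, time, input_dim] float32 tensor, so `dynamic_rnn` is called without `time_major`).

cell = get_rnn_cell(mode=tf_estimator.ModeKeys.TRAIN,
                    hps=hps,
                    input_dim=256,
                    num_units=512,
                    num_layers=2,
                    dropout=0.2,
                    cell_type='lstm')
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)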
def fc_layer(inputs, units, activations_fn=tf.nn.relu, gain=1.0):
    return tf.layers.dense(inputs=inputs,
                           units=units,
                           activation=activations_fn,
                           kernel_initializer=tf.orthogonal_initializer(gain))
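
A minimal sketch (not from the source) wiring `conv_layer` and `fc_layer` from the snippets above into a small image trunk; `obs` is assumed to be a float32 NHWC tensor such as [batch, 84, 84, 4], and `num_actions` is a hypothetical name.

net = conv_layer(obs, filters=32, kernel_size=8, strides=4, gain=np.sqrt(2))
net = conv_layer(net, filters=64, kernel_size=4, strides=2, gain=np.sqrt(2))
net = tf.layers.flatten(net)
net = fc_layer(net, units=512, gain=np.sqrt(2))
logits = fc_layer(net, units=num_actions, activations_fn=None, gain=0.01)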
Example #18
from __future__ import division
from __future__ import print_function

from model import HierarchicalProbUNet
import tensorflow.compat.v1 as tf


_NUM_CLASSES = 2
_BATCH_SIZE = 2
_SPATIAL_SHAPE = [32, 32]
_CHANNELS_PER_BLOCK = [5, 7, 9, 11, 13]
_IMAGE_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [1]
_BOTTLENECK_SIZE = _SPATIAL_SHAPE[0] // 2 ** (len(_CHANNELS_PER_BLOCK) - 1)
_SEGMENTATION_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [_NUM_CLASSES]
_LATENT_DIMS = [3, 2, 1]
_INITIALIZERS = {'w': tf.orthogonal_initializer(gain=1.0, seed=None),
                 'b': tf.truncated_normal_initializer(stddev=0.001)}


def _get_placeholders():
  """Returns placeholders for the image and segmentation."""
  img = tf.placeholder(dtype=tf.float32, shape=_IMAGE_SHAPE)
  seg = tf.placeholder(dtype=tf.float32, shape=_SEGMENTATION_SHAPE)
  return img, seg


class HierarchicalProbUNetTest(tf.test.TestCase):

  def test_shape_of_sample(self):
    hpu_net = HierarchicalProbUNet(latent_dims=_LATENT_DIMS,
                                   channels_per_block=_CHANNELS_PER_BLOCK,
Example #19
    def __init__(self,
                 num_unique_documents,
                 vocab_size,
                 num_topics,
                 freqs,
                 embedding_size=128,
                 num_sampled=40,
                 learning_rate=1e-3,
                 lmbda=150.0,
                 alpha=None,
                 power=0.75,
                 batch_size=32,
                 clip_gradients=5.0,
                 **kwargs):
        device = get_device(**kwargs)
        _graph = tf.Graph()

        with _graph.as_default():
            with tf.device(device):
                moving_avgs = tf.train.ExponentialMovingAverage(0.9)
                self.batch_size = batch_size
                self.freqs = freqs

                self.X = tf.placeholder(tf.int32, shape=[None])
                self.Y = tf.placeholder(tf.int64, shape=[None])
                self.DOC = tf.placeholder(tf.int32, shape=[None])
                self.switch_loss = tf.Variable(0, trainable=False)
                train_labels = tf.reshape(self.Y, [-1, 1])
                sampler = tf.nn.fixed_unigram_candidate_sampler(
                    train_labels,
                    num_true=1,
                    num_sampled=num_sampled,
                    unique=True,
                    range_max=vocab_size,
                    distortion=power,
                    unigrams=self.freqs,
                )

                self.word_embedding = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
                self.nce_weights = tf.Variable(
                    tf.truncated_normal(
                        [vocab_size, embedding_size],
                        stddev=tf.sqrt(1 / embedding_size),
                    ))
                self.nce_biases = tf.Variable(tf.zeros([vocab_size]))
                scalar = 1 / np.sqrt(num_unique_documents + num_topics)
                self.doc_embedding = tf.Variable(
                    tf.random_normal(
                        [num_unique_documents, num_topics],
                        mean=0,
                        stddev=50 * scalar,
                    ))
                self.topic_embedding = tf.get_variable(
                    'topic_embedding',
                    shape=[num_topics, embedding_size],
                    dtype=tf.float32,
                    initializer=tf.orthogonal_initializer(gain=scalar),
                )
                pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
                proportions = tf.nn.embedding_lookup(self.doc_embedding,
                                                     self.DOC)
                doc = tf.matmul(proportions, self.topic_embedding)
                doc_context = doc
                word_context = pivot
                context = tf.add(word_context, doc_context)
                loss_word2vec = tf.reduce_mean(
                    tf.nn.nce_loss(
                        weights=self.nce_weights,
                        biases=self.nce_biases,
                        labels=self.Y,
                        inputs=context,
                        num_sampled=num_sampled,
                        num_classes=vocab_size,
                        num_true=1,
                        sampled_values=sampler,
                    ))
                self.fraction = tf.Variable(1,
                                            trainable=False,
                                            dtype=tf.float32)

                n_topics = self.doc_embedding.get_shape()[1].value
                log_proportions = tf.nn.log_softmax(self.doc_embedding)
                if alpha is None:
                    alpha = 1.0 / n_topics
                loss = (alpha - 1) * log_proportions
                prior = tf.reduce_sum(loss)

                loss_lda = lmbda * self.fraction * prior
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name='global_step')
                self.cost = tf.cond(
                    global_step < self.switch_loss,
                    lambda: loss_word2vec,
                    lambda: loss_word2vec + loss_lda,
                )
                loss_avgs_op = moving_avgs.apply(
                    [loss_lda, loss_word2vec, self.cost])
                with tf.control_dependencies([loss_avgs_op]):
                    optimizer = tf.train.AdamOptimizer(
                        learning_rate=learning_rate)
                    gvs = optimizer.compute_gradients(self.cost)
                    capped_gvs = [(
                        tf.clip_by_value(grad, -clip_gradients,
                                         clip_gradients),
                        var,
                    ) for grad, var in gvs]
                    self.optimizer = optimizer.apply_gradients(capped_gvs)
                self.sess = generate_session(_graph, **kwargs)
                self.sess.run(tf.global_variables_initializer())
def biaffine_mapping(vector_set_1,
                     vector_set_2,
                     output_size,
                     add_bias_1=True,
                     add_bias_2=True,
                     initializer=None):
    """Bilinear mapping: maps two vector spaces to a third vector space.

  The input vector spaces are two 3d tensors: batch size x bucket size x values.
  A typical application of the function is to compute a square matrix
  representing a dependency tree. For each bucket, the output is a matrix of
  the form [bucket size, output size, bucket size]. If the output size is set
  to 1, the result is [bucket size, 1, bucket size], i.e. a square matrix in
  which, for instance, both axes index the tokens of the bucket; such a matrix
  can represent the adjacency matrix of a dependency graph (see
  https://arxiv.org/abs/1611.01734).

  Args:
     vector_set_1: vectors of space one
     vector_set_2: vectors of space two
     output_size: number of output labels (e.g. edge labels)
     add_bias_1: Whether to add a bias for input one
     add_bias_2: Whether to add a bias for input two
     initializer: Initializer for the bilinear weight map

  Returns:
    Output vector space as 4d matrix:
    batch size x bucket size x output size x bucket size
    The output could represent an unlabeled dependency tree when
    the output size is 1 or a labeled tree otherwise.

  """
    with tf.variable_scope('Bilinear'):
        # Dynamic shape info
        batch_size = tf.shape(vector_set_1)[0]
        bucket_size = tf.shape(vector_set_1)[1]

        if add_bias_1:
            vector_set_1 = tf.concat(
                [vector_set_1,
                 tf.ones([batch_size, bucket_size, 1])], axis=2)
        if add_bias_2:
            vector_set_2 = tf.concat(
                [vector_set_2,
                 tf.ones([batch_size, bucket_size, 1])], axis=2)

        # Static shape info
        vector_set_1_size = vector_set_1.get_shape().as_list()[-1]
        vector_set_2_size = vector_set_2.get_shape().as_list()[-1]

        if not initializer:
            initializer = tf.orthogonal_initializer()

        # Mapping matrix
        bilinear_map = tf.get_variable(
            'bilinear_map',
            [vector_set_1_size, output_size, vector_set_2_size],
            initializer=initializer)

        # The matrix operations and reshapings for bilinear mapping.
        # b: batch size (batch of buckets)
        # v1, v2: values (size of vectors)
        # n: tokens (size of bucket)
        # r: labels (output size), e.g. 1 if unlabeled or number of edge labels.

        # [b, n, v1] -> [b*n, v1]
        vector_set_1 = tf.reshape(vector_set_1, [-1, vector_set_1_size])

        # [v1, r, v2] -> [v1, r*v2]
        bilinear_map = tf.reshape(bilinear_map, [vector_set_1_size, -1])

        # [b*n, v1] x [v1, r*v2] -> [b*n, r*v2]
        bilinear_mapping = tf.matmul(vector_set_1, bilinear_map)

        # [b*n, r*v2] -> [b, n*r, v2]
        bilinear_mapping = tf.reshape(
            bilinear_mapping,
            [batch_size, bucket_size * output_size, vector_set_2_size])

        # [b, n*r, v2] x [b, n, v2]T -> [b, n*r, n]
        bilinear_mapping = tf.matmul(bilinear_mapping,
                                     vector_set_2,
                                     adjoint_b=True)

        # [b, n*r, n] -> [b, n, r, n]
        bilinear_mapping = tf.reshape(
            bilinear_mapping,
            [batch_size, bucket_size, output_size, bucket_size])
        return bilinear_mapping
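
A minimal usage sketch (not from the source): head and dependent token representations of shape [batch, bucket, dim] mapped to one arc score per token pair.

heads = tf.placeholder(tf.float32, shape=[None, 20, 256])
deps = tf.placeholder(tf.float32, shape=[None, 20, 256])
arc_scores = biaffine_mapping(heads, deps, output_size=1)
# arc_scores has shape [batch, 20, 1, 20]: one score per (head, dependent) pair.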
Example #21
    def _build_net(self):

        with tf.variable_scope("Actor" + self.suffix):

            with tf.name_scope('inputs' + self.suffix):
                self.tf_obs = tf.placeholder(tf.float32,
                                             [None, self.n_features],
                                             name='observation' + self.suffix)
                self.tf_acts = tf.placeholder(tf.int32, [
                    None,
                ],
                                              name='actions_num' + self.suffix)
                self.tf_vt = tf.placeholder(tf.float32, [
                    None,
                ],
                                            name='actions_value' + self.suffix)
                self.tf_safe = tf.placeholder(tf.float32, [
                    None,
                ],
                                              name='safety_value' +
                                              self.suffix)
                self.entropy_weight = tf.placeholder(
                    tf.float32,
                    shape=(),
                    name='entropy_weight_clustering' + self.suffix)

                ##### PPO change #####
                self.ppo_ratio = tf.placeholder(tf.float32, [
                    None,
                ],
                                                name='ppo_ratio' + self.suffix)
                ##### PPO change #####

            layer = tf.layers.dense(
                inputs=self.tf_obs,
                units=128,
                activation=tf.nn.tanh,
                # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
                kernel_initializer=tf.orthogonal_initializer(
                    gain=np.sqrt(2.)),  # ppo default initialization
                bias_initializer=tf.constant_initializer(0.1),
                name='fc1' + self.suffix)

            all_act = tf.layers.dense(
                inputs=layer,
                units=self.n_actions,
                activation=None,
                # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
                kernel_initializer=tf.orthogonal_initializer(
                    gain=np.sqrt(2.)),  # ppo default initialization
                bias_initializer=tf.constant_initializer(0.1),
                name='fc2' + self.suffix)

            self.trainable_variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor' + self.suffix)
            self.trainable_variables_shapes = [
                var.get_shape().as_list() for var in self.trainable_variables
            ]

            # sampling
            self.all_act_prob = tf.nn.softmax(all_act,
                                              name='act_prob' + self.suffix)
            self.all_act_prob = tf.clip_by_value(self.all_act_prob, 1e-20, 1.0)

            with tf.name_scope('loss' + self.suffix):
                neg_log_prob = tf.reduce_sum(
                    -tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                    tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
                    axis=1)
                loss = tf.reduce_mean(neg_log_prob * self.tf_vt)
                loss += self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                self.entro = self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                self.loss = loss
            with tf.name_scope('train' + self.suffix):
                self.train_op = tf.train.AdamOptimizer(
                    self.pg_lr).minimize(loss)

            # safety loss
            """
            * -1?
            """
            self.chosen_action_log_probs = tf.reduce_sum(
                tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
                axis=1)
            ##### PPO CHANGE #####
            self.ppo_old_chosen_action_log_probs = tf.placeholder(
                tf.float32, [None])
            ##### PPO CHANGE #####
            self.old_chosen_action_log_probs = tf.stop_gradient(
                tf.placeholder(tf.float32, [None]))
            # self.each_safety_loss = tf.exp(self.chosen_action_log_probs - self.old_chosen_action_log_probs) * self.tf_safe
            self.each_safety_loss = (
                tf.exp(self.chosen_action_log_probs) -
                tf.exp(self.old_chosen_action_log_probs)) * self.tf_safe
            self.average_safety_loss = tf.reduce_mean(
                self.each_safety_loss)  #/ self.n_episodes tf.reduce_sum
            # self.average_safety_loss +=self.entro

            # KL D
            self.old_all_act_prob = tf.stop_gradient(
                tf.placeholder(tf.float32, [None, self.n_actions]))

            def kl(x, y):
                EPS = 1e-10
                x = tf.where(tf.abs(x) < EPS, EPS * tf.ones_like(x), x)
                y = tf.where(tf.abs(y) < EPS, EPS * tf.ones_like(y), y)
                X = tf.distributions.Categorical(probs=x + EPS)
                Y = tf.distributions.Categorical(probs=y + EPS)
                return tf.distributions.kl_divergence(X,
                                                      Y,
                                                      allow_nan_stats=False)

            self.each_kl_divergence = kl(
                self.all_act_prob, self.old_all_act_prob
            )  # tf.reduce_sum(kl(self.all_act_prob, self.old_all_act_prob), axis=1)
            self.average_kl_divergence = tf.reduce_mean(
                self.each_kl_divergence)
            # self.kl_gradients = tf.gradients(self.average_kl_divergence, self.trainable_variables)  # useless

            self.desired_kl = desired_kl
            # self.metrics = [self.loss, self.average_kl_divergence, self.average_safety_loss, self.entro] # Luping
            self.metrics = [
                self.loss, self.loss, self.average_safety_loss, self.entro
            ]  # Luping

            # FLat
            self.flat_params_op = get_flat_params(self.trainable_variables)
            """not use tensorflow default function, here we calculate the gradient by self:
            (1) loss: g
            (2) kl: directional_gradients (math, fisher)
            (3) safe: b 
            """
            ##### PPO change #####
            #### PPO Suyi's Change ####
            with tf.name_scope('ppoloss' + self.suffix):
                self.ppo_ratio = tf.exp(self.chosen_action_log_probs -
                                        self.ppo_old_chosen_action_log_probs)
                # self.ppo_ratio = tf.Print(self.ppo_ratio, [self.ppo_ratio], "self.ppo_ratio: ")

                surr = self.ppo_ratio * self.tf_vt
                self.ppoloss = -tf.reduce_mean(
                    tf.minimum(
                        surr,
                        tf.clip_by_value(self.ppo_ratio, 1. - self.clip_eps,
                                         1. + self.clip_eps) * self.tf_vt))

                self.ppoloss += self.entropy_weight * tf.reduce_mean(
                    tf.reduce_sum(
                        tf.log(tf.clip_by_value(self.all_act_prob, 1e-30,
                                                1.0)) * self.all_act_prob,
                        axis=1))
                # self.ppoloss += 0.01 * tf.reduce_mean(tf.reduce_sum(tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) * self.all_act_prob, axis=1))

            with tf.variable_scope('ppotrain'):
                # self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(self.ppoloss)
                self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(
                    self.ppoloss)
            #### PPO Suyi's Change ####

            self.ppoloss_flat_gradients_op = get_flat_gradients(
                self.ppoloss, self.trainable_variables)
            ##### PPO change #####

            self.loss_flat_gradients_op = get_flat_gradients(
                self.loss, self.trainable_variables)
            self.kl_flat_gradients_op = get_flat_gradients(
                self.average_kl_divergence, self.trainable_variables)
            self.constraint_flat_gradients_op = get_flat_gradients(
                self.average_safety_loss, self.trainable_variables)

            self.vec = tf.placeholder(tf.float32, [None])
            self.fisher_product_op = self.get_fisher_product_op()

            self.new_params = tf.placeholder(tf.float32, [None])
            self.params_assign_op = assign_network_params_op(
                self.new_params, self.trainable_variables,
                self.trainable_variables_shapes)
Example #22
# @time    :   2021/03/17 20:10:29
"""Tests for the Hierarchical Probabilistic U-Net open-source version"""

from model import HierarchicalProbUNet
import tensorflow.compat.v1 as tf

_NUM_CLASSES = 2
_BATCH_SIZE = 2
_SPATIAL_SHAPE = [32, 32]
_CHANNELS_PER_BLOCK = [5, 7, 9, 11, 13]
_IMAGE_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [1]
_BOTTLENECK_SIZE = _SPATIAL_SHAPE[0] // 2**(len(_CHANNELS_PER_BLOCK) - 1)
_SEGMENTATION_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [_NUM_CLASSES]
_LATENT_DIMS = [3, 2, 1]
_INITIALIZERS = {
    'w': tf.orthogonal_initializer(gain=1.0, seed=None),
    'b': tf.truncated_normal_initializer(stddev=0.001)
}


def _get_placeholders():
    """Returns placeholders for the image and segmentation."""
    img = tf.placeholder(dtype=tf.float32, shape=_IMAGE_SHAPE)
    seg = tf.placeholder(dtype=tf.float32, shape=_SEGMENTATION_SHAPE)
    return img, seg


class HierarchicalProbUNetTest(tf.test.TestCase):
    def test_shape_of_sample(self):
        hpu_net = HierarchicalProbUNet(latent_dims=_LATENT_DIMS,
                                       channels_per_block=_CHANNELS_PER_BLOCK,