Example #1
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix whose rows are the points at which we want to predict.

        This method computes

            p(F* | Y)

        where F* are the latent function values of the GP at Xnew and Y are the
        noisy observations at X.

        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(tf.shape(self.X)[0]) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, tf.shape(self.Y)[1]])
        return fmean, fvar
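For reference, a restatement of the algebra this method implements (the standard GP regression posterior; notation follows the code, with m the mean function and sigma^2 the likelihood variance):

    \begin{aligned}
    K &= K_{XX} + \sigma^2 I = LL^\top, \qquad
    A = L^{-1} K_{X X_*}, \qquad
    V = L^{-1}\bigl(Y - m(X)\bigr),\\
    \mathbb{E}[F_*] &= A^\top V + m(X_*), \qquad
    \operatorname{Cov}[F_*] = K_{X_* X_*} - A^\top A .
    \end{aligned}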
Example #2
def resize_images(X, height_factor, width_factor, dim_ordering):
    '''Resizes the images contained in a 4D tensor of shape
    - [batch, channels, height, width] (for 'th' dim_ordering)
    - [batch, height, width, channels] (for 'tf' dim_ordering)
    by a factor of (height_factor, width_factor). Both factors should be
    positive integers.
    '''
    if dim_ordering == 'th':
        original_shape = int_shape(X)
        new_shape = tf.shape(X)[2:]
        new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
        X = permute_dimensions(X, [0, 2, 3, 1])
        X = tf.image.resize_nearest_neighbor(X, new_shape)
        X = permute_dimensions(X, [0, 3, 1, 2])
        X.set_shape((None, None, original_shape[2] * height_factor, original_shape[3] * width_factor))
        return X
    elif dim_ordering == 'tf':
        original_shape = int_shape(X)
        new_shape = tf.shape(X)[1:3]
        new_shape *= tf.constant(np.array([height_factor, width_factor]).astype('int32'))
        X = tf.image.resize_nearest_neighbor(X, new_shape)
        X.set_shape((None, original_shape[1] * height_factor, original_shape[2] * width_factor, None))
        return X
    else:
        raise Exception('Invalid dim_ordering: ' + dim_ordering)
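A minimal usage sketch (hedged: `int_shape` and `permute_dimensions` are the Keras backend helpers the function relies on, and the session-based call assumes TF 1.x):

import numpy as np
import tensorflow as tf

# 'tf' dim_ordering: [batch, height, width, channels]
X = tf.placeholder(tf.float32, shape=(None, 8, 8, 3))
Y = resize_images(X, height_factor=2, width_factor=2, dim_ordering='tf')

with tf.Session() as sess:
    out = sess.run(Y, feed_dict={X: np.zeros((4, 8, 8, 3), np.float32)})
    print(out.shape)  # (4, 16, 16, 3)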
Example #3
def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN
        # outputs.
        facts = tf.concat(facts, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.transpose(facts, [1, 0, 2])
    # Trainable parameters
    mask = tf.equal(mask, tf.ones_like(mask))
    # D value - hidden size of the RNN layer
    facts_size = facts.get_shape().as_list()[-1]
    query_size = query.get_shape().as_list()[-1]
    query = tf.layers.dense(
        query, facts_size, activation=None, name='f1_trans_shine' + stag)
    query = prelu(query)
    queries = tf.tile(query, [1, tf.shape(facts)[1]])
    queries = tf.reshape(queries, tf.shape(facts))
    din_all = tf.concat(
        [queries, facts, queries - facts, queries * facts], axis=-1)
    d_layer_1_all = tf.layers.dense(
        din_all, facts_size, activation=tf.nn.sigmoid, name='f1_shine_att' + stag)
    d_layer_2_all = tf.layers.dense(
        d_layer_1_all, facts_size, activation=tf.nn.sigmoid, name='f2_shine_att' + stag)
    d_layer_2_all = tf.reshape(d_layer_2_all, tf.shape(facts))
    output = d_layer_2_all
    return output
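A hedged usage sketch (assumes `prelu` from the same codebase is importable; note that `mask` is turned into a boolean tensor above but not otherwise used in this variant, and `attention_size` is unused as well):

# query: (B, D_q), facts: (B, T, D), mask: (B, T)
facts = tf.placeholder(tf.float32, [None, 10, 36])
query = tf.placeholder(tf.float32, [None, 36])
mask = tf.placeholder(tf.float32, [None, 10])
out = din_fcn_shine(query, facts, attention_size=36, mask=mask, stag='demo')
# `out` has the same shape as `facts`: (B, T, D)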
def attention_bah_block(hidden_for_sketch, hidden_for_attn_alignment, attention_depth):
    """ It is a implementation of the Bahdanau et al. attention mechanism with concat score and the constrained softmax (csoftmax).
        Based on the papers:
            https://arxiv.org/abs/1409.0473 "Neural Machine Translation by Jointly Learning to Align and Translate"
            https://andre-martins.github.io/docs/emnlp2017_final.pdf "Learning What's Easy: Fully Differentiable Neural Easy-First Taggers"
    Args:
        hidden_for_sketch: A tensorflow tensor for a sketch computing. This tensor have dimensionality [None, max_num_tokens, sketch_hidden_size]
        hidden_for_attn_alignment: A tensorflow tensor is aligned for output during a performing. This tensor have dimensionality [None, max_num_tokens, hidden_size_for_attn_alignment]
        key: A tensorflow tensor with dimensionality [None, None, key_size]
        attention_depth: Number of usage csoftmax
    Returns:
        final_aligned_hiddens: Tensor at the output with dimensionality [1, attention_depth, hidden_size_for_attn_alignment]
    """
    with tf.name_scope('attention_block'):
        sketch_dims = tf.shape(hidden_for_sketch)
        batch_size = sketch_dims[0]
        num_tokens = sketch_dims[1]
        hidden_size = sketch_dims[2]

        attn_alignment_dims = tf.shape(hidden_for_attn_alignment)
        attn_alignment_hidden_size = attn_alignment_dims[2]

        sketches = [tf.zeros(shape=[batch_size, hidden_size], dtype=tf.float32)]
        aligned_hiddens = []
        cum_att = tf.zeros(shape=[batch_size, num_tokens])  # cumulative attention
        for i in range(attention_depth):
            sketch, cum_att_, aligned_hidden = attention_bah_step(hidden_for_sketch, hidden_for_attn_alignment, sketches[-1], cum_att)
            sketches.append(sketch)  # sketch
            aligned_hiddens.append(aligned_hidden)  # aligned hidden
            cum_att += cum_att_
        final_aligned_hiddens = tf.reshape(tf.transpose(tf.stack(aligned_hiddens), [1, 0, 2]),[1, attention_depth, attn_alignment_hidden_size])
    return final_aligned_hiddens
Example #5
    def __init__(self, name, input_shape, output_dim, hidden_dim, hidden_nonlinearity=tf.nn.relu,
                 lstm_layer_cls=L.LSTMLayer,
                 output_nonlinearity=None, input_var=None, input_layer=None, forget_bias=1.0, use_peepholes=False,
                 layer_args=None):
        with tf.variable_scope(name):
            if input_layer is None:
                l_in = L.InputLayer(shape=(None, None) + input_shape, input_var=input_var, name="input")
            else:
                l_in = input_layer
            l_step_input = L.InputLayer(shape=(None,) + input_shape, name="step_input")
            # contains previous hidden and cell state
            l_step_prev_state = L.InputLayer(shape=(None, hidden_dim * 2), name="step_prev_state")
            if layer_args is None:
                layer_args = dict()
            l_lstm = lstm_layer_cls(l_in, num_units=hidden_dim, hidden_nonlinearity=hidden_nonlinearity,
                                    hidden_init_trainable=False, name="lstm", forget_bias=forget_bias,
                                    cell_init_trainable=False, use_peepholes=use_peepholes, **layer_args)
            l_lstm_flat = L.ReshapeLayer(
                l_lstm, shape=(-1, hidden_dim),
                name="lstm_flat"
            )
            l_output_flat = L.DenseLayer(
                l_lstm_flat,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                name="output_flat"
            )
            l_output = L.OpLayer(
                l_output_flat,
                op=lambda flat_output, l_input:
                tf.reshape(flat_output, tf.pack((tf.shape(l_input)[0], tf.shape(l_input)[1], -1))),
                shape_op=lambda flat_output_shape, l_input_shape:
                (l_input_shape[0], l_input_shape[1], flat_output_shape[-1]),
                extras=[l_in],
                name="output"
            )
            l_step_state = l_lstm.get_step_layer(l_step_input, l_step_prev_state, name="step_state")
            l_step_hidden = L.SliceLayer(l_step_state, indices=slice(hidden_dim), name="step_hidden")
            l_step_cell = L.SliceLayer(l_step_state, indices=slice(hidden_dim, None), name="step_cell")
            l_step_output = L.DenseLayer(
                l_step_hidden,
                num_units=output_dim,
                nonlinearity=output_nonlinearity,
                W=l_output_flat.W,
                b=l_output_flat.b,
                name="step_output"
            )

            self._l_in = l_in
            self._hid_init_param = l_lstm.h0
            self._cell_init_param = l_lstm.c0
            self._l_lstm = l_lstm
            self._l_out = l_output
            self._l_step_input = l_step_input
            self._l_step_prev_state = l_step_prev_state
            self._l_step_hidden = l_step_hidden
            self._l_step_cell = l_step_cell
            self._l_step_state = l_step_state
            self._l_step_output = l_step_output
            self._hidden_dim = hidden_dim
Example #6
def feed_forward_cnn_small_categorical_fun(action_space, config, observations):
  """Small cnn network with categorical output."""
  del config

  obs_shape = observations.shape.as_list()
  x = tf.reshape(observations, [-1] + obs_shape[2:])

  with tf.variable_scope("policy"):
    x = tf.to_float(x) / 255.0
    x = tf.contrib.layers.conv2d(x, 32, [5, 5], [2, 2],
                                 activation_fn=tf.nn.relu, padding="SAME")
    x = tf.contrib.layers.conv2d(x, 32, [5, 5], [2, 2],
                                 activation_fn=tf.nn.relu, padding="SAME")

    flat_x = tf.reshape(
        x, [tf.shape(observations)[0], tf.shape(observations)[1],
            functools.reduce(operator.mul, x.shape.as_list()[1:], 1)])

    x = tf.contrib.layers.fully_connected(flat_x, 128, tf.nn.relu)
    logits = tf.contrib.layers.fully_connected(x, action_space.n,
                                               activation_fn=None)

    value = tf.contrib.layers.fully_connected(x, 1, activation_fn=None)[..., 0]
    policy = tf.contrib.distributions.Categorical(logits=logits)

  return NetworkOutput(policy, value, lambda a: a)
Example #7
 def dist_info_sym(self, obs_var, state_info_vars):
     n_batches = tf.shape(obs_var)[0]
     n_steps = tf.shape(obs_var)[1]
     obs_var = tf.reshape(obs_var, tf.pack([n_batches, n_steps, -1]))
     obs_var = tf.cast(obs_var, tf.float32)
     if self.state_include_action:
         prev_action_var = state_info_vars["prev_action"]
         prev_action_var = tf.cast(prev_action_var, tf.float32)
         all_input_var = tf.concat(2, [obs_var, prev_action_var])
     else:
         all_input_var = obs_var
     if self.feature_network is None:
         return dict(
             prob=L.get_output(
                 self.prob_network.output_layer,
                 {self.l_input: all_input_var}
             )
         )
     else:
         flat_input_var = tf.reshape(all_input_var, (-1, self.input_dim))
         return dict(
             prob=L.get_output(
                 self.prob_network.output_layer,
                 {self.l_input: all_input_var, self.feature_network.input_layer: flat_input_var}
             )
         )
Example #8
 def hinge_loss(self, y_true, y_pred):
     # Custom loss function
     margin = 0.1
     # loop counter
     i = tf.constant(0)
     # loop condition function
     c = lambda i, _: tf.less(i, tf.shape(y_true)[0])
     outer_sum_loss = tf.constant(0.0)
     def process_ele(i, outer_sum_loss):
         # Get a subtensor from batch
         y_true_one = y_true[i]
         y_pred_one = y_pred[i]
         # Stack margin to a num_class*1 matrix
         margin_stack = tf.reshape(tf.stack([tf.constant(0.1)] * self.num_classes), [self.num_classes, 1])
         # Stack true label to a word_dim*num_class matrix and transpose it
         y_true_one_stack = tf.stack([tf.transpose(y_true_one)] * self.num_classes)
         # Reshape predict from (word_dim,) to (word_dim,1)
         y_pred_one_t = tf.reshape(y_pred_one, [self.word_dim, 1])
         # Calculate loss
         r = margin_stack - tf.matmul(y_true_one_stack, y_pred_one_t) + tf.matmul(self.label_vec_tensor, y_pred_one_t)
         # Summation
         # We did not exclude true label inside summation, so we subtract extra margin
         sum_inner_loss = tf.reduce_sum(K.relu(r)) - margin
         # Return counter++ and accumulated loss
         return tf.add(i, 1), tf.add(outer_sum_loss, sum_inner_loss)
     
     _, outer_sum_loss = tf.while_loop(c, process_ele, [i, outer_sum_loss])
     # Return average loss over batch
     return outer_sum_loss / tf.cast(tf.shape(y_true)[0], dtype=tf.float32)
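The loss above follows the standard `tf.while_loop` counter-plus-accumulator pattern. A minimal self-contained sketch of the same pattern (TF 1.x, summing squares over a dynamic first dimension):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])

i0 = tf.constant(0)
acc0 = tf.constant(0.0)
cond = lambda i, acc: tf.less(i, tf.shape(x)[0])

def body(i, acc):
    # increment the counter and fold x[i] into the accumulator
    return tf.add(i, 1), tf.add(acc, tf.square(x[i]))

_, total = tf.while_loop(cond, body, [i0, acc0])

with tf.Session() as sess:
    print(sess.run(total))  # 14.0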
Example #9
	def _conv_layer(self, name, input_var, stride, in_channels, out_channels, options = {}):
		activation = options.get('activation', 'relu')
		dropout = options.get('dropout', None)
		padding = options.get('padding', 'SAME')
		batchnorm = options.get('batchnorm', True)
		transpose = options.get('transpose', False)

		with tf.variable_scope(name) as scope:
			if not transpose:
				filter_shape = [KERNEL_SIZE, KERNEL_SIZE, in_channels, out_channels]
			else:
				filter_shape = [KERNEL_SIZE, KERNEL_SIZE, out_channels, in_channels]
			kernel = tf.get_variable(
				'weights',
				shape=filter_shape,
				initializer=tf.truncated_normal_initializer(stddev=math.sqrt(2.0 / KERNEL_SIZE / KERNEL_SIZE / in_channels)),
				dtype=tf.float32
			)
			biases = tf.get_variable(
				'biases',
				shape=[out_channels],
				initializer=tf.constant_initializer(0.0),
				dtype=tf.float32
			)
			if not transpose:
				output = tf.nn.bias_add(
					tf.nn.conv2d(
						input_var,
						kernel,
						[1, stride, stride, 1],
						padding=padding
					),
					biases
				)
			else:
				batch = tf.shape(input_var)[0]
				side = tf.shape(input_var)[1]
				output = tf.nn.bias_add(
					tf.nn.conv2d_transpose(
						input_var,
						kernel,
						[batch, side * stride, side * stride, out_channels],
						[1, stride, stride, 1],
						padding=padding
					),
					biases
				)
			if batchnorm:
				output = tf.contrib.layers.batch_norm(output, center=True, scale=True, is_training=self.is_training, decay=0.99)
			if dropout is not None:
				output = tf.nn.dropout(output, keep_prob=1-dropout)

			if activation == 'relu':
				return tf.nn.relu(output, name=scope.name)
			elif activation == 'sigmoid':
				return tf.nn.sigmoid(output, name=scope.name)
			elif activation == 'none':
				return output
			else:
				raise Exception('invalid activation {} specified'.format(activation))
def soft_alignment(U_AP, raw_question_rep, raw_answer_rep, tokens_question_non_zero, tokens_answer_non_zero):
    """Calculate the AP soft-alignment matrix (in a batch-friendly fashion)

    :param U_AP: The AP similarity matrix (to be learned)
    :param raw_question_rep:
    :param raw_answer_rep:
    :param tokens_question_non_zero:
    :param tokens_answer_non_zero:
    :return:
    """
    answer_transposed = tf.transpose(raw_answer_rep, [0, 2, 1])

    # Unfortunately, there is no clean way in TF to multiply a 3d tensor with a 2d tensor. We need to perform some
    # reshaping. Compare solution 2 on
    # http://stackoverflow.com/questions/38235555/tensorflow-matmul-of-input-matrix-with-batch-data
    raw_question_rep_flat = tf.reshape(raw_question_rep, [-1, tf.shape(raw_question_rep)[2]])
    QU_flat = tf.matmul(raw_question_rep_flat, U_AP)
    QU = tf.reshape(QU_flat, [-1, tf.shape(raw_question_rep)[1], tf.shape(raw_question_rep)[2]])
    QUA = tf.batch_matmul(QU, answer_transposed)
    G = tf.nn.tanh(QUA)

    # We are now removing all the fields of G that belong to zero padding. To achieve this, we are determining these
    # fields and adding a value of -2 to all of them (which is guaranteed to result in a smaller number than the minimum
    # of G, which is -1)
    additions_G_question = tf.transpose(
        tf.reshape((tokens_question_non_zero - 1) * 2, [-1, 1, tf.shape(tokens_question_non_zero)[1]]),
        [0, 2, 1]
    )
    additions_G_answer = tf.reshape((tokens_answer_non_zero - 1) * 2, [-1, 1, tf.shape(tokens_answer_non_zero)[1]])

    # G_non_zero contains values of less than -1 for all fields which have a relation to zero-padded token positions
    G_non_zero = G + additions_G_question + additions_G_answer

    return G_non_zero
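The reshape trick the comment above refers to, as a standalone sketch: to multiply a [B, T, D] tensor by a [D, D'] matrix, flatten the leading axes, matmul, then restore the batch shape (names here are illustrative):

import tensorflow as tf

batched = tf.placeholder(tf.float32, [None, None, 50])  # [B, T, D]
W = tf.get_variable('W_demo', shape=[50, 50])           # [D, D']

flat = tf.reshape(batched, [-1, 50])                    # [B*T, D]
projected = tf.reshape(tf.matmul(flat, W),
                       [-1, tf.shape(batched)[1], 50])  # [B, T, D']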
    def fast_rcnn_minibatch(self, reference_boxes):
        with tf.variable_scope('fast_rcnn_minibatch'):

            reference_boxes_mattached_gtboxes, object_mask, label = \
                self.fast_rcnn_find_positive_negative_samples(reference_boxes)

            positive_indices = tf.reshape(tf.where(tf.not_equal(object_mask, 0.)), [-1])

            num_of_positives = tf.minimum(tf.shape(positive_indices)[0],
                                          tf.cast(self.fast_rcnn_minibatch_size*self.fast_rcnn_positives_ratio, tf.int32))

            positive_indices = tf.random_shuffle(positive_indices)
            positive_indices = tf.slice(positive_indices, begin=[0], size=[num_of_positives])

            negative_indices = tf.reshape(tf.where(tf.equal(object_mask, 0.)), [-1])
            num_of_negatives = tf.minimum(tf.shape(negative_indices)[0],
                                          self.fast_rcnn_minibatch_size - num_of_positives)

            negative_indices = tf.random_shuffle(negative_indices)
            negative_indices = tf.slice(negative_indices, begin=[0], size=[num_of_negatives])

            minibatch_indices = tf.concat([positive_indices, negative_indices], axis=0)
            minibatch_indices = tf.random_shuffle(minibatch_indices)

            minibatch_reference_boxes_mattached_gtboxes = tf.gather(reference_boxes_mattached_gtboxes,
                                                                    minibatch_indices)
            object_mask = tf.gather(object_mask, minibatch_indices)
            label = tf.gather(label, minibatch_indices)
            label_one_hot = tf.one_hot(label, self.num_classes + 1)

            return minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, object_mask, label_one_hot
  def test_get_multi_class_predictions_for_five_aspect_ratios_per_location(
      self):
    num_classes_without_background = 6
    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
    conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
        is_training=False,
        num_classes=num_classes_without_background,
        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
        min_depth=0,
        max_depth=32,
        num_layers_before_predictor=1,
        use_dropout=True,
        dropout_keep_prob=0.8,
        kernel_size=1,
        box_code_size=4
    )
    box_predictions = conv_box_predictor.predict(
        image_features,
        num_predictions_per_location=5,
        scope='BoxPredictor')
    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
    class_predictions_with_background = box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape, class_predictions_with_background_shape
      ) = sess.run([
          tf.shape(box_encodings), tf.shape(class_predictions_with_background)])
      self.assertAllEqual(box_encodings_shape, [4, 320, 1, 4])
      self.assertAllEqual(class_predictions_with_background_shape,
                          [4, 320, num_classes_without_background+1])
  def test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional(
      self):
    image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
    conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
        min_depth=0,
        max_depth=32,
        num_layers_before_predictor=1,
        use_dropout=True,
        dropout_keep_prob=0.8,
        kernel_size=1,
        box_code_size=4
    )
    box_predictions = conv_box_predictor.predict(
        image_features, num_predictions_per_location=5, scope='BoxPredictor')
    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
    objectness_predictions = box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
    init_op = tf.global_variables_initializer()

    resolution = 32
    expected_num_anchors = resolution*resolution*5
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       objectness_predictions_shape) = sess.run(
           [tf.shape(box_encodings), tf.shape(objectness_predictions)],
           feed_dict={image_features:
                      np.random.rand(4, resolution, resolution, 64)})
      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4])
      self.assertAllEqual(objectness_predictions_shape,
                          [4, expected_num_anchors, 1])
Example #14
 def _build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.
     """
     num_inducing = len(self.feature)
     err = self.Y - self.mean_function(self.X)
     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tmp2, c, transpose_a=True)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
               - tf.matmul(tmp1, tmp1, transpose_a=True)
         shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
               - tf.reduce_sum(tf.square(tmp1), 0)
         shape = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
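A hedged restatement of the algebra above (this is the sparse variational predictive density of Titsias (2009) as used in GPflow's SGPR; notation follows the code, with err = Y - m(X)):

    \begin{aligned}
    K_{uu} &= LL^\top, \qquad A = \tfrac{1}{\sigma} L^{-1} K_{uf}, \qquad
    B = AA^\top + I = L_B L_B^\top,\\
    c &= \tfrac{1}{\sigma} L_B^{-1} A\,\mathrm{err}, \qquad
    t_1 = L^{-1} K_{us}, \qquad t_2 = L_B^{-1} t_1,\\
    \mu_* &= t_2^\top c + m(X_*), \qquad
    \Sigma_* = K_{**} - t_1^\top t_1 + t_2^\top t_2 .
    \end{aligned}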
  def test_get_correct_box_encoding_and_class_prediction_shapes(self):
    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
    proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
    rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
        is_training=False,
        num_classes=2,
        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
        num_spatial_bins=[3, 3],
        depth=4,
        crop_size=[12, 12],
        box_code_size=4
    )
    box_predictions = rfcn_box_predictor.predict(
        image_features, num_predictions_per_location=1, scope='BoxPredictor',
        proposal_boxes=proposal_boxes)
    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
    class_predictions_with_background = box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       class_predictions_shape) = sess.run(
           [tf.shape(box_encodings),
            tf.shape(class_predictions_with_background)])
      self.assertAllEqual(box_encodings_shape, [8, 1, 2, 4])
      self.assertAllEqual(class_predictions_shape, [8, 1, 3])
Example #16
 def test_get_boxes_with_five_classes_share_box_across_classes(self):
   image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
   mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
       is_training=False,
       num_classes=5,
       fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
       use_dropout=False,
       dropout_keep_prob=0.5,
       box_code_size=4,
       share_box_across_classes=True
   )
   box_predictions = mask_box_predictor.predict(
       [image_features], num_predictions_per_location=[1],
       scope='BoxPredictor')
   box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
   class_predictions_with_background = box_predictions[
       box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
   init_op = tf.global_variables_initializer()
   with self.test_session() as sess:
     sess.run(init_op)
     (box_encodings_shape,
      class_predictions_with_background_shape) = sess.run(
          [tf.shape(box_encodings),
           tf.shape(class_predictions_with_background)])
     self.assertAllEqual(box_encodings_shape, [2, 1, 1, 4])
     self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])
Example #17
 def K(self, X, X2=None):
     if X2 is None:
         d = tf.fill(tf.pack([tf.shape(X)[0]]), tf.squeeze(self.variance))
         return tf.diag(d)
     else:
         shape = tf.pack([tf.shape(X)[0], tf.shape(X2)[0]])
         return tf.zeros(shape, tf.float64)
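This is a white-noise kernel: the symmetric case returns a scaled identity and the cross-covariance is identically zero,

    K(X, X) = \sigma^2 I, \qquad K(X, X') = 0 \quad (X' \neq X),

which is why the two branches can be built with `tf.diag` and `tf.zeros` instead of evaluating any pairwise distances.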
Example #18
def CombineArcAndRootPotentials(arcs, roots):
  """Combines arc and root potentials into a single set of potentials.

  Args:
    arcs: [B,N,N] tensor of batched arc potentials.
    roots: [B,N] matrix of batched root potentials.

  Returns:
    [B,N,N] tensor P of combined potentials where
      P_{b,s,t} = s == t ? roots[b,t] : arcs[b,s,t]
  """
  # All arguments must have statically-known rank.
  check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
  check.Eq(roots.get_shape().ndims, 2, 'roots must be a matrix')

  # All arguments must share the same type.
  dtype = arcs.dtype.base_dtype
  check.Same([dtype, roots.dtype.base_dtype], 'dtype mismatch')

  roots_shape = tf.shape(roots)
  arcs_shape = tf.shape(arcs)
  batch_size = roots_shape[0]
  num_tokens = roots_shape[1]
  with tf.control_dependencies([
      tf.assert_equal(batch_size, arcs_shape[0]),
      tf.assert_equal(num_tokens, arcs_shape[1]),
      tf.assert_equal(num_tokens, arcs_shape[2])]):
    return tf.matrix_set_diag(arcs, roots)
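A tiny sketch of the `tf.matrix_set_diag` call that does the combining (TF 1.x, hypothetical 1x2x2 batch):

import tensorflow as tf

arcs = tf.constant([[[1., 2.],
                     [3., 4.]]])    # [B=1, N=2, N=2]
roots = tf.constant([[10., 20.]])   # [B=1, N=2]
combined = tf.matrix_set_diag(arcs, roots)

with tf.Session() as sess:
    print(sess.run(combined))
    # [[[10.  2.]
    #   [ 3. 20.]]]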
Example #19
  def test_get_predictions_with_feature_maps_of_dynamic_shape(
      self):
    image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64])
    conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
        is_training=False,
        num_classes=0,
        conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
        depth=32,
        num_layers_before_predictor=1,
        box_code_size=4)
    box_predictions = conv_box_predictor.predict(
        [image_features], num_predictions_per_location=[5],
        scope='BoxPredictor')
    box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS],
                              axis=1)
    objectness_predictions = tf.concat(box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
    init_op = tf.global_variables_initializer()

    resolution = 32
    expected_num_anchors = resolution*resolution*5
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       objectness_predictions_shape) = sess.run(
           [tf.shape(box_encodings), tf.shape(objectness_predictions)],
           feed_dict={image_features:
                      np.random.rand(4, resolution, resolution, 64)})
      self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4])
      self.assertAllEqual(objectness_predictions_shape,
                          [4, expected_num_anchors, 1])
Example #20
def blend_images(data_folder1, data_folder2, out_folder, alpha=.5):
    filename_queue = tf.placeholder(dtype=tf.string)
    label = tf.placeholder(dtype=tf.int32)
    tensor_image = tf.read_file(filename_queue)

    image = tf.image.decode_jpeg(tensor_image, channels=3)

    multiplier = tf.div(tf.constant(224, tf.float32),
                        tf.cast(tf.maximum(tf.shape(image)[0], tf.shape(image)[1]), tf.float32))
    x = tf.cast(tf.round(tf.mul(tf.cast(tf.shape(image)[0], tf.float32), multiplier)), tf.int32)
    y = tf.cast(tf.round(tf.mul(tf.cast(tf.shape(image)[1], tf.float32), multiplier)), tf.int32)
    image = tf.image.resize_images(image, [x, y])

    image = tf.image.rot90(image, k=label)

    image = tf.image.resize_image_with_crop_or_pad(image, 224, 224)
    sess = tf.Session()
    sess.run(tf.local_variables_initializer())
    for root, folders, files in os.walk(data_folder1):
        for each in files:
            if each.find('.jpg') >= 0:
                img1 = Image.open(os.path.join(root, each))
                img2_path = os.path.join(root.replace(data_folder1, data_folder2), each.split("-")[-1])
                rotation = int(each.split("-")[1])
                img2 = sess.run(image, feed_dict={filename_queue: img2_path, label: rotation})
                imsave(os.path.join(os.getcwd(), "temp", "temp.jpg"), img2)
                img2 = Image.open(os.path.join(os.getcwd(), "temp", "temp.jpg"))
                out_image = Image.blend(img1, img2, alpha)
                outfile = os.path.join(root.replace(data_folder1, out_folder), each)
                if not os.path.exists(os.path.split(outfile)[0]):
                    os.makedirs(os.path.split(outfile)[0])
                out_image.save(outfile)
            else:
                print(each)
    sess.close()
 def testDtype(self):
   with self.test_session():
     d = tf.fill([2, 3], 12., name="fill")
     self.assertEqual(d.get_shape(), [2, 3])
     # Test default type for both constant size and dynamic size
     z = tf.ones([2, 3])
     self.assertEqual(z.dtype, tf.float32)
     self.assertEqual([2, 3], z.get_shape())
     self.assertAllEqual(z.eval(), np.ones([2, 3]))
     z = tf.ones(tf.shape(d))
     self.assertEqual(z.dtype, tf.float32)
     self.assertEqual([2, 3], z.get_shape())
     self.assertAllEqual(z.eval(), np.ones([2, 3]))
     # Test explicit type control
     for dtype in (tf.float32, tf.float64, tf.int32,
                   tf.uint8, tf.int16, tf.int8,
                   tf.complex64, tf.complex128, tf.int64, tf.bool):
       z = tf.ones([2, 3], dtype=dtype)
       self.assertEqual(z.dtype, dtype)
       self.assertEqual([2, 3], z.get_shape())
       self.assertAllEqual(z.eval(), np.ones([2, 3]))
       z = tf.ones(tf.shape(d), dtype=dtype)
       self.assertEqual(z.dtype, dtype)
       self.assertEqual([2, 3], z.get_shape())
       self.assertAllEqual(z.eval(), np.ones([2, 3]))
Example #22
def gauss_kl_diag(q_mu, q_sqrt, K,  num_latent):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume num_latent independent distributions, given by the columns of
    q_mu and q_sqrt.

    q_mu is a matrix, each column contains a mean

    q_sqrt is a matrix, each column represents the diagonal of a square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.

    num_latent is an integer: the number of independent distributions (equal to
        the number of columns of q_mu and q_sqrt).
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    KL += num_latent * 0.5 * tf.reduce_sum(
        tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.shape(q_sqrt)[0] * num_latent, tf.float64)
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(q_sqrt)))  # Log-det of q-cov
    L_inv = tf.matrix_triangular_solve(L, eye(tf.shape(L)[0]), lower=True)
    K_inv = tf.matrix_triangular_solve(tf.transpose(L), L_inv, lower=False)
    KL += 0.5 * tf.reduce_sum(tf.expand_dims(tf.diag_part(K_inv), 1)
                              * tf.square(q_sqrt))  # Trace term.
    return KL
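For reference, the closed form that the code assembles term by term; for one column, with q = N(mu, S), S = diag(q_sqrt^2), p = N(0, K), and d the dimension:

    \mathrm{KL}(q \,\|\, p) = \tfrac{1}{2}\Bigl[
      \mu^\top K^{-1} \mu + \operatorname{tr}(K^{-1} S) - d
      + \log\lvert K\rvert - \log\lvert S\rvert \Bigr],

summed over the num_latent columns; note \log|K| = 2\sum_i \log L_{ii}, which is the prior log-det term above.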
def batch_displacement_warp2d(imgs, vector_fields):
    """
    warp images by free form transformation

    Parameters
    ----------
    imgs : tf.Tensor
        images to be warped
        [n_batch, xlen, ylen, n_channel]
    vector_fields : tf.Tensor
        [n_batch, 2, xlen, ylen]

    Returns
    -------
    output : tf.Tensor
        warped images
        [n_batch, xlen, ylen, n_channel]
    """
    n_batch = tf.shape(imgs)[0]
    xlen = tf.shape(imgs)[1]
    ylen = tf.shape(imgs)[2]

    grids = batch_mgrid(n_batch, xlen, ylen)

    T_g = grids + vector_fields
    output = batch_warp2d(imgs, T_g)
    return output
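A hedged usage sketch (assumes `batch_mgrid` and `batch_warp2d` from the same module are in scope; with an all-zero vector field, T_g equals the identity grid, so the warp should return the input unchanged):

imgs = tf.placeholder(tf.float32, [None, 64, 64, 1])      # [n_batch, xlen, ylen, n_channel]
zero_fields = tf.zeros([tf.shape(imgs)[0], 2, 64, 64])    # no displacement
warped = batch_displacement_warp2d(imgs, zero_fields)     # identity warp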
Example #24
def autoencoder_5_5(net_in, LATENT_DIMS):
    data_shape = net_in.get_shape().as_list()
    NUM_CHANNELS = data_shape[4]
    strides = [1, 2, 2, 2, 1]
    # Define all encoding layers
    c0 = 5
    c1 = 5
    padding="VALID"
    conv0, W0 = conv3d_layer("conv0",net_in,[c0, c0, c0, NUM_CHANNELS, 64], strides=strides, padding=padding)
    conv1, W1 = conv3d_layer("conv1",conv0,[c1, c1, c1, 64, 128], strides=strides, padding=padding)   

    # Resolve input dim into fc0 from pool1-output
    #LATENT_DIMS = 100
    shape = conv1.get_shape().as_list()
    print "conv1 shape: ", shape
    fc0_inputdim = shape[1]*shape[2]*shape[3]*shape[4]
    fc0 = fc_layer("fc0", conv1, [fc0_inputdim, LATENT_DIMS])

    # Start going back by first reshaping into 4D image data again
    # Then two sets of depooling and convolutions
    fc1 = fc_layer("fc1", fc0, [LATENT_DIMS,fc0_inputdim])
    fc1_reshaped = tf.reshape(fc1, conv1.get_shape().as_list())
    
    deconv0 = conv3d_layer_transpose("deconv0", fc1_reshaped, W1, output_shape=tf.shape(conv0), strides=strides, padding=padding)
    deconv1 = conv3d_layer_transpose("deconv1", deconv0, W0, output_shape=tf.shape(net_in), strides=strides, padding=padding)
    
    return deconv1
Example #25
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

          q(x) = N(q_mu, q_sqrt^2)
    to
          p(x) = N(0, K)

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix, each column contains a mean.

    q_sqrt is a 3D tensor, each matrix within is a lower triangular square-root
        matrix of the covariance of q.

    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term.
    num_latent = tf.cast(tf.shape(q_sqrt)[2], float_type)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(tf.diag_part(L))))  # Prior log-det term.
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]), float_type)  # constant term
    Lq = tf.matrix_band_part(tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL += -0.5*tf.reduce_sum(tf.log(tf.square(tf.matrix_diag_part(Lq))))  # logdet
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # Trace term
    return KL
Example #26
def ae_latent_sample_beam(latents_dense_in, inputs, ed, embed, hparams):
  """Sample from the latent space in the autoencoder."""
  vocab_size = 2**hparams.z_size
  beam_size = 1  # TODO(lukaszkaiser): larger beam sizes seem to work poorly.
  inputs = tf.tile(inputs, [beam_size, 1, 1])
  ed = tf.tile(ed, [beam_size, 1, 1, 1])

  def symbols_to_logits_fn(ids):
    """Go from ids to logits."""
    ids = tf.expand_dims(ids, axis=2)  # Ids start with added all-zeros.
    latents_discrete = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0]])

    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
      latents_dense = embed(latents_discrete)
      latents_pred = decode_transformer(
          inputs, ed, latents_dense, hparams, "extra")
      logits = tf.layers.dense(latents_pred, vocab_size, name="extra_logits")
      current_output_position = common_layers.shape_list(ids)[1] - 1
      logits = logits[:, current_output_position, :, :]
    return tf.squeeze(logits, axis=[1])

  initial_ids = tf.zeros([tf.shape(latents_dense_in)[0]], dtype=tf.int32)
  length = tf.shape(latents_dense_in)[1]
  ids, _ = beam_search.beam_search(
      symbols_to_logits_fn, initial_ids, beam_size, length,
      vocab_size, alpha=0.0, eos_id=-1, stop_early=False)

  res = tf.expand_dims(ids[:, 0, :], axis=2)  # Pick first beam.
  return res[:, 1:]  # Remove the added all-zeros from ids.
Example #27
def autoencoder_3_3_3(net_in, LATENT_DIMS):
    data_shape = net_in.get_shape().as_list()
    NUM_CHANNELS = data_shape[4]
    strides = [1, 2, 2, 2, 1]
    # Define all encoding layers
    c0 = 3
    c1 = 3
    c2 = 3
    conv0, W0 = conv3d_layer("aeconv0",net_in,[c0, c0, c0, NUM_CHANNELS, 32], strides=strides)
    conv1, W1 = conv3d_layer("aeconv1",conv0,[c1, c1, c1, 32, 64], strides=strides)   
    conv2, W2 = conv3d_layer("aeconv2",conv1,[c2, c2, c2, 64, 64], strides=[1, 1, 1, 1, 1])
    
    #W0 = tf.Variable(xavier_conv3_init(W0_old.get_shape().as_list()), name='weights')
    #W1 = tf.Variable(xavier_conv3_init(W1_old.get_shape().as_list()), name='weights')
    #W2 = tf.Variable(xavier_conv3_init(W2_old.get_shape().as_list()), name='weights')
    
    # Resolve input dim into fc0 from pool1-output
    #LATENT_DIMS = 100
    shape = conv2.get_shape().as_list()
    print "conv2 shape: ", shape
    fc0_inputdim = shape[1]*shape[2]*shape[3]*shape[4]
    fc0 = fc_layer("aefc0", conv2, [fc0_inputdim, LATENT_DIMS])

    # Start going back by first reshaping into 4D image data again
    # Then two sets of depooling and convolutions
    fc1 = fc_layer("aefc1", fc0, [LATENT_DIMS,fc0_inputdim])
    fc1_reshaped = tf.reshape(fc1, conv2.get_shape().as_list())
    
    deconv0 = conv3d_layer_transpose("aedeconv0", fc1_reshaped, W2, output_shape=tf.shape(conv1), strides=[1, 1, 1, 1, 1])
    deconv1 = conv3d_layer_transpose("aedeconv1", deconv0, W1, output_shape=tf.shape(conv0), strides=strides)
    deconv2 = conv3d_layer_transpose("aedeconv2", deconv1, W0, output_shape=tf.shape(net_in), strides=strides)
    
    return deconv2
Example #28
    def cross_entropy(u, label_u, alpha=0.5, normed=False):

        label_ip = tf.cast(
            tf.matmul(label_u, tf.transpose(label_u)), tf.float32)
        s = tf.clip_by_value(label_ip, 0.0, 1.0)

        # compute balance param
        # s_t \in {-1, 1}
        s_t = tf.multiply(tf.add(s, tf.constant(-0.5)), tf.constant(2.0))
        sum_1 = tf.reduce_sum(s)
        sum_all = tf.reduce_sum(tf.abs(s_t))
        balance_param = tf.add(tf.abs(tf.add(s, tf.constant(-1.0))),
                               tf.multiply(tf.div(sum_all, sum_1), s))

        if normed:
            # ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)
            ip_1 = tf.matmul(u, tf.transpose(u))

            def reduce_shaper(t):
                return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])
            mod_1 = tf.sqrt(tf.matmul(reduce_shaper(tf.square(u)),
                                      reduce_shaper(tf.square(u)), transpose_b=True))
            ip = tf.div(ip_1, mod_1)
        else:
            ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)
        ones = tf.ones([tf.shape(u)[0], tf.shape(u)[0]])
        return tf.reduce_mean(tf.multiply(tf.log(ones + tf.exp(alpha * ip)) - s * alpha * ip, balance_param))
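Unpacking the return line: with ip_{ij} = <u_i, u_j> (optionally cosine-normalized), pairwise labels s_{ij} in {0, 1}, and the balance weights w_{ij} computed above (1 for dissimilar pairs, total-pairs/similar-pairs for similar ones), this is the weighted pairwise cross-entropy common in deep-hashing losses:

    L = \operatorname{mean}_{i,j}\; w_{ij}\Bigl[
      \log\bigl(1 + e^{\alpha\, ip_{ij}}\bigr) - \alpha\, s_{ij}\, ip_{ij} \Bigr].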
    def loop(q_, mask, mass_, found_):
        q_list = tf.dynamic_partition(q_, mask, 2)
        condition_indices = tf.dynamic_partition(tf.range(tf.shape(q_)[0]), mask, 2)
        # partition 0 holds the indices where mask is False, partition 1 where it is True

        p = q_list[1] * (1.0 - mass_) / tf.reduce_sum(q_list[1])
        p_new = tf.dynamic_stitch(condition_indices, [q_list[0], p])

        # condition verification and mask modification
        less_mask = tf.cast(tf.less(u, p_new), tf.int32)  # 0 when u is bigger than p, 1 when u is less than p
        condition_indices = tf.dynamic_partition(tf.range(tf.shape(p_new)[0]), less_mask,
                                                 2)  # 0 when u is bigger than p, 1 when u is less than p

        split_p_new = tf.dynamic_partition(p_new, less_mask, 2)
        split_u = tf.dynamic_partition(u, less_mask, 2)

        alpha = tf.dynamic_stitch(condition_indices, [split_p_new[0], split_u[1]])
        mass_ += tf.reduce_sum(split_u[1])

        mask = mask * (tf.ones_like(less_mask) - less_mask)

        found_ = tf.cond(tf.equal(tf.reduce_sum(less_mask), 0),
                         lambda: False,
                         lambda: True)

        alpha = tf.reshape(alpha, q_.shape)

        return alpha, mask, mass_, found_
Example #30
def one_hot_matrix(tensor_in, num_classes, on_value=1.0, off_value=0.0):
    """Encodes indices from given tensor as one-hot tensor.

    TODO(ilblackdragon): Ideally implementation should be
    part of TensorFlow with Eigen-native operation.

    Args:
        tensor_in: Input tensor of shape [N1, N2].
        num_classes: Number of classes to expand index into.
        on_value: Tensor or float, value to fill-in given index.
        off_value: Tensor or float, value to fill-in everything else.
    Returns:
        Tensor of shape [N1, N2, num_classes] with 1.0 for each id in original
        tensor.
    """
    tensor_in = tf.convert_to_tensor(tensor_in)
    sparse_values = tf.to_int64(tf.reshape(tensor_in, [-1, 1]))
    size = tf.shape(sparse_values)[0]
    dims = tf.shape(tensor_in)
    indices = tf.to_int64(tf.reshape(tf.range(0, size), [-1, 1]))
    indices_values = tf.concat(1, [indices, sparse_values])
    outshape = tf.to_int64(expand_concat(0, [size, num_classes]))
    one_hot_vector = tf.sparse_to_dense(indices_values, outshape, on_value, off_value)
    ret = tf.reshape(one_hot_vector, tf.concat(0, [dims, [num_classes]]))
    ret.set_shape(tensor_in.get_shape().concatenate(num_classes))
    return ret
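On any reasonably recent TF this whole helper reduces to the built-in op; a minimal sketch:

one_hot = tf.one_hot(tensor_in, depth=num_classes, on_value=1.0, off_value=0.0)
# same [N1, N2, num_classes] result for an integer-valued `tensor_in`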
 def filter_too_long(features, labels):
     return tf.less_equal(tf.shape(features["inputs"])[1], hparams.max_input_length)
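A hedged sketch of how such a predicate is typically wired into a tf.data pipeline (the `dataset` name is hypothetical; `filter` keeps the (features, labels) pairs for which the predicate is True):

# dataset yields (features, labels) pairs
dataset = dataset.filter(filter_too_long)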
    def decode_infer(self, inputs, state):
        # state['enc']: [b * beam, l_s, e]  ,   state['dec']: [b * beam, q', e]
        # q' = previous decode output length
        # during inference, the following graph is constructed using beam search
        with self.graph.as_default():
            config = self.bert_config

            target_sequence = inputs['target']  # [b * beam, q']
            vocab_size = len(self.hps.vocab_out)
            # truncate word ids: change those greater than vocab_size to unkId
            shape = target_sequence.shape
            unkid = self.hps.vocab_out[self.hps.unk]
            # target_sequence = tf_trunct(target_sequence, vocab_size, self.hps.unkId)
            target_sequence = tf_trunct(target_sequence, vocab_size, unkid)
            target_sequence.set_shape(shape)

            target_length = inputs['target_length']
            target_seg_ids = tf.zeros_like(target_sequence, dtype=tf.int32, name='target_seg_ids_infer')
            tgt_mask = tf.sequence_mask(target_length,
                                        maxlen=tf.shape(target_sequence)[1],
                                        dtype=tf.float32)  # [b, q']

            # with tf.variable_scope('bert', reuse=True):
            out_dict_size = len(self.hps.vocab_out)
            with tf.variable_scope('bert', reuse=True):
                with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
                    # Perform embedding lookup on the target word ids.
                    (tgt_embed, _) = embedding_lookup(
                        input_ids=target_sequence,
                        vocab_size=out_dict_size,  # out vocab size
                        embedding_size=config.hidden_size,
                        initializer_range=config.initializer_range,
                        word_embedding_name='word_embeddings',
                        use_one_hot_embeddings=False)

                    # Add positional embeddings and token type embeddings, then layer
                    # normalize and perform dropout.
                    tgt_embed = embedding_postprocessor(
                        input_tensor=tgt_embed,
                        use_token_type=True,
                        token_type_ids=target_seg_ids,
                        token_type_vocab_size=config.type_vocab_size,
                        token_type_embedding_name='token_type_embeddings',
                        use_position_embeddings=True,
                        position_embedding_name='position_embeddings',
                        initializer_range=config.initializer_range,
                        max_position_embeddings=config.max_position_embeddings,
                        dropout_prob=config.hidden_dropout_prob)
            
            
            with tf.variable_scope('decode', reuse=True):
                # [b, q', e]
                masked_tgt_embed = tgt_embed * tf.expand_dims(tgt_mask, -1)
                dec_attn_bias = attention_bias(tf.shape(masked_tgt_embed)[1], "causal")
                decoder_input = tf.pad(masked_tgt_embed, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]  # Shift left

                infer_decoder_input = decoder_input[:, -1:, :]
                infer_dec_attn_bias = dec_attn_bias[:, :, -1:, :]

                ret = transformer_decoder_three(infer_decoder_input,
                                          self.enc_output,
                                          self.topic_memory,
                                          infer_dec_attn_bias,
                                          self.enc_attn_bias,
                                          self.hps,
                                          state=state['decoder'])

                all_att_weights, decoder_output, decoder_state = ret
                decoder_output = decoder_output[:, -1, :]  # [b * beam, e]
                vocab_logits = tf.matmul(decoder_output, self.decoder_weights, False, True)  # [b * beam, v]
                vocab_probs = tf.nn.softmax(vocab_logits)
                vocab_size = out_dict_size  # out vocabsize
                # we have tiled source_id_oo before feed, so last argument is set to 1
                with tf.variable_scope('copy'):
                    logits = calculate_final_logits(decoder_output, all_att_weights,
                                                    vocab_probs,
                                                    self.input_ids_oo, self.max_out_oovs, self.input_mask, vocab_size,
                                                    tgt_seq_len=1)
                log_prob = tf.log(logits)  # [b * beam, v + v']
        return log_prob, {'encoder': state['encoder'], 'decoder': decoder_state}
    def _build_summarization_model(self):
        is_training = self.is_training
        config = self.bert_config

        gpu_pred_ids = []
        gpu_logits = []
        gpu_train_encoded = []
        gpu_loss = []
        gpu_out_embed = []
        gpu_grads = []
        self._add_placeholders()
        self._n_gpu_split_placeholders(self.hps.n_gpu)
        
        for i in range(self.hps.n_gpu):
            do_reuse = True if i > 0 else None
            with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(tf.get_variable_scope(), reuse=do_reuse):
                
                '''Builds the BERT encoder for this GPU shard.'''
                model = modeling.BertModel(
                    config=self.bert_config,
                    is_training=is_training,
                    input_ids=self.input_ids_ngpu[i],
                    input_mask=self.input_mask_ngpu[i],
                    token_type_ids=self.segment_ids_ngpu[i],
                    use_one_hot_embeddings=self.hps.use_tpu)  # use_one_hot_embeddings=Flags.tpu ?

                encoder_output = model.get_sequence_output()  # [b, l_s, h]

                hidden_size = encoder_output.shape[2].value
                encoder_out_length = tf.shape(encoder_output)[1]
                
                expand_topic_id = tf.expand_dims(self.topic_ids_ngpu[i], -1)
                topic_input_sequence = tf.tile(expand_topic_id, [1, encoder_out_length]) 
                with tf.variable_scope('topic'):
                    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
                        # Perform embedding lookup on the target word ids.
                        (self.topic_embed, self.topic_embeddings) = embedding_lookup(
                            input_ids=topic_input_sequence,  # here the embedding input of decoder have to be output_ids
                            vocab_size=self.hps.num_topic,  # decode dictionary modified
                            embedding_size=self.hps.topic_embedding_size,
                            initializer_range=config.initializer_range,
                            word_embedding_name='topic_embeddings',
                            use_one_hot_embeddings=False)
                        print('!!!!topic_embeddings', self.topic_embeddings, self.topic_embed)
                        
                self.encoder_output = tf.concat([encoder_output, self.topic_embed], -1)

                self.enc_attn_bias = attention_bias(self.input_mask_ngpu[i], 'masking')
                
                out_dict_size = len(self.hps.vocab_out)
                ## for topic word memory
                with tf.variable_scope('bert', reuse=True):
                    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
                        # Perform embedding lookup on the target word ids.
                        (self.topic_word_memory, _) = embedding_lookup(
                            input_ids=self.topic_words_ids_ngpu[i],  # here the embedding input of decoder have to be output_ids
                            vocab_size=out_dict_size,  # decode dictionary modified
                            embedding_size=config.hidden_size,
                            initializer_range=config.initializer_range,
                            word_embedding_name='word_embeddings',
                            use_one_hot_embeddings=False)
                        
                        self.topic_word_memory = embedding_postprocessor(
                            input_tensor=self.topic_word_memory,
                            use_token_type=True,
                            token_type_ids=self.mem_segment_ids_ngpu[i],
                            token_type_vocab_size=config.type_vocab_size,
                            token_type_embedding_name='token_type_embeddings',
                            use_position_embeddings=False,
                            position_embedding_name='position_embeddings',
                            initializer_range=config.initializer_range,
                            max_position_embeddings=config.max_position_embeddings,
                            dropout_prob=config.hidden_dropout_prob)

                with tf.variable_scope('bert', reuse=True):
                    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
                        # Perform embedding lookup on the target word ids.
                        (self.out_embed, self.bert_embeddings) = embedding_lookup(
                            input_ids=self.output_ids_ngpu[i],  # here the embedding input of decoder have to be output_ids
                            vocab_size=out_dict_size,  # decode dictionary modified
                            embedding_size=config.hidden_size,
                            initializer_range=config.initializer_range,
                            word_embedding_name='word_embeddings',
                            use_one_hot_embeddings=False)

                        # Add positional embeddings and token type embeddings, then layer
                        # normalize and perform dropout.
                        self.out_embed = embedding_postprocessor(
                            input_tensor=self.out_embed,
                            use_token_type=True,
                            token_type_ids=self.out_segment_ids_ngpu[i],
                            token_type_vocab_size=config.type_vocab_size,
                            token_type_embedding_name='token_type_embeddings',
                            use_position_embeddings=True,
                            position_embedding_name='position_embeddings',
                            initializer_range=config.initializer_range,
                            max_position_embeddings=config.max_position_embeddings,
                            dropout_prob=config.hidden_dropout_prob)

                with tf.variable_scope('decode'):
                    self.decoder_weights = self.bert_embeddings
                    self.masked_out_embed = self.out_embed * tf.expand_dims(self.output_mask_ngpu[i], -1)
                    self.dec_attn_bias = attention_bias(tf.shape(self.masked_out_embed)[1], 'causal')
                    self.decoder_input = tf.pad(self.masked_out_embed, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]  # Shift left
                    self.all_att_weights, self.decoder_output = transformer_decoder_three(self.decoder_input,
                                                                                    self.encoder_output,
                                                                                    self.topic_word_memory,
                                                                                    self.dec_attn_bias,
                                                                                    self.enc_attn_bias,
                                                                                    self.hps)
                    # [b, l_t, e] => [b*l_t, v]
                    self.decoder_output = tf.reshape(self.decoder_output, [-1, hidden_size])
                    self.vocab_logits = tf.matmul(self.decoder_output, self.decoder_weights, False, True)  # (b * l_t, v)
                    self.vocab_probs = tf.nn.softmax(self.vocab_logits)  # [b * l_t, v]
                    # vocab_size = len(self.hps.vocab)
                    with tf.variable_scope('copy'):
                        self.single_logits = calculate_final_logits(self.decoder_output, self.all_att_weights, self.vocab_probs,
                                                             self.input_ids_oo_ngpu[i], self.max_out_oovs, self.input_mask_ngpu[i],
                                                             out_dict_size,
                                                             self.tiled_len)  # [b * l_t, v + v']
                        self.single_pred_ids = tf.reshape(tf.argmax(self.single_logits, axis=-1), [self.batch_size, -1])
                
                with tf.variable_scope('loss'):
                    self.single_ce = smooth_cross_entropy(
                        self.single_logits,
                        self.output_label_ngpu[i],
                        self.hps.label_smoothing)

                    self.single_ce = tf.reshape(self.single_ce, tf.shape(self.output_label_ngpu[i]))  # [b, l_t]

                    self.single_loss = tf.reduce_sum(self.single_ce * self.output_mask_ngpu[i]) / tf.reduce_sum(self.output_mask_ngpu[i])  # scalar
            
                gpu_pred_ids.append(self.single_pred_ids)
                gpu_logits.append(self.single_logits)
                gpu_train_encoded.append(self.encoder_output)
                gpu_loss.append(self.single_loss)
                gpu_out_embed.append(self.out_embed)
                params = tf.trainable_variables()
                grads = tf.gradients(self.single_loss, params)
                grads = list(zip(grads, params))
                gpu_grads.append(grads)
                #gpu_ops.append([loss, logits])

         
 
        self.pred_ids = tf.concat(gpu_pred_ids, axis=0)
        self.logits = tf.concat(gpu_logits, axis=0)
        self.loss = tf.reduce_mean(gpu_loss)
        self.encoder_output = tf.concat(gpu_train_encoded, axis=0)
        self.out_embed = tf.concat(gpu_out_embed, axis=0)
        # end for
        grads = sum_grads(gpu_grads)
        grads = [g for g, p in grads]
        self.total_gradient = grads
        
        
        tf.summary.scalar('loss', self.loss)
Example #34
def knot_weights(positions,
                 num_knots,
                 degree,
                 cyclical,
                 sparse_mode=False,
                 name=None):
  """Function that converts cardinal B-spline positions to knot weights.

  Note:
    In the following, A1 to An are optional batch dimensions.

  Args:
    positions: A tensor with shape `[A1, .. An]`. Positions must be between
      `[0, C - D)` for non-cyclical and `[0, C)` for cyclical splines, where `C`
      is the number of knots and `D` is the spline degree.
    num_knots: A strictly positive `int` describing the number of knots in the
      spline.
    degree: An `int` describing the degree of the spline, which must be smaller
      than `num_knots`.
    cyclical: A `bool` describing whether the spline is cyclical.
    sparse_mode: A `bool` describing whether to return a result only for the
      knots with nonzero weights. If set to True, the function returns the
      weights of only the `degree` + 1 knots that are non-zero, as well as the
      indices of the knots.
    name: A name for this op. Defaults to "bspline_knot_weights".

  Returns:
    A tensor with dense weights for each control point, with the shape
    `[A1, ... An, C]` if `sparse_mode` is False.
    Otherwise, returns a tensor of shape `[A1, ... An, D + 1]` that contains the
    non-zero weights, and a tensor with the indices of the knots, with the type
    tf.int32.

  Raises:
    ValueError: If degree is greater than 4 or num_knots - 1, or less than 0.
    InvalidArgumentError: If positions are not in the right range.
  """
  with tf.compat.v1.name_scope(name, "bspline_knot_weights", [positions]):
    positions = tf.convert_to_tensor(value=positions)

    if degree > 4 or degree < 0:
      raise ValueError("Degree should be between 0 and 4.")
    if degree > num_knots - 1:
      raise ValueError("Degree cannot be >= number of knots.")
    if cyclical:
      positions = asserts.assert_all_in_range(positions, 0.0, float(num_knots))
    else:
      positions = asserts.assert_all_in_range(positions, 0.0,
                                              float(num_knots - degree))

    all_basis_functions = {
        # Maps valid degrees to functions.
        Degree.CONSTANT: _constant,
        Degree.LINEAR: _linear,
        Degree.QUADRATIC: _quadratic,
        Degree.CUBIC: _cubic,
        Degree.QUARTIC: _quartic
    }
    basis_functions = all_basis_functions[degree]

    if not cyclical and num_knots - degree == 1:
      # In this case all weights are non-zero and we can just return them.
      if not sparse_mode:
        return basis_functions(positions)
      else:
        shift = tf.zeros_like(positions, dtype=tf.int32)
        return basis_functions(positions), shift

    # shape_batch = positions.shape.as_list()
    shape_batch = tf.shape(input=positions)
    positions = tf.reshape(positions, shape=(-1,))

    # Calculate the nonzero weights from the decimal parts of positions.
    shift = tf.floor(positions)
    sparse_weights = basis_functions(positions - shift)
    shift = tf.cast(shift, tf.int32)

    if sparse_mode:
      # Returns just the weights and the shift amounts, so that tf.gather_nd on
      # the knots can be used to sparsely activate knots if needed.
      shape_weights = tf.concat(
          (shape_batch, tf.constant((degree + 1,), dtype=tf.int32)), axis=0)
      sparse_weights = tf.reshape(sparse_weights, shape=shape_weights)
      shift = tf.reshape(shift, shape=shape_batch)
      return sparse_weights, shift

    num_positions = tf.size(input=positions)
    ind_row, ind_col = tf.meshgrid(
        tf.range(num_positions, dtype=tf.int32),
        tf.range(degree + 1, dtype=tf.int32),
        indexing="ij")

    tiled_shifts = tf.reshape(
        tf.tile(tf.expand_dims(shift, axis=-1), multiples=(1, degree + 1)),
        shape=(-1,))
    ind_col = tf.reshape(ind_col, shape=(-1,)) + tiled_shifts
    if cyclical:
      ind_col = tf.math.mod(ind_col, num_knots)
    indices = tf.stack((tf.reshape(ind_row, shape=(-1,)), ind_col), axis=-1)
    shape_indices = tf.concat((tf.reshape(
        num_positions, shape=(1,)), tf.constant(
            (degree + 1, 2), dtype=tf.int32)),
                              axis=0)
    indices = tf.reshape(indices, shape=shape_indices)
    shape_scatter = tf.concat((tf.reshape(
        num_positions, shape=(1,)), tf.constant((num_knots,), dtype=tf.int32)),
                              axis=0)
    weights = tf.scatter_nd(indices, sparse_weights, shape_scatter)
    shape_weights = tf.concat(
        (shape_batch, tf.constant((num_knots,), dtype=tf.int32)), axis=0)
    return tf.reshape(weights, shape=shape_weights)
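
A quick usage sketch with hypothetical values (assumes the helpers above are in scope): for a quadratic spline with five knots, each position activates degree + 1 = 3 knots.

positions = tf.constant([0.25, 1.5, 2.75])  # must lie in [0, C - D) = [0, 3)
dense = knot_weights(positions, num_knots=5, degree=2, cyclical=False)
# dense: shape [3, 5]; each row holds 3 non-zero basis weights.
sparse, shifts = knot_weights(positions, num_knots=5, degree=2,
                              cyclical=False, sparse_mode=True)
# sparse: [3, 3] non-zero weights; shifts: [3] index of the first active knot.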
Example #35
0
max_sent_len = seq_train_stories.shape[2]


# SEQUENTIAL CONFIGURATION 

SEQ_EPOCHS = 1                 # number of training epochs
seq_num_units = 20             # number of units in each LSTMCell
seq_num_layers = 2             # number of stacked LSTMs
SEQ_KEEP_PRB = 0.9             # dropout probability of keeping a value
bidirectional = True           # enable bidirectional output layer


seq_story = tf.placeholder(tf.int64, [None, None, None], "seq_story")        # [seq_batch_size x 5 x max_seq_length]
seq_order = tf.placeholder(tf.int64, [None, None], "seq_order")              # [seq_batch_size x 5]
seq_lens = tf.placeholder(tf.int64, [None, None], "seq_lens")     # [seq_batch_size x 5]
seq_batch_size = tf.shape(seq_story)[0]
seq_keep_prob = tf.placeholder(tf.float64)          # dropout probability placeholder

with tf.variable_scope("seq"):
    # Word embeddings
    # Note: tf.split(axis, num_split, value) is the TF pre-1.0 argument order.
    sentences = [tf.reshape(x, [seq_batch_size, -1]) for x in tf.split(1, 5, seq_story)]  # 5 times [seq_batch_size x max_sent_len]
    embeddings = tf.get_variable("embeddings", initializer=embeds, trainable=True)
    inputs = [tf.nn.embedding_lookup(embeddings, sentence)   # 5 times [seq_batch_size x max_sent_len x embedding_size]
                          for sentence in sentences]

with tf.variable_scope("lstms") as varscope:
    # first LSTM
    index = 0
    lstm1 = tf.nn.rnn_cell.LSTMCell(seq_num_units, state_is_tuple=True)
    lstm1 = tf.nn.rnn_cell.MultiRNNCell([lstm1] * seq_num_layers)
    lstm1 = tf.nn.rnn_cell.DropoutWrapper(lstm1, output_keep_prob=seq_keep_prob)
Example #36
0
def unpool(inputs):
    return tf.image.resize_bilinear(
        inputs, size=[tf.shape(inputs)[1] * 2,
                      tf.shape(inputs)[2] * 2])
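
Usage sketch: the bilinear resize doubles both spatial dimensions, e.g.

x = tf.zeros([1, 4, 4, 3])
y = unpool(x)  # upsampled to shape [1, 8, 8, 3]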
Example #37
0
learning_rate_decay = 0.9
min_learning_rate = 0.0001
keep_probability = 0.5

#defining a session
tf.reset_default_graph()
session = tf.InteractiveSession()

# load model inputs
inputs, targets, lr, keep_prob = model_inputs()

# setting the sequence length
sequence_length = tf.placeholder_with_default(25, None, name='sequence_length')

# getting the shape of input tensor
input_shape = tf.shape(inputs)

# getting the training and test predictions

training_predictions, test_predictions = seq2seq_model(
    tf.reverse(inputs, [-1]), targets, keep_prob, batch_size, sequence_length,
    len(answerswords2int), len(questionswords2int), encoding_embedding_size,
    decoding_embedding_size, rnn_size, num_layers, questionswords2int)
# setting up the loss error, the optimizer and gradient clipping
with tf.name_scope("optimization"):
    loss_error = tf.contrib.seq2seq.sequence_loss(
        training_predictions, targets,
        tf.ones([input_shape[0], sequence_length]))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss_error)
    clipped_gradients = [(tf.clip_by_value(grad_tensor, -5., 5.), grad_variable)
                         for grad_tensor, grad_variable in gradients
                         if grad_tensor is not None]
    optimizer_gradient_clipping = optimizer.apply_gradients(clipped_gradients)
Example #38
0
def simulate_step(batch_env, algo, log=True, reset=False):
  """Simulation step of a vectorized algorithm with in-graph environments.

  Integrates the operations implemented by the algorithm and the environments
  into a combined operation.

  Args:
    batch_env: In-graph batch environment.
    algo: Algorithm instance implementing required operations.
    log: Tensor indicating whether to compute and return summaries.
    reset: Tensor causing all environments to reset.

  Returns:
    Tuple of tensors containing done flags for the current episodes, possibly
    intermediate scores for the episodes, and a summary tensor.
  """

  def _define_begin_episode(agent_indices):
    """Reset environments, intermediate scores and durations for new episodes.

    Args:
      agent_indices: Tensor containing batch indices starting an episode.

    Returns:
      Summary tensor, new score tensor, and new length tensor.
    """
    assert agent_indices.shape.ndims == 1
    zero_scores = tf.zeros_like(agent_indices, tf.float32)
    zero_durations = tf.zeros_like(agent_indices)
    update_score = tf.scatter_update(score_var, agent_indices, zero_scores)
    update_length = tf.scatter_update(
        length_var, agent_indices, zero_durations)
    reset_ops = [
        batch_env.reset(agent_indices), update_score, update_length]
    with tf.control_dependencies(reset_ops):
      return algo.begin_episode(agent_indices), update_score, update_length

  def _define_step():
    """Request actions from the algorithm and apply them to the environments.

    Increments the lengths of all episodes and increases their scores by the
    current reward. After stepping the environments, provides the full
    transition tuple to the algorithm.

    Returns:
      Summary tensor, new score tensor, and new length tensor.
    """
    prevob = batch_env.observ + 0  # Ensure a copy of the variable value.
    agent_indices = tf.range(len(batch_env))
    action, step_summary = algo.perform(agent_indices, prevob)
    action.set_shape(batch_env.action.shape)
    with tf.control_dependencies([batch_env.step(action)]):
      add_score = score_var.assign_add(batch_env.reward)
      inc_length = length_var.assign_add(tf.ones(len(batch_env), tf.int32))
    with tf.control_dependencies([add_score, inc_length]):
      agent_indices = tf.range(len(batch_env))
      experience_summary = algo.experience(
          agent_indices, prevob,
          batch_env.action,
          batch_env.reward,
          batch_env.done,
          batch_env.observ)
      summary = tf.summary.merge([step_summary, experience_summary])
    return summary, add_score, inc_length

  def _define_end_episode(agent_indices):
    """Notify the algorithm of ending episodes.

    Also updates the mean score and length counters used for summaries.

    Args:
      agent_indices: Tensor holding batch indices that end their episodes.

    Returns:
      Summary tensor.
    """
    assert agent_indices.shape.ndims == 1

    submit_score = mean_score.submit(tf.gather(score, agent_indices))
    submit_length = mean_length.submit(
        tf.cast(tf.gather(length, agent_indices), tf.float32))
        
    # Note: close_env is constructed here but never added to any control
    # dependency, so the environments are not actually closed by this op.
    close_env = tf.py_func(batch_env.close, [], [])

    with tf.control_dependencies([submit_score, submit_length]):
      return algo.end_episode(agent_indices)

  def _define_summaries():
    """Reset the average score and duration, and return them as summary.

    Returns:
      Summary string.
    """
    score_summary = tf.cond(
        tf.logical_and(log, tf.cast(mean_score.count, tf.bool)),
        lambda: tf.summary.scalar('mean_score', mean_score.clear()), str)
    length_summary = tf.cond(
        tf.logical_and(log, tf.cast(mean_length.count, tf.bool)),
        lambda: tf.summary.scalar('mean_length', mean_length.clear()), str)
    return tf.summary.merge([score_summary, length_summary])

  with tf.name_scope('simulate'):
    log = tf.convert_to_tensor(log)
    reset = tf.convert_to_tensor(reset)
    with tf.variable_scope('simulate_temporary'):
      score_var = tf.get_variable(
          'score', (len(batch_env),), tf.float32,
          tf.constant_initializer(0),
          trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
      length_var = tf.get_variable(
          'length', (len(batch_env),), tf.int32,
          tf.constant_initializer(0),
          trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
    mean_score = streaming_mean.StreamingMean((), tf.float32, 'mean_score')
    mean_length = streaming_mean.StreamingMean((), tf.float32, 'mean_length')
    agent_indices = tf.cond(
        reset,
        lambda: tf.range(len(batch_env)),
        lambda: tf.cast(tf.where(batch_env.done)[:, 0], tf.int32))
    begin_episode, score, length = tf.cond(
        tf.cast(tf.shape(agent_indices)[0], tf.bool),
        lambda: _define_begin_episode(agent_indices),
        lambda: (str(), score_var, length_var))
    with tf.control_dependencies([begin_episode]):
      step, score, length = _define_step()
    with tf.control_dependencies([step]):
      agent_indices = tf.cast(tf.where(batch_env.done)[:, 0], tf.int32)
      end_episode = tf.cond(
          tf.cast(tf.shape(agent_indices)[0], tf.bool),
          lambda: _define_end_episode(agent_indices), str)
    with tf.control_dependencies([end_episode]):
      summary = tf.summary.merge([
          _define_summaries(), begin_episode, step, end_episode])
    with tf.control_dependencies([summary]):
      score = 0.0 + score
      done = batch_env.done
    return done, score, summary
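
A hedged sketch of driving the returned ops (assumes in-graph `batch_env` and `algo` objects as described above; the loop count is arbitrary):

done, score, summary = simulate_step(batch_env, algo, log=True, reset=False)
with tf.train.MonitoredSession() as sess:  # initializes the local score/length variables
    for _ in range(100):
        sess.run([done, score, summary])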
Example #39
0
def gram(x):
    shape_x = tf.shape(x)
    b = shape_x[0]
    c = shape_x[3]
    x = tf.reshape(x, [b, -1, c])
    return tf.matmul(tf.transpose(x, [0, 2, 1]), x) / tf.cast((tf.size(x) // b), tf.float32)
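
A minimal sketch of the usual neural-style use of such Gram matrices (function name hypothetical, not part of the snippet above):

def style_loss(gen_features, style_features):
    # Mean squared distance between Gram matrices of two [b, h, w, c] maps.
    return tf.reduce_mean(tf.square(gram(gen_features) - gram(style_features)))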
Example #40
0
 def max_length(self):
     time_dim = 0 if self.time_major_optimization else 1
     return tf.shape(self.inputs)[time_dim]
Example #41
0
    def __call__(self,
                 input_,
                 n_caps,
                 kernel_size=9,
                 stride_size=2,
                 route_iter=3,
                 reuse=False,
                 initializer=tf.truncated_normal_initializer(stddev=0.02)):
        input_shape = input_.get_shape().as_list()
        batch_size = tf.shape(input_)[0]
        #[None, height, width, channel]
        assert len(input_shape) >= 2

        if 'primary' in self.name:  #Primary Capsule
            with tf.variable_scope(self.name) as scope:
                if reuse: scope.reuse_variables()

                if len(input_shape) == 3:
                    input_ = tf.expand_dims(
                        input_, axis=-1)  # add a channel axis for grayscale inputs
                self.input_shape = input_.get_shape().as_list()

                capsules = conv2d(input_,
                                  n_caps * self.d_caps,
                                  kernel_h=kernel_size,
                                  stride_h=stride_size,
                                  initializer=initializer)
                #capsule: [None, height, width, channel]
                capsule_shape = capsules.get_shape().as_list()
                total_num_cap = int(
                    (capsule_shape[1] * capsule_shape[2] * capsule_shape[3]) /
                    self.d_caps)
                # Precompute the number of capsules so the reshape below has a
                # statically known size (avoids shape errors in the digit-capsule layer).

                capsules = tf.reshape(capsules,
                                      [batch_size, total_num_cap, self.d_caps])
                print(capsules)
                capsules = squash(capsules)
                print(capsules)
                return capsules

        else:  #Digit Capsule
            with tf.variable_scope(self.name) as scope:
                if reuse: scope.reuse_variables()
                # Assume input_: [batch_size, num_capsules, d_caps]
                #if len(input_shape) ==2 : input_ = tf.expand_dims(input_, axis=-1)
                self.input_shape = input_.get_shape().as_list()
                input_tiled = tf.expand_dims(
                    input_, axis=-1, name='input_expand_1'
                )  #[batch_size, # of capsule, ..., d-capsules, 1]
                input_tiled = tf.expand_dims(
                    input_tiled, axis=2, name='input_expand_2'
                )  #[batch_size, # of capsule, ..., d-capsules, 1]
                print(input_tiled)
                input_tiled = tf.tile(
                    input_tiled, [1, 1, n_caps, 1, 1], name='input_tile'
                )  #[batch_size, # of capsule, # of next capsule, d_capsules, 1]
                print(input_tiled)

                W = tf.get_variable('prediction_w', [
                    1, self.input_shape[1], n_caps, self.d_caps,
                    self.input_shape[2]
                ],
                                    initializer=initializer)
                W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name='W_tiled')

                prediction_vectors = tf.matmul(W_tiled, input_tiled)

                b = tf.zeros([batch_size, self.input_shape[1], n_caps, 1, 1])

                for i in range(route_iter):
                    coupling_coeff = tf.nn.softmax(b, dim=2)

                    s = tf.multiply(prediction_vectors,
                                    coupling_coeff,
                                    name='weighted_prediction')
                    sum_s = tf.reduce_sum(s,
                                          axis=1,
                                          keep_dims=True,
                                          name='weighted_sum')
                    capsules = squash(
                        sum_s, axis=-2
                    )  # (None, 1, num_next_capsules, d_caps, 1)
                    caps_out_tile = tf.tile(capsules,
                                            [1, self.input_shape[1], 1, 1, 1],
                                            name='capsule_output_tiled')

                    a = tf.matmul(prediction_vectors,
                                  caps_out_tile,
                                  transpose_a=True,
                                  name='agreement')
                    b = tf.add(b, a, name='update_logit')

                capsules = tf.reshape(capsules,
                                      [batch_size, n_caps, self.d_caps])
                print(capsules)
                #return tf.squeeze(capsules)
                return capsules
Example #42
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits,
         probabilities) = create_model(bert_config, is_training, input_ids,
                                       input_mask, segment_ids, label_ids,
                                       num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                accuracy = tf.metrics.accuracy(labels=label_ids,
                                               predictions=predictions,
                                               weights=is_real_example)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    "eval_accuracy": accuracy,
                    "eval_loss": loss,
                }

            eval_metrics = (metric_fn, [
                per_example_loss, label_ids, logits, is_real_example
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec
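
Downstream, a `model_fn` like this is handed to a `TPUEstimator`; a sketch with placeholder batch sizes (`run_config` assumed to be a `tf.contrib.tpu.RunConfig`):

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,
    eval_batch_size=8,
    predict_batch_size=8)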
Example #43
0
  def _loss(self, experience, weights=None):
    """Computes loss for behavioral cloning.

    Args:
      experience: A `Trajectory` containing experience.
      weights: Optional scalar or element-wise (per-batch-entry) importance
        weights.

    Returns:
      loss: A `LossInfo` struct.

    Raises:
      ValueError:
        If the number of actions is greater than 1.
    """
    with tf.name_scope('loss'):
      if self._nested_actions:
        actions = experience.action
      else:
        actions = tf.nest.flatten(experience.action)[0]

      batch_size = (
          tf.compat.dimension_value(experience.step_type.shape[0]) or
          tf.shape(experience.step_type)[0])
      logits, _ = self._cloning_network(
          experience.observation,
          experience.step_type,
          training=True,
          network_state=self._cloning_network.get_initial_state(batch_size))

      error = self._loss_fn(logits, actions)
      error_dtype = tf.nest.flatten(error)[0].dtype
      boundary_weights = tf.cast(~experience.is_boundary(), error_dtype)
      error *= boundary_weights

      if nest_utils.is_batched_nested_tensors(
          experience.action, self.action_spec, num_outer_dims=2):
        # Do a sum over the time dimension.
        error = tf.reduce_sum(input_tensor=error, axis=1)

      # Average across the elements of the batch.
      # Note: We use an element wise loss above to ensure each element is always
      #   weighted by 1/N where N is the batch size, even when some of the
      #   weights are zero due to boundary transitions. Weighting by 1/K where K
      #   is the actual number of non-zero weight would artificially increase
      #   their contribution in the loss. Think about what would happen as
      #   the number of boundary samples increases.

      agg_loss = common.aggregate_losses(
          per_example_loss=error,
          sample_weight=weights,
          regularization_loss=self._cloning_network.losses)
      total_loss = agg_loss.total_loss

      dict_losses = {'loss': agg_loss.weighted,
                     'reg_loss': agg_loss.regularization,
                     'total_loss': total_loss}

      common.summarize_scalar_dict(dict_losses,
                                   step=self.train_step_counter,
                                   name_scope='Losses/')

      if self._summarize_grads_and_vars:
        with tf.name_scope('Variables/'):
          for var in self._cloning_network.trainable_weights:
            tf.compat.v2.summary.histogram(
                name=var.name.replace(':', '_'),
                data=var,
                step=self.train_step_counter)

      if self._debug_summaries:
        common.generate_tensor_summaries('errors', error,
                                         self.train_step_counter)

      return tf_agent.LossInfo(total_loss,
                               BehavioralCloningLossInfo(loss=error))
Example #44
0
def train(train_dir,
          config,
          dataset,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          num_sync_workers=0,
          num_ps_tasks=0,
          task=0):
  """Train loop."""
  tf.gfile.MakeDirs(train_dir)
  is_chief = (task == 0)
  if is_chief:
    _trial_summary(config.hparams, config.train_examples_path, train_dir)
  with tf.Graph().as_default():
    with tf.device(tf.train.replica_device_setter(
        num_ps_tasks, merge_devices=True)):

      model = config.model
      model.build(config.hparams,
                  config.data_converter.output_depth,
                  is_training=True)

      optimizer = model.train(**_get_input_tensors(dataset, config))

      hooks = []
      if num_sync_workers:
        optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            num_sync_workers)
        hooks.append(optimizer.make_session_run_hook(is_chief))

      grads, var_list = zip(*optimizer.compute_gradients(model.loss))
      global_norm = tf.global_norm(grads)
      tf.summary.scalar('global_norm', global_norm)

      if config.hparams.clip_mode == 'value':
        g = config.hparams.grad_clip
        clipped_grads = [tf.clip_by_value(grad, -g, g) for grad in grads]
      elif config.hparams.clip_mode == 'global_norm':
        clipped_grads = tf.cond(
            global_norm < config.hparams.grad_norm_clip_to_zero,
            lambda: tf.clip_by_global_norm(  # pylint:disable=g-long-lambda
                grads, config.hparams.grad_clip, use_norm=global_norm)[0],
            lambda: [tf.zeros(tf.shape(g)) for g in grads])
      else:
        raise ValueError(
            'Unknown clip_mode: {}'.format(config.hparams.clip_mode))
      train_op = optimizer.apply_gradients(
          zip(clipped_grads, var_list), global_step=model.global_step,
          name='train_step')

      logging_dict = {'global_step': model.global_step,
                      'loss': model.loss}

      hooks.append(tf.train.LoggingTensorHook(logging_dict, every_n_iter=100))
      if num_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

      scaffold = tf.train.Scaffold(
          saver=tf.train.Saver(
              max_to_keep=checkpoints_to_keep,
              keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))
      tf.contrib.training.train(
          train_op=train_op,
          logdir=train_dir,
          scaffold=scaffold,
          hooks=hooks,
          save_checkpoint_secs=60,
          master=master,
          is_chief=is_chief)
Example #45
0
 def _batch_shape_tensor(self):
   return tf.broadcast_dynamic_shape(
       tf.shape(input=self.loc), tf.shape(input=self.scale))
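
A quick sketch of the broadcast: if `loc` has shape [3, 1] and `scale` has shape [4], the resulting batch shape is [3, 4].

loc = tf.zeros([3, 1])
scale = tf.ones([4])
batch_shape = tf.broadcast_dynamic_shape(tf.shape(input=loc),
                                         tf.shape(input=scale))  # -> [3, 4]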
Example #46
0
  def __init__(self,
               config,
               x,
               y,
               x_b,
               y_b,
               x_b_v,
               y_b_v,
               num_classes_a,
               num_classes_b,
               is_training=True,
               ext_wts=None,
               y_sel=None,
               w_class_a=None,
               b_class_a=None,
               nshot=None):
    self._config = config
    self._is_training = is_training
    self._num_classes_a = num_classes_a
    self._num_classes_b = num_classes_b

    if config.backbone_class == 'resnet_backbone':
      bb_config = config.resnet_config
    else:
      assert False, 'Not supported'
    opt_config = config.optimizer_config
    proto_config = config.protonet_config
    transfer_config = config.transfer_config

    self._backbone = get_model(config.backbone_class, bb_config)
    self._inputs = x
    self._labels = y
    # if opt_config.num_gpu > 1:
    #   self._labels_all = allgather(self._labels)
    # else:
    self._labels_all = self._labels
    self._inputs_b = x_b
    self._labels_b = y_b
    self._inputs_b_v = x_b_v
    self._labels_b_v = y_b_v
    # if opt_config.num_gpu > 1:
    #   self._labels_b_v_all = allgather(self._labels_b_v)
    # else:
    self._labels_b_v_all = self._labels_b_v
    self._y_sel = y_sel
    self._mask = tf.placeholder(tf.bool, [], name='mask')

    # global_step = tf.get_variable(
    #     'global_step', shape=[], dtype=tf.int64, trainable=False)
    global_step = tf.contrib.framework.get_or_create_global_step()
    self._global_step = global_step
    log.info('LR decay steps {}'.format(opt_config.lr_decay_steps))
    log.info('LR list {}'.format(opt_config.lr_list))
    learn_rate = tf.train.piecewise_constant(
        global_step, list(
            np.array(opt_config.lr_decay_steps).astype(np.int64)),
        list(opt_config.lr_list))
    self._learn_rate = learn_rate

    opt = self.get_optimizer(opt_config.optimizer, learn_rate)
    # if opt_config.num_gpu > 1:
    #   opt = hvd.DistributedOptimizer(opt)

    with tf.name_scope('TaskA'):
      h_a = self.backbone(x, is_training=is_training, ext_wts=ext_wts)
      self._h_a = h_a

    # Apply BN ops.
    bn_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.name_scope('TaskB'):
      x_b_all = tf.concat([x_b, x_b_v], axis=0)
      if ext_wts is not None:
        h_b_all = self.backbone(
            x_b_all, is_training=is_training, reuse=True, ext_wts=ext_wts)
      else:
        h_b_all = self.backbone(x_b_all, is_training=is_training, reuse=True)

    with tf.name_scope('TaskA'):
      # Calculates hidden activation size.
      h_shape = h_a.get_shape()
      h_size = 1
      for ss in h_shape[1:]:
        h_size *= int(ss)

      if w_class_a is None:
        if ext_wts is not None:
          w_class_a = weight_variable(
              [h_size, num_classes_a],
              init_method='numpy',
              dtype=tf.float32,
              init_param={'val': np.transpose(ext_wts['w_class_a'])},
              wd=config.wd,
              name='w_class_a')
          b_class_a = weight_variable([],
                                      init_method='numpy',
                                      dtype=tf.float32,
                                      init_param={'val': ext_wts['b_class_a']},
                                      wd=0e0,
                                      name='b_class_a')
        else:
          w_class_a = weight_variable([h_size, num_classes_a],
                                      init_method='truncated_normal',
                                      dtype=tf.float32,
                                      init_param={'stddev': 0.01},
                                      wd=bb_config.wd,
                                      name='w_class_a')
          b_class_a = weight_variable([num_classes_a],
                                      init_method='constant',
                                      init_param={'val': 0.0},
                                      name='b_class_a')
        self._w_class_a_orig = w_class_a
        self._b_class_a_orig = b_class_a
      else:
        assert b_class_a is not None
        w_class_a_orig = weight_variable([h_size, num_classes_a],
                                         init_method='truncated_normal',
                                         dtype=tf.float32,
                                         init_param={'stddev': 0.01},
                                         wd=bb_config.wd,
                                         name='w_class_a')
        b_class_a_orig = weight_variable([num_classes_a],
                                         init_method='constant',
                                         init_param={'val': 0.0},
                                         name='b_class_a')
        self._w_class_a_orig = w_class_a_orig
        self._b_class_a_orig = b_class_a_orig

      self._w_class_a = w_class_a
      self._b_class_a = b_class_a
      num_classes_a_dyn = tf.cast(tf.shape(b_class_a)[0], tf.int64)
      num_classes_a_dyn32 = tf.shape(b_class_a)[0]

      if proto_config.cosine_a:
        if proto_config.cosine_tau:
          if ext_wts is None:
            init_val = 10.0
          else:
            init_val = ext_wts['tau'][0]
          tau = weight_variable([],
                                init_method='constant',
                                init_param={'val': init_val},
                                name='tau')
        else:
          tau = tf.constant(1.0)
        w_class_a_norm = self._normalize(w_class_a, 0)
        h_a_norm = self._normalize(h_a, 1)
        dot = tf.matmul(h_a_norm, w_class_a_norm)
        if ext_wts is not None:
          dot += b_class_a
        logits_a = tau * dot
      else:
        logits_a = compute_euc(tf.transpose(w_class_a), h_a)
      self._prediction_a = logits_a
      # if opt_config.num_gpu > 1:
      #   self._prediction_a_all = allgather(self._prediction_a)
      # else:
      self._prediction_a_all = self._prediction_a

      xent_a = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits_a, labels=y)
      cost_a = tf.reduce_mean(xent_a, name='xent')
      self._cost_a = cost_a
      cost_a += self._decay()
      correct_a = tf.equal(tf.argmax(logits_a, axis=1), y)
      self._correct_a = correct_a
      self._acc_a = tf.reduce_mean(tf.cast(correct_a, cost_a.dtype))

    with tf.name_scope('TaskB'):
      h_b = h_b_all[:tf.shape(x_b)[0]]
      h_b_v = h_b_all[tf.shape(x_b)[0]:]

      # Add new axes for the `batch` dimension.
      h_b_ = tf.expand_dims(h_b, 0)
      h_b_v_ = tf.expand_dims(h_b_v, 0)
      y_b_ = tf.expand_dims(y_b, 0)
      y_b_v_ = tf.expand_dims(y_b_v, 0)

      if transfer_config.old_and_new:
        protos_b = self._compute_protos(num_classes_b, h_b_,
                                        y_b_ - num_classes_a)
      else:
        protos_b = self._compute_protos(num_classes_b, h_b_, y_b_)

      w_class_a_ = tf.expand_dims(tf.transpose(w_class_a), 0)
      if proto_config.protos_phi:
        w_p1 = weight_variable([h_size],
                               init_method='constant',
                               dtype=tf.float32,
                               init_param={'val': 1.0},
                               wd=bb_config.wd,
                               name='w_p1')
      if proto_config.cosine_attention:
        w_q = weight_variable([h_size, h_size],
                              init_method='truncated_normal',
                              dtype=tf.float32,
                              init_param={'stddev': 0.1},
                              wd=bb_config.wd,
                              name='w_q')
        k_b = weight_variable([num_classes_a, h_size],
                              init_method='truncated_normal',
                              dtype=tf.float32,
                              init_param={'stddev': 0.1},
                              wd=bb_config.wd,
                              name='k_b')
        tau_q = weight_variable([],
                                init_method='constant',
                                init_param={'val': 10.0},
                                name='tau_q')
        if transfer_config.old_and_new:
          w_class_b = self._compute_protos_attend_fix(
              num_classes_b, h_b_, y_b_ - num_classes_a_dyn, w_q, tau_q, k_b,
              self._w_class_a_orig)
        else:
          w_class_b = self._compute_protos_attend_fix(
              num_classes_b, h_b_, y_b_, w_q, tau_q, k_b, self._w_class_a_orig)
        assert proto_config.protos_phi
        w_p2 = weight_variable([h_size],
                               init_method='constant',
                               dtype=tf.float32,
                               init_param={'val': 1.0},
                               wd=bb_config.wd,
                               name='w_p2')
        self._k_b = tf.expand_dims(w_p2, 1) * self._w_class_a_orig
        self._k_b2 = k_b
        self.bias = w_class_b
        self.new_protos = w_p1 * protos_b
        self.new_bias = w_p2 * w_class_b
        w_class_b = w_p1 * protos_b + w_p2 * w_class_b
        self.protos = protos_b
        self.w_class_b_final = w_class_b
      else:
        w_class_b = protos_b
        if proto_config.protos_phi:
          w_class_b = w_p1 * w_class_b

      self._w_class_b = w_class_b

      if transfer_config.old_and_new:
        w_class_all = tf.concat([w_class_a_, w_class_b], axis=1)
      else:
        w_class_all = w_class_b

      if proto_config.cosine_softmax_tau:
        tau_b = weight_variable([],
                                init_method='constant',
                                init_param={'val': 10.0},
                                name='tau_b')
      else:
        tau_b = tf.constant(1.0)

      if proto_config.similarity == 'euclidean':
        logits_b_v = compute_logits(w_class_all, h_b_v_)
      elif proto_config.similarity == 'cosine':
        logits_b_v = tau_b * compute_logits_cosine(w_class_all, h_b_v_)
      else:
        raise ValueError('Unknown similarity')
      self._logits_b_v = logits_b_v
      self._prediction_b = logits_b_v[0]
      # if opt_config.num_gpu > 1:
      #   self._prediction_b_all = allgather(self._prediction_b)
      # else:
      self._prediction_b_all = self._prediction_b

      # Mask out the old classes.
      def mask_fn():
        bin_mask = tf.expand_dims(
            tf.reduce_sum(
                tf.one_hot(y_sel, num_classes_a + num_classes_b),
                0,
                keep_dims=True), 0)
        logits_b_v_m = logits_b_v * (1.0 - bin_mask)
        logits_b_v_m -= bin_mask * 100.0
        return logits_b_v_m

      # if transfer_config.old_and_new:
      #   logits_b_v = tf.cond(self._mask, mask_fn, lambda: logits_b_v)
      xent_b_v = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits_b_v, labels=y_b_v_)
      cost_b = tf.reduce_mean(xent_b_v, name='xent')
      self._cost_b = cost_b

    if transfer_config.old_and_new:
      total_cost = cost_b
    else:
      total_cost = (transfer_config.cost_a_ratio * cost_a +
                    transfer_config.cost_b_ratio * cost_b)
    self._total_cost = total_cost

    if not transfer_config.meta_only:
      # assert False, 'let us go for pretrained model first'
      var_list = tf.trainable_variables()
      var_list = list(filter(lambda x: 'phi' in x.name, var_list))
      layers = self.config.transfer_config.meta_layers
      if layers == "all":
        pass
      elif layers == "4":
        keywords = ['TaskB', 'unit_4_']
        filter_fn = lambda x: any([kw in x.name for kw in keywords])
        var_list = list(filter(filter_fn, var_list))
      else:
        raise ValueError('Unknown finetune layers {}'.format(layers))
      [log.info('Slow weights {}'.format(v.name)) for v in var_list]
    else:
      var_list = []

    if proto_config.cosine_softmax_tau:
      var_list += [tau_b]

    if proto_config.cosine_attention:
      var_list += [w_q, tau_q, k_b, w_p2]

    if proto_config.protos_phi:
      var_list += [w_p1]

    if transfer_config.train_wclass_a:
      if proto_config.similarity == 'euclidean':
        var_list += [w_class_a, b_class_a]
      elif proto_config.similarity == 'cosine':
        var_list += [w_class_a]

    if is_training:
      grads_and_vars = opt.compute_gradients(total_cost, var_list)
      with tf.control_dependencies(bn_ops):
        [log.info('BN op {}'.format(op.name)) for op in bn_ops]
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)

      grads_and_vars_b = opt.compute_gradients(cost_b, var_list)
      with tf.control_dependencies(bn_ops):
        train_op_b = opt.apply_gradients(
            grads_and_vars_b, global_step=global_step)

      with tf.control_dependencies(bn_ops):
        train_op_a = opt.minimize(cost_a, global_step=global_step)
      self._train_op = train_op
      self._train_op_a = train_op_a
      self._train_op_b = train_op_b
    self._initializer = tf.global_variables_initializer()
    self._w_class_a = w_class_a
Example #47
0
def _random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different size
      or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
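
The `_crop` helper is not included in this snippet; a minimal sketch assuming the usual slice-based preprocessing behavior (hypothetical reconstruction):

def _crop(image, offset_height, offset_width, crop_height, crop_width):
    # Slice a crop_height x crop_width window out of a [H, W, C] image,
    # keeping all channels. Offsets may be scalar int32 tensors.
    original_shape = tf.shape(image)
    offsets = tf.stack([offset_height, offset_width, 0])
    size = tf.stack([crop_height, crop_width, original_shape[2]])
    return tf.slice(image, offsets, size)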
Example #48
0
def Deconv2D(
    name,
    input_dim,
    output_dim,
    filter_size,
    inputs,
    he_init=True,
    weightnorm=None,
    biases=True,
    gain=1.,
    mask_type=None,
):
    """
    inputs: tensor of shape (batch size, height, width, input_dim)
    returns: tensor of shape (batch size, 2*height, 2*width, output_dim)
    """
    with tf.name_scope(name) as scope:

        if mask_type is not None:
            raise Exception('Unsupported configuration')

        def uniform(stdev, size):
            return np.random.uniform(low=-stdev * np.sqrt(3),
                                     high=stdev * np.sqrt(3),
                                     size=size).astype('float32')

        stride = 2
        fan_in = input_dim * filter_size**2 / (stride**2)
        fan_out = output_dim * filter_size**2

        if he_init:
            filters_stdev = np.sqrt(4. / (fan_in + fan_out))
        else:  # Normalized init (Glorot & Bengio)
            filters_stdev = np.sqrt(2. / (fan_in + fan_out))

        if _weights_stdev is not None:
            filter_values = uniform(
                _weights_stdev,
                (filter_size, filter_size, output_dim, input_dim))
        else:
            filter_values = uniform(
                filters_stdev,
                (filter_size, filter_size, output_dim, input_dim))

        filter_values *= gain

        filters = lib.param(name + '.Filters', filter_values)

        if weightnorm is None:
            weightnorm = _default_weightnorm
        if weightnorm:
            norm_values = np.sqrt(
                np.sum(np.square(filter_values), axis=(0, 1, 3)))
            target_norms = lib.param(name + '.g', norm_values)
            with tf.name_scope('weightnorm') as scope:
                norms = tf.sqrt(
                    tf.reduce_sum(tf.square(filters),
                                  reduction_indices=[0, 1, 3]))
                filters = filters * tf.expand_dims(target_norms / norms, 1)

        #inputs = tf.transpose(inputs, [0, 3, 1, 2], name='NCHW_to_NHWC')
        #inputs = tf.transpose(inputs, [0,2,3,1], name='NCHW_to_NHWC')

        input_shape = tf.shape(inputs)
        try:  # tf pre-1.0 (top) vs 1.0 (bottom)
            output_shape = tf.pack([
                input_shape[0], 2 * input_shape[1], 2 * input_shape[2],
                output_dim
            ])
        except Exception:
            output_shape = tf.stack([
                input_shape[0], 2 * input_shape[1], 2 * input_shape[2],
                output_dim
            ])

        result = tf.nn.conv2d_transpose(value=inputs,
                                        filter=filters,
                                        output_shape=output_shape,
                                        strides=[1, 2, 2, 1],
                                        padding='SAME')

        if biases:
            _biases = lib.param(name + '.Biases',
                                np.zeros(output_dim, dtype='float32'))
            result = tf.nn.bias_add(result, _biases)

        #result = tf.transpose(result, [0,3,1,2], name='NHWC_to_NCHW')

        return result
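
Usage sketch with hypothetical shapes (assumes the `lib.param` and `_default_weightnorm` globals used above):

images = tf.placeholder(tf.float32, [None, 16, 16, 64])
upsampled = Deconv2D('Generator.Deconv1', input_dim=64, output_dim=32,
                     filter_size=5, inputs=images)
# upsampled: [batch, 32, 32, 32]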
Example #49
0
def sample_z(mu, log_var):
    eps = tf.random_normal(shape=tf.shape(mu))
    return mu + tf.exp(log_var / 2) * eps
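
This is the VAE reparameterization trick: with `log_var` = log(sigma^2), `sigma = exp(log_var / 2)`, so `z = mu + sigma * eps` with `eps ~ N(0, I)` stays differentiable with respect to `mu` and `log_var`. A quick sketch:

mu = tf.zeros([8, 20])
log_var = tf.zeros([8, 20])
z = sample_z(mu, log_var)  # 8 standard-normal latent samples of size 20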
Example #50
0
  def _fast_decode(self,
                   features,
                   decode_length,
                   beam_size=1,
                   top_beams=1,
                   alpha=1.0):
    """Fast decoding.

    Implements both greedy and beam search decoding, uses beam search iff
    beam_size > 1, otherwise beam search related arguments are ignored.

    Args:
      features: a map of string to model features.
      decode_length: an integer.  How many additional timesteps to decode.
      beam_size: number of beams.
      top_beams: an integer. How many of the beams to return.
      alpha: Float that controls the length penalty. The larger the alpha, the
        stronger the preference for longer translations.

    Returns:
       samples: an integer `Tensor`. Top samples from the beam search

    Raises:
      NotImplementedError: If there are multiple data shards.
    """
    if self._num_datashards != 1:
      raise NotImplementedError("Fast decoding only supports a single shard.")
    dp = self._data_parallelism
    hparams = self._hparams

    inputs = features["inputs"]
    batch_size = tf.shape(inputs)[0]
    target_modality = self._problem_hparams.target_modality
    if t2t_model.is_class_modality(target_modality):
      decode_length = 1
    else:
      decode_length = tf.shape(inputs)[1] + decode_length

    # TODO(llion): Clean up this reshaping logic.
    inputs = tf.expand_dims(inputs, axis=1)
    if len(inputs.shape) < 5:
      inputs = tf.expand_dims(inputs, axis=4)
    s = tf.shape(inputs)
    inputs = tf.reshape(inputs, [s[0] * s[1], s[2], s[3], s[4]])
    # _shard_features called to ensure that the variable names match
    inputs = self._shard_features({"inputs": inputs})["inputs"]
    input_modality = self._problem_hparams.input_modality["inputs"]
    with tf.variable_scope(input_modality.name):
      inputs = input_modality.bottom_sharded(inputs, dp)
    with tf.variable_scope("body"):
      encoder_output, encoder_decoder_attention_bias = dp(
          self.encode, inputs, features["target_space_id"], hparams)
    encoder_output = encoder_output[0]
    encoder_decoder_attention_bias = encoder_decoder_attention_bias[0]

    if hparams.pos == "timing":
      timing_signal = common_attention.get_timing_signal_1d(
          decode_length + 1, hparams.hidden_size)

    def preprocess_targets(targets, i):
      """Performs preprocessing steps on the targets to prepare for the decoder.

      This includes:
        - Embedding the ids.
        - Flattening to 3D tensor.
        - Optionally adding timing signals.

      Args:
        targets: inputs ids to the decoder. [batch_size, 1]
        i: scalar, Step number of the decoding loop.

      Returns:
        Processed targets [batch_size, 1, hidden_dim]
      """
      # _shard_features called to ensure that the variable names match
      targets = self._shard_features({"targets": targets})["targets"]
      with tf.variable_scope(target_modality.name):
        targets = target_modality.targets_bottom_sharded(targets, dp)[0]
      targets = common_layers.flatten4d3d(targets)

      # TODO(llion): Explain! Is this even needed?
      targets = tf.cond(
          tf.equal(i, 0), lambda: tf.zeros_like(targets), lambda: targets)

      if hparams.pos == "timing":
        targets += timing_signal[:, i:i + 1]
      return targets

    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(decode_length))
    if hparams.proximity_bias:
      decoder_self_attention_bias += common_attention.attention_bias_proximal(
          decode_length)

    def symbols_to_logits_fn(ids, i, cache):
      """Go from ids to logits for next symbol."""
      ids = ids[:, -1:]
      targets = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3)
      targets = preprocess_targets(targets, i)

      bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]

      with tf.variable_scope("body"):
        body_outputs = dp(self.decode, targets, cache["encoder_output"],
                          cache["encoder_decoder_attention_bias"], bias,
                          hparams, cache)

      with tf.variable_scope(target_modality.name):
        logits = target_modality.top_sharded(body_outputs, None, dp)[0]

      return tf.squeeze(logits, axis=[1, 2, 3]), cache

    key_channels = hparams.attention_key_channels or hparams.hidden_size
    value_channels = hparams.attention_value_channels or hparams.hidden_size
    num_layers = hparams.num_decoder_layers or hparams.num_hidden_layers

    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([batch_size, 0, key_channels]),
            "v": tf.zeros([batch_size, 0, value_channels]),
        }
        for layer in range(num_layers)
    }

    # Set 2nd dim to None since it's not invariant in the tf.while_loop
    # Note: Tensor.set_shape() does not work here since it merges shape info.
    # TODO(llion); Find a more robust solution.
    # pylint: disable=protected-access
    for layer in cache:
      cache[layer]["k"]._shape = tf.TensorShape([None, None, key_channels])
      cache[layer]["v"]._shape = tf.TensorShape([None, None, value_channels])
    # pylint: enable=protected-access
    cache["encoder_output"] = encoder_output
    cache["encoder_decoder_attention_bias"] = encoder_decoder_attention_bias

    if beam_size > 1:  # Beam Search
      target_modality = (
          self._hparams.problems[self._problem_idx].target_modality)
      vocab_size = target_modality.top_dimensionality
      initial_ids = tf.zeros([batch_size], dtype=tf.int32)
      decoded_ids, scores = beam_search.beam_search(
          symbols_to_logits_fn, initial_ids, beam_size, decode_length,
          vocab_size, alpha, states=cache, stop_early=(top_beams == 1))

      if top_beams == 1:
        decoded_ids = decoded_ids[:, 0, 1:]
      else:
        decoded_ids = decoded_ids[:, :top_beams, 1:]
    else:  # Greedy

      def inner_loop(i, next_id, decoded_ids, cache):
        logits, cache = symbols_to_logits_fn(next_id, i, cache)
        temperature = (0.0 if hparams.sampling_method == "argmax"
                       else hparams.sampling_temp)
        next_id = tf.expand_dims(
            common_layers.sample_with_temperature(logits, temperature), axis=1)
        decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
        return i + 1, next_id, decoded_ids, cache

      decoded_ids = tf.zeros([batch_size, 0], dtype=tf.int64)
      scores = None
      next_id = tf.zeros([batch_size, 1], dtype=tf.int64)
      _, _, decoded_ids, _ = tf.while_loop(
          # TODO(llion): Early stopping.
          lambda i, *_: tf.less(i, decode_length),
          inner_loop,
          [tf.constant(0), next_id, decoded_ids, cache],
          shape_invariants=[
              tf.TensorShape([]),
              tf.TensorShape([None, None]),
              tf.TensorShape([None, None]),
              nest.map_structure(lambda t: tf.TensorShape(t.shape), cache),
          ])

    return decoded_ids, scores
Example #51
0
def train(train_data, test_data=None):
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1],))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders()
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders, batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      num_neg_samples=FLAGS.neg_sample_size,
                                      context_pairs=context_pairs)
    adj_info_ph = tf.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                       SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'gcn':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1),
                       SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2)]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="gcn",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   concat=False,
                                   logging=True)

    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                       SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   identity_dim=FLAGS.identity_dim,
                                   aggregator_type="seq",
                                   model_size=FLAGS.model_size,
                                   logging=True)

    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                       SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="maxpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
                       SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="meanpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)

    elif FLAGS.model == 'n2v':
        model = Node2VecModel(placeholders, features.shape[0],
                              minibatch.deg,
                              # 2x because graphsage uses concat
                              nodevec_dim=2 * FLAGS.dim_1,
                              lr=FLAGS.learning_rate)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True

    # Initialize session
    sess = tf.Session(config=config)
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.global_variables_initializer(), feed_dict={adj_info_ph: minibatch.adj})

    # Train model

    train_shadow_mrr = None
    shadow_mrr = None

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

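    # Reusable assign ops that swap the training / validation adjacency into
    # adj_info without rebuilding the graph.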
    train_adj_info = tf.assign(adj_info, minibatch.adj)
    val_adj_info = tf.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([merged, model.opt_op, model.loss, model.ranks, model.aff_all,
                             model.mrr, model.outputs1], feed_dict=feed_dict)
            negative_sample = sess.run(model.neg_samples, feed_dict=feed_dict)
            train_cost = outs[2]
            train_mrr = outs[5]
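            # Exponential moving average of the training MRR:
            # shadow -= (1 - decay) * (shadow - value) is algebraically the
            # same as shadow = decay * shadow + (1 - decay) * value, decay = 0.99.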
            if train_shadow_mrr is None:
                train_shadow_mrr = train_mrr
            else:
                train_shadow_mrr -= (1 - 0.99) * (train_shadow_mrr - train_mrr)

            if iter % FLAGS.validate_iter == 0:
                # Validation
                sess.run(val_adj_info.op)
                val_cost, ranks, val_mrr, duration = evaluate(sess, model, minibatch, size=FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost
            if shadow_mrr is None:
                shadow_mrr = val_mrr
            else:
                shadow_mrr -= (1 - 0.99) * (shadow_mrr - val_mrr)

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Print results
            avg_time = (avg_time * total_steps + time.time() - t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                print("Iter:", '%04d' % iter,
                      "train_loss=", "{:.5f}".format(train_cost),
                      "train_mrr=", "{:.5f}".format(train_mrr),
                      "train_mrr_ema=", "{:.5f}".format(train_shadow_mrr),  # exponential moving average
                      "val_loss=", "{:.5f}".format(val_cost),
                      "val_mrr=", "{:.5f}".format(val_mrr),
                      "val_mrr_ema=", "{:.5f}".format(shadow_mrr),  # exponential moving average
                      "time=", "{:.5f}".format(avg_time))

            iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                break

        if total_steps > FLAGS.max_total_steps:
            break

    print("Optimization Finished!")
    if FLAGS.save_embeddings:
        sess.run(val_adj_info.op)

        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size, log_dir())

        if FLAGS.model == "n2v":
            # stopping the gradient for the already trained nodes
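            # The context embeddings are rebuilt from two scatter_nd pieces:
            # rows for val/test nodes stay trainable, while rows for the
            # already-trained nodes are frozen with tf.stop_gradient.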
            train_ids = tf.constant(
                [[id_map[n]] for n in G.nodes_iter() if not G.node[n]['val'] and not G.node[n]['test']],
                dtype=tf.int32)
            test_ids = tf.constant([[id_map[n]] for n in G.nodes_iter() if G.node[n]['val'] or G.node[n]['test']],
                                   dtype=tf.int32)
            update_nodes = tf.nn.embedding_lookup(model.context_embeds, tf.squeeze(test_ids))
            no_update_nodes = tf.nn.embedding_lookup(model.context_embeds, tf.squeeze(train_ids))
            update_nodes = tf.scatter_nd(test_ids, update_nodes, tf.shape(model.context_embeds))
            no_update_nodes = tf.stop_gradient(
                tf.scatter_nd(train_ids, no_update_nodes, tf.shape(model.context_embeds)))
            model.context_embeds = update_nodes + no_update_nodes
            sess.run(model.context_embeds)

            # run random walks
            from graphsage.utils import run_random_walks
            nodes = [n for n in G.nodes_iter() if G.node[n]["val"] or G.node[n]["test"]]
            start_time = time.time()
            pairs = run_random_walks(G, nodes, num_walks=50)
            walk_time = time.time() - start_time

            test_minibatch = EdgeMinibatchIterator(G,
                                                   id_map,
                                                   placeholders, batch_size=FLAGS.batch_size,
                                                   max_degree=FLAGS.max_degree,
                                                   num_neg_samples=FLAGS.neg_sample_size,
                                                   context_pairs=pairs,
                                                   n2v_retrain=True,
                                                   fixed_n2v=True)

            start_time = time.time()
            print("Doing test training for n2v.")
            test_steps = 0
            for epoch in range(FLAGS.n2v_test_epochs):
                test_minibatch.shuffle()
                while not test_minibatch.end():
                    feed_dict = test_minibatch.next_minibatch_feed_dict()
                    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                    outs = sess.run([model.opt_op, model.loss, model.ranks, model.aff_all,
                                     model.mrr, model.outputs1], feed_dict=feed_dict)
                    if test_steps % FLAGS.print_every == 0:
                        print("Iter:", '%04d' % test_steps,
                              "train_loss=", "{:.5f}".format(outs[1]),
                              "train_mrr=", "{:.5f}".format(outs[-2]))
                    test_steps += 1
            train_time = time.time() - start_time
            save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size, log_dir(), mod="-test")
            print("Total time: ", train_time + walk_time)
            print("Walk time: ", walk_time)
            print("Train time: ", train_time)
Example #52
def unique_with_inverse(x):
    y, idx = tf.unique(x)
    num_segments = tf.shape(y)[0]
    num_elems = tf.shape(x)[0]
    return y, tf.math.unsorted_segment_min(tf.range(num_elems), idx,
                                           num_segments)
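A minimal usage sketch for the helper above (illustrative values; assumes a TF 1.x session runtime). For each unique value, it returns the index of that value's first occurrence in x:

import tensorflow as tf

x = tf.constant([3, 1, 3, 2])
y, first_idx = unique_with_inverse(x)
with tf.Session() as sess:
    print(sess.run([y, first_idx]))  # [3, 1, 2] and [0, 1, 3]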
Example #53
    def forward(self, input, reuse=False):
        with tf.variable_scope('forward_model'):
            state = tf.cast(input[0], tf.float32)
            action = tf.cast(input[1], tf.float32)
            gru_state = tf.cast(input[2], tf.float32)

            if self.forward_model_type in ['kl-rssm', 'mmd-rssm']:
                hidden = tf.concat([action], -1)
                for i in range(2):
                    hidden = tf.layers.dense(hidden,
                                             **dict(units=self.encoding_size,
                                                    activation=tf.nn.elu),
                                             name='prior_enc_{}'.format(i),
                                             reuse=tf.AUTO_REUSE)
                belief, rnn_state = self._cell(hidden, tf.zeros_like(hidden))
                prior = {
                    'belief': belief,
                }
                hidden = tf.concat([prior['belief'], state], -1)
                for i in range(2):
                    hidden = tf.layers.dense(hidden,
                                             **dict(units=self.encoding_size,
                                                    activation=tf.nn.elu),
                                             name='post_dec_{}'.format(i),
                                             reuse=tf.AUTO_REUSE)
                mean = tf.layers.dense(hidden,
                                       self.state_size,
                                       None,
                                       name='post_mean',
                                       reuse=tf.AUTO_REUSE)

                sample = mean

                gru_state = belief
                next_state = sample
                divergence_loss = 0.
            elif self.forward_model_type in ['transformer']:
                # State embedding
                state_embedder1 = ops.dense(state, self.state_size,
                                            self.encoding_size, tf.nn.relu,
                                            "encoder1_state", reuse)
                divergence_loss = 0.
                state_embedder2 = ops.dense(state_embedder1,
                                            self.encoding_size,
                                            self.encoding_size, tf.sigmoid,
                                            "encoder2_state", reuse)

                # Action embedding
                action_embedder1 = ops.dense(action, self.action_size,
                                             self.encoding_size, tf.nn.relu,
                                             "encoder1_action", reuse)
                action_embedder2 = ops.dense(action_embedder1,
                                             self.encoding_size,
                                             self.encoding_size, tf.sigmoid,
                                             "encoder2_action", reuse)

                # Multi-head
                if self.use_scale_dot_product:
                    action_embedder3 = ops.dense(action_embedder1,
                                                 self.encoding_size,
                                                 self.encoding_size,
                                                 tf.sigmoid, "value", reuse)
                    batch_size = tf.shape(state)[0]
                    state_embedder2_query = self.split_heads(
                        state_embedder2, batch_size)  # query
                    action_embedder2 = self.split_heads(
                        action_embedder2, batch_size)  # key
                    action_embedder3 = self.split_heads(
                        action_embedder3, batch_size)  # value
                    scaled_attention = self.scaled_dot_product_attention(
                        state_embedder2_query,
                        action_embedder2,
                        action_embedder3,
                        mask=None)  # scaled_attention =
                    scaled_attention = tf.transpose(scaled_attention,
                                                    perm=[0, 2, 1, 3])
                    joint_embedding = tf.reshape(
                        scaled_attention, (batch_size, self.encoding_size))
                    # Skip Connection
                    if self.use_skip_connection:
                        joint_embedding = ops.dense(joint_embedding,
                                                    self.encoding_size,
                                                    self.encoding_size, None,
                                                    "cross_att_dense", reuse)
                        if self.use_dropout:
                            joint_embedding = tf.nn.dropout(
                                joint_embedding,
                                keep_prob=1 - self.hidden_dropout_prob,
                            )
                        joint_embedding = joint_embedding + state_embedder2
                else:
                    # Joint embedding
                    joint_embedding = tf.multiply(state_embedder2,
                                                  action_embedder2)

                # Next state prediction
                hidden1 = ops.dense(joint_embedding, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "encoder3",
                                    reuse)
                hidden2 = ops.dense(hidden1, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "encoder4",
                                    reuse)
                hidden3 = ops.dense(hidden2, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "decoder1",
                                    reuse)
                next_state = ops.dense(hidden3, self.encoding_size,
                                       self.state_size, None, "decoder2",
                                       reuse)

                gru_state = tf.cast(gru_state, tf.float64)
            else:
                # State embedding
                state_embedder1 = ops.dense(state, self.state_size,
                                            self.encoding_size, tf.nn.relu,
                                            "encoder1_state", reuse)
                gru_state = ops.gru(state_embedder1, gru_state,
                                    self.encoding_size, self.encoding_size,
                                    'gru1', reuse)
                divergence_loss = 0.
                state_embedder2 = ops.dense(gru_state, self.encoding_size,
                                            self.encoding_size, tf.sigmoid,
                                            "encoder2_state", reuse)

                # Action embedding
                action_embedder1 = ops.dense(action, self.action_size,
                                             self.encoding_size, tf.nn.relu,
                                             "encoder1_action", reuse)
                action_embedder2 = ops.dense(action_embedder1,
                                             self.encoding_size,
                                             self.encoding_size, tf.sigmoid,
                                             "encoder2_action", reuse)

                # Joint embedding
                joint_embedding = tf.multiply(state_embedder2,
                                              action_embedder2)

                # Next state prediction
                hidden1 = ops.dense(joint_embedding, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "encoder3",
                                    reuse)
                hidden2 = ops.dense(hidden1, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "encoder4",
                                    reuse)
                hidden3 = ops.dense(hidden2, self.encoding_size,
                                    self.encoding_size, tf.nn.relu, "decoder1",
                                    reuse)
                next_state = ops.dense(hidden3, self.encoding_size,
                                       self.state_size, None, "decoder2",
                                       reuse)

                gru_state = tf.cast(gru_state, tf.float64)

            return next_state, gru_state, divergence_loss
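The transformer branch above relies on helper methods (split_heads, scaled_dot_product_attention) whose bodies are not shown. A minimal sketch of standard scaled dot-product attention, which such a helper is assumed to implement:

def scaled_dot_product_attention_sketch(q, k, v, mask=None):
    # softmax(q @ k^T / sqrt(d_k)) @ v, with q/k/v shaped [..., seq_len, depth]
    d_k = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = tf.matmul(q, k, transpose_b=True) / tf.sqrt(d_k)
    if mask is not None:
        logits += mask * -1e9  # drive masked positions toward zero weight
    weights = tf.nn.softmax(logits, axis=-1)
    return tf.matmul(weights, v)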
Example #54
    def create_graph(self):
        print('\n[*] Defining encoder...')

        with tf.variable_scope('encoder_mean', reuse=self.reuse):
            Qz_x_mean = DenseNet(input_=self.x_batch_flat,
                            hidden_dim=self.hidden_dim, 
                            output_dim=self.z_dim, 
                            num_layers=self.num_layers, 
                            transfer_fct=self.transfer_fct,
                            act_out=None, 
                            reuse=self.reuse, 
                            kinit=self.kinit,
                            bias_init=self.bias_init,
                            drop_rate=self.drop_rate)
        
            self.encoder_mean = Qz_x_mean.output
            
        with tf.variable_scope('encoder_var', reuse=self.reuse):
            Qz_x_var = DenseNet(input_=self.x_batch_flat,
                            hidden_dim=self.hidden_dim, 
                            output_dim=self.z_dim, 
                            num_layers=self.num_layers, 
                            transfer_fct=self.transfer_fct,
                            act_out=tf.nn.softplus, 
                            reuse=self.reuse, 
                            kinit=self.kinit,
                            bias_init=self.bias_init,
                            drop_rate=self.drop_rate)
        
            self.encoder_var = Qz_x_var.output
        
        print('\n[*] Reparameterization trick...')
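        # z = mean + sqrt(var) * eps rewrites sampling as a deterministic
        # function of (mean, var) plus parameter-free noise, so gradients can
        # flow back into the encoder.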
        self.encoder_logvar = tf.log(self.encoder_var)
        eps = tf.random_normal((self.batch_size, self.z_dim), 0, 1, dtype=tf.float32)
        self.z = tf.add(self.encoder_mean, tf.multiply(tf.sqrt(self.encoder_var), eps))
       
        print('\n[*] Defining decoder...')
        with tf.variable_scope('decoder_mean', reuse=self.reuse):
            Px_z_mean = DenseNet(input_=self.z,
                            hidden_dim=self.hidden_dim, 
                            output_dim=self.x_flat_dim, 
                            num_layers=2, 
                            transfer_fct=self.transfer_fct,
                            act_out=tf.nn.sigmoid, 
                            reuse=self.reuse, 
                            kinit=self.kinit,
                            bias_init=self.bias_init,
                            drop_rate=self.drop_rate)
        
            self.decoder_mean_flat = Px_z_mean.output
        
        eps = tf.random_normal(tf.shape(self.decoder_mean_flat), 0, 1, dtype=tf.float32)
        self.decoder_x_flat = tf.add(self.decoder_mean_flat, tf.multiply(tf.sqrt(self.sigma), eps))
        self.decoder_x = tf.reshape(self.decoder_x_flat , [-1,self.width, self.height, self.nchannel])

        
        print('\n[*] Defining sampling...')
        self.z_sample = tf.random_normal((self.batch_size, self.z_dim), 0, 1, dtype=tf.float32)
        
        with tf.variable_scope('decoder_mean', reuse=True):
            Px_z_mean = DenseNet(input_=self.z_sample,
                            hidden_dim=self.hidden_dim, 
                            output_dim=self.x_flat_dim, 
                            num_layers=2, 
                            transfer_fct=self.transfer_fct,
                            act_out=tf.nn.sigmoid, 
                            reuse=True, 
                            kinit=self.kinit,
                            bias_init=self.bias_init,
                            drop_rate=self.drop_rate)
        
            self.samples_mean_flat = Px_z_mean.output
        
        eps = tf.random_normal(tf.shape(self.samples_mean_flat), 0, 1, dtype=tf.float32)
        self.samples_flat = tf.add(self.samples_mean_flat, tf.multiply(tf.sqrt(self.sigma), eps))
        self.samples = tf.reshape(self.samples_flat , [-1,self.width, self.height, self.nchannel])
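Not shown in this snippet is the ELBO's divergence term. A sketch of the diagonal-Gaussian KL divergence these encoder outputs would typically contribute (an assumption; the loss code lives elsewhere):

# KL( N(mean, var) || N(0, I) ), summed over latent dims, one value per example
kl = -0.5 * tf.reduce_sum(
    1.0 + encoder_logvar - tf.square(encoder_mean) - encoder_var, axis=1)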
Example #55
def image_summary_or_default_string(summary_name, image):
    """Returns image summaries for non-padded elements."""
    return tf.cond(tf.equal(tf.size(tf.shape(image)), 4),
                   lambda: tf.summary.image(summary_name, image),
                   lambda: tf.constant(''))
Example #56
def multihead_attention(queries,
						keys,
						num_units = None,
						num_heads = 8,
						dropout_rate = 0,
						is_training = True,
						causality = False,
						scope = "multihead_attention",
						reuse = None):
	'''
	Implements multi-head attention

	Args:
		queries: [Tensor], A 3-dimensional tensor with shape [N, T_q, S_q]
		keys: [Tensor], A 3-dimensional tensor with shape [N, T_k, S_k]
		num_units: [Int], Attention size
		num_heads: [Int], Number of heads
		dropout_rate: [Float], Dropout ratio
		is_training: [Boolean], If true, dropout is enabled
		causality: [Boolean], If true, units that reference the future are masked
		scope: [String], Optional scope for "variable_scope"
		reuse: [Boolean], Whether to reuse the weights of a previous layer with the same name

	Returns:
		A 3-dimensional tensor with shape [N, T_q, S]
	'''
	with tf.variable_scope(scope, reuse = reuse):
		if num_units is None:
			# embedding dimension of the queries (S_q)
			num_units = queries.get_shape().as_list()[-1]

		# Linear layers in Figure 2(right)
		# shape = [N, T_q, S]
		Q = tf.layers.dense(queries, num_units, activation = tf.nn.relu)
		# shape = [N, T_k, S]
		K = tf.layers.dense(keys, num_units, activation = tf.nn.relu)
		# shape = [N, T_k, S]
		V = tf.layers.dense(keys, num_units, activation = tf.nn.relu)

		# Split and concat
		# shape = [N*h, T_q, S/h]
		Q_ = tf.concat(tf.split(Q, num_heads, axis = 2), axis = 0)
		# shape = [N*h, T_k, S/h]
		K_ = tf.concat(tf.split(K, num_heads, axis = 2), axis = 0)
		# shape = [N*h, T_k, S/h]
		V_ = tf.concat(tf.split(V, num_heads, axis = 2), axis = 0)

		# shape = [N*h, T_q, T_k]
		outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))

		# Scale
		outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

		# Masking
		# shape = [N, T_k]
		key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis = -1)))
		# shape = [N*h, T_k]
		key_masks = tf.tile(key_masks, [num_heads, 1])
		# shape = [N*h, T_q, T_k]
		key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1])

		# Where key_masks == 0, fill with a large negative so softmax gives ~0 weight
		paddings = tf.ones_like(outputs) * (-math.pow(2, 32) + 1)
		# shape = [N*h, T_q, T_k]
		outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

		if causality:
			# reduce dims : shape = [T_q, T_k]
			diag_vals = tf.ones_like(outputs[0, :, :])
			# shape = [T_q, T_k]
			# use a lower-triangular matrix to mask out the effect of future words
			# like : [[1,0,0]
			#         [1,2,0]
			#         [1,2,3]]
			tril = tf.contrib.linalg.LinearOperatorTriL(diag_vals).to_dense()
			# shape = [N*h, T_q, T_k]
			masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1])

			paddings = tf.ones_like(masks) * (-math.pow(2, 32) + 1)
			# shape = [N*h, T_q, T_k]
			outputs = tf.where(tf.equal(masks, 0), paddings, outputs)

		# Output Activation
		outputs = tf.nn.softmax(outputs)

		# Query Masking
		# shape = [N, T_q]
		query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis = -1)))
		# shape = [N*h, T_q]
		query_masks = tf.tile(query_masks, [num_heads, 1])
		# shape = [N*h, T_q, T_k]
		query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]])
		outputs *= query_masks 

		# Dropouts
		outputs = tf.layers.dropout(outputs, rate = dropout_rate, training = tf.convert_to_tensor(is_training))

		# Weighted sum
		# shape = [N*h, T_q, S/h]
		outputs = tf.matmul(outputs, V_)

		# Restore shape
		# shape = [N, T_q, S]
		outputs = tf.concat(tf.split(outputs, num_heads, axis = 0), axis = 2)

		# Residual connection
		outputs += queries

		# Normalize
		# shape = [N, T_q, S]
		outputs = normalize(outputs)

	return outputs
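A minimal usage sketch (TF 1.x; shapes are illustrative, and the normalize helper must be in scope). Because of the residual connection, num_units must equal the queries' channel dimension:

queries = tf.placeholder(tf.float32, [None, 10, 512])  # [N, T_q, S_q]
keys = tf.placeholder(tf.float32, [None, 15, 512])     # [N, T_k, S_k]
out = multihead_attention(queries, keys, num_units=512, num_heads=8,
                          dropout_rate=0.1, is_training=True,
                          causality=False, scope="demo_mha")
# out: [N, 10, 512]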
Example #57
def wct_tf(content, style, alpha, eps=1e-8):
    '''TensorFlow version of Whiten-Color Transform
       Assume that content/style encodings have shape 1xHxWxC

       See p.4 of the Universal Style Transfer paper for corresponding equations:
       https://arxiv.org/pdf/1705.08086.pdf
    '''
    # Remove batch dim and reorder to CxHxW
    content_t = tf.transpose(tf.squeeze(content), (2, 0, 1))
    style_t = tf.transpose(tf.squeeze(style), (2, 0, 1))

    Cc, Hc, Wc = tf.unstack(tf.shape(content_t))
    Cs, Hs, Ws = tf.unstack(tf.shape(style_t))

    # CxHxW -> CxH*W
    content_flat = tf.reshape(content_t, (Cc, Hc * Wc))
    style_flat = tf.reshape(style_t, (Cs, Hs * Ws))

    # Content covariance
    mc = tf.reduce_mean(content_flat, axis=1, keep_dims=True)
    fc = content_flat - mc
    fcfc = tf.matmul(fc, fc, transpose_b=True) / (
        tf.cast(Hc * Wc, tf.float32) - 1.) + tf.eye(Cc) * eps

    # Style covariance
    ms = tf.reduce_mean(style_flat, axis=1, keep_dims=True)
    fs = style_flat - ms
    fsfs = tf.matmul(fs, fs, transpose_b=True) / (
        tf.cast(Hs * Ws, tf.float32) - 1.) + tf.eye(Cs) * eps

    # tf.svd is slower on GPU, see https://github.com/tensorflow/tensorflow/issues/13603
    with tf.device('/cpu:0'):
        Sc, Uc, _ = tf.svd(fcfc)
        Ss, Us, _ = tf.svd(fsfs)

    # Filter small singular values
    k_c = tf.reduce_sum(tf.cast(tf.greater(Sc, 1e-5), tf.int32))
    k_s = tf.reduce_sum(tf.cast(tf.greater(Ss, 1e-5), tf.int32))

    # Whiten content feature
    Dc = tf.diag(tf.pow(Sc[:k_c], -0.5))
    fc_hat = tf.matmul(
        tf.matmul(tf.matmul(Uc[:, :k_c], Dc), Uc[:, :k_c], transpose_b=True),
        fc)

    # Color content with style
    Ds = tf.diag(tf.pow(Ss[:k_s], 0.5))
    fcs_hat = tf.matmul(
        tf.matmul(tf.matmul(Us[:, :k_s], Ds), Us[:, :k_s], transpose_b=True),
        fc_hat)

    # Re-center with mean of style
    fcs_hat = fcs_hat + ms

    # Blend whiten-colored feature with original content feature
    blended = alpha * fcs_hat + (1 - alpha) * (fc + mc)

    # CxH*W -> CxHxW
    blended = tf.reshape(blended, (Cc, Hc, Wc))
    # CxHxW -> 1xHxWxC
    blended = tf.expand_dims(tf.transpose(blended, (1, 2, 0)), 0)

    return blended
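A minimal usage sketch, assuming content/style are 1xHxWxC encoder features with matching channel counts as the docstring requires (placeholder names and sizes):

content_feat = tf.placeholder(tf.float32, [1, 32, 32, 256])
style_feat = tf.placeholder(tf.float32, [1, 48, 48, 256])
blended_feat = wct_tf(content_feat, style_feat, alpha=0.6)  # 1xHxWxC, content-sized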
Example #58
def create_bilstm_classification_model(bert_config,
                                       is_training,
                                       response_input_ids,
                                       response_input_mask,
                                       response_segment_ids,
                                       response_text_len,
                                       response_labels,
                                       random_forward_input_ids,
                                       random_forward_input_mask,
                                       random_forward_segment_ids,
                                       random_forward_text_len,
                                       random_backward_input_ids,
                                       random_backward_input_mask,
                                       random_backward_segment_ids,
                                       random_backward_text_len,
                                       random_labels,
                                       swap_forward_input_ids,
                                       swap_forward_input_mask,
                                       swap_forward_segment_ids,
                                       swap_forward_text_len,
                                       swap_backward_input_ids,
                                       swap_backward_input_mask,
                                       swap_backward_segment_ids,
                                       swap_backward_text_len,
                                       swap_labels,
                                       nli_forward_input_ids,
                                       nli_forward_input_mask,
                                       nli_forward_segment_ids,
                                       nli_forward_text_len,
                                       nli_backward_input_ids,
                                       nli_backward_input_mask,
                                       nli_backward_segment_ids,
                                       nli_backward_text_len,
                                       nli_labels,
                                       num_nli_labels,
                                       use_one_hot_embeddings,
                                       l2_reg_lambda=0.1,
                                       dropout_rate=1.0,
                                       lstm_size=None,
                                       num_layers=1):

    config = copy.deepcopy(bert_config)

    if not is_training:
        config.hidden_dropout_prob = 0.0
        config.attention_probs_dropout_prob = 0.0

    with tf.variable_scope("bert", reuse=tf.AUTO_REUSE):

        with tf.variable_scope("embeddings", reuse=tf.AUTO_REUSE):
            (response_embedding_output,
             response_embedding_table) = modeling.embedding_lookup(
                 input_ids=response_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)

            response_embedding_output = modeling.embedding_postprocessor(
                input_tensor=response_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=response_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)

            # random detection
            # Perform embedding lookup on the word ids.
            (random_forward_embedding_output,
             random_forward_embedding_table) = modeling.embedding_lookup(
                 input_ids=random_forward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)

            # Perform embedding lookup on the word ids.
            (random_backward_embedding_output,
             random_backward_embedding_table) = modeling.embedding_lookup(
                 input_ids=random_backward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)

            # Add positional embeddings and token type embeddings, then layer
            # normalize and perform dropout.
            random_forward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=random_forward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=random_forward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)

            random_backward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=random_backward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=random_backward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)

            # swap detection
            (swap_forward_embedding_output,
             swap_forward_embedding_table) = modeling.embedding_lookup(
                 input_ids=swap_forward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)

            (swap_backward_embedding_output,
             swap_backward_embedding_table) = modeling.embedding_lookup(
                 input_ids=swap_backward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)
            swap_forward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=swap_forward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=swap_forward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)
            swap_backward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=swap_backward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=swap_backward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)

            # # generic detection
            # (generic_forward_embedding_output, generic_forward_embedding_table) = modeling.embedding_lookup(
            #     input_ids=generic_forward_input_ids,
            #     vocab_size=config.vocab_size,
            #     embedding_size=config.hidden_size,
            #     initializer_range=config.initializer_range,
            #     word_embedding_name="word_embeddings",
            #     use_one_hot_embeddings=use_one_hot_embeddings)
            # (generic_backward_embedding_output, generic_backward_embedding_table) = modeling.embedding_lookup(
            #     input_ids=generic_backward_input_ids,
            #     vocab_size=config.vocab_size,
            #     embedding_size=config.hidden_size,
            #     initializer_range=config.initializer_range,
            #     word_embedding_name="word_embeddings",
            #     use_one_hot_embeddings=use_one_hot_embeddings)
            # generic_forward_embedding_output = modeling.embedding_postprocessor(
            #     input_tensor=generic_forward_embedding_output,
            #     use_token_type=not config.roberta,
            #     token_type_ids=generic_forward_segment_ids,
            #     token_type_vocab_size=config.type_vocab_size,
            #     token_type_embedding_name="token_type_embeddings",
            #     use_position_embeddings=True,
            #     position_embedding_name="position_embeddings",
            #     initializer_range=config.initializer_range,
            #     max_position_embeddings=config.max_position_embeddings,
            #     dropout_prob=config.hidden_dropout_prob,
            #     roberta=config.roberta)
            # generic_backward_embedding_output = modeling.embedding_postprocessor(
            #     input_tensor=generic_backward_embedding_output,
            #     use_token_type=not config.roberta,
            #     token_type_ids=generic_backward_segment_ids,
            #     token_type_vocab_size=config.type_vocab_size,
            #     token_type_embedding_name="token_type_embeddings",
            #     use_position_embeddings=True,
            #     position_embedding_name="position_embeddings",
            #     initializer_range=config.initializer_range,
            #     max_position_embeddings=config.max_position_embeddings,
            #     dropout_prob=config.hidden_dropout_prob,
            #     roberta=config.roberta)

            # nli detection
            (nli_forward_embedding_output,
             nli_forward_embedding_table) = modeling.embedding_lookup(
                 input_ids=nli_forward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)
            (nli_backward_embedding_output,
             nli_backward_embedding_table) = modeling.embedding_lookup(
                 input_ids=nli_backward_input_ids,
                 vocab_size=config.vocab_size,
                 embedding_size=config.hidden_size,
                 initializer_range=config.initializer_range,
                 word_embedding_name="word_embeddings",
                 use_one_hot_embeddings=use_one_hot_embeddings)
            nli_forward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=nli_forward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=nli_forward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)
            nli_backward_embedding_output = modeling.embedding_postprocessor(
                input_tensor=nli_backward_embedding_output,
                use_token_type=not config.roberta,
                token_type_ids=nli_backward_segment_ids,
                token_type_vocab_size=config.type_vocab_size,
                token_type_embedding_name="token_type_embeddings",
                use_position_embeddings=True,
                position_embedding_name="position_embeddings",
                initializer_range=config.initializer_range,
                max_position_embeddings=config.max_position_embeddings,
                dropout_prob=config.hidden_dropout_prob,
                roberta=config.roberta)

        with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
            response_attention_mask = modeling.create_attention_mask_from_input_mask(
                response_input_ids, response_input_mask)
            # [batch_size, from_seq_length, to_seq_length]
            # mask future tokens
            diag_vals = tf.ones_like(response_attention_mask[0, :, :])
            tril = tf.linalg.LinearOperatorLowerTriangular(
                diag_vals).to_dense()
            future_masks = tf.tile(tf.expand_dims(
                tril, 0), [tf.shape(response_attention_mask)[0], 1, 1])
            response_attention_mask = tf.math.multiply(response_attention_mask,
                                                       future_masks)
            # Run the stacked transformer.
            # `sequence_output` shape = [batch_size, seq_length, hidden_size].
            response_all_encoder_layers = modeling.transformer_model(
                input_tensor=response_embedding_output,
                attention_mask=response_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)

            # random detection
            # This converts a 2D mask of shape [batch_size, seq_length] to a 3D
            # mask of shape [batch_size, seq_length, seq_length] which is used
            # for the attention scores.
            random_forward_attention_mask = modeling.create_attention_mask_from_input_mask(
                random_forward_input_ids, random_forward_input_mask)
            random_backward_attention_mask = modeling.create_attention_mask_from_input_mask(
                random_backward_input_ids, random_backward_input_mask)
            # Run the stacked transformer.
            # `sequence_output` shape = [batch_size, seq_length, hidden_size].
            random_forward_all_encoder_layers = modeling.transformer_model(
                input_tensor=random_forward_embedding_output,
                attention_mask=random_forward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)
            random_backward_all_encoder_layers = modeling.transformer_model(
                input_tensor=random_backward_embedding_output,
                attention_mask=random_backward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)

            # swap detection
            swap_forward_attention_mask = modeling.create_attention_mask_from_input_mask(
                swap_forward_input_ids, swap_forward_input_mask)
            swap_backward_attention_mask = modeling.create_attention_mask_from_input_mask(
                swap_backward_input_ids, swap_backward_input_mask)
            swap_forward_all_encoder_layers = modeling.transformer_model(
                input_tensor=swap_forward_embedding_output,
                attention_mask=swap_forward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)
            swap_backward_all_encoder_layers = modeling.transformer_model(
                input_tensor=swap_backward_embedding_output,
                attention_mask=swap_backward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)

            # # generic detection
            # generic_forward_attention_mask = modeling.create_attention_mask_from_input_mask(generic_forward_input_ids,
            #                                                                                 generic_forward_input_mask)
            # generic_backward_attention_mask = modeling.create_attention_mask_from_input_mask(generic_backward_input_ids,
            #                                                                                  generic_backward_input_mask)
            # generic_forward_all_encoder_layers = modeling.transformer_model(
            #     input_tensor=generic_forward_embedding_output,
            #     attention_mask=generic_forward_attention_mask,
            #     hidden_size=config.hidden_size,
            #     num_hidden_layers=config.num_hidden_layers,
            #     num_attention_heads=config.num_attention_heads,
            #     intermediate_size=config.intermediate_size,
            #     intermediate_act_fn=modeling.get_activation(config.hidden_act),
            #     hidden_dropout_prob=config.hidden_dropout_prob,
            #     attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            #     initializer_range=config.initializer_range,
            #     do_return_all_layers=True)
            # generic_backward_all_encoder_layers = modeling.transformer_model(
            #     input_tensor=generic_backward_embedding_output,
            #     attention_mask=generic_backward_attention_mask,
            #     hidden_size=config.hidden_size,
            #     num_hidden_layers=config.num_hidden_layers,
            #     num_attention_heads=config.num_attention_heads,
            #     intermediate_size=config.intermediate_size,
            #     intermediate_act_fn=modeling.get_activation(config.hidden_act),
            #     hidden_dropout_prob=config.hidden_dropout_prob,
            #     attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            #     initializer_range=config.initializer_range,
            #     do_return_all_layers=True)

            # nli detection
            nli_forward_attention_mask = modeling.create_attention_mask_from_input_mask(
                nli_forward_input_ids, nli_forward_input_mask)
            nli_backward_attention_mask = modeling.create_attention_mask_from_input_mask(
                nli_backward_input_ids, nli_backward_input_mask)
            nli_forward_all_encoder_layers = modeling.transformer_model(
                input_tensor=nli_forward_embedding_output,
                attention_mask=nli_forward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)
            nli_backward_all_encoder_layers = modeling.transformer_model(
                input_tensor=nli_backward_embedding_output,
                attention_mask=nli_backward_attention_mask,
                hidden_size=config.hidden_size,
                num_hidden_layers=config.num_hidden_layers,
                num_attention_heads=config.num_attention_heads,
                intermediate_size=config.intermediate_size,
                intermediate_act_fn=modeling.get_activation(config.hidden_act),
                hidden_dropout_prob=config.hidden_dropout_prob,
                attention_probs_dropout_prob=config.
                attention_probs_dropout_prob,
                initializer_range=config.initializer_range,
                do_return_all_layers=True)

        random_forward_embedding = random_forward_all_encoder_layers[-2]
        random_backward_embedding = random_backward_all_encoder_layers[-2]
        swap_forward_embedding = swap_forward_all_encoder_layers[-2]
        swap_backward_embedding = swap_backward_all_encoder_layers[-2]
        # generic_forward_embedding = generic_forward_all_encoder_layers[-2]
        # generic_backward_embedding = generic_backward_all_encoder_layers[-2]
        nli_forward_embedding = nli_forward_all_encoder_layers[-2]
        nli_backward_embedding = nli_backward_all_encoder_layers[-2]
        response_embedding = response_all_encoder_layers[-2]

    response_embedding_shape = modeling.get_shape_list(response_embedding,
                                                       expected_rank=3)
    with tf.variable_scope("lm_head", reuse=tf.AUTO_REUSE):

        response_logits = tf.layers.dense(response_embedding,
                                          config.hidden_size,
                                          activation=None)
        response_logits = modeling.gelu(response_logits)
        response_logits = modeling.layer_norm(response_logits)
        response_outputs = tf.layers.dense(
            response_logits,
            config.vocab_size,
            activation=None,
            use_bias=True,
            bias_initializer=tf.zeros_initializer())

        response_one_hot = tf.one_hot(response_labels,
                                      depth=config.vocab_size,
                                      dtype=tf.float32)

        lm_cost = tf.nn.softmax_cross_entropy_with_logits(
            labels=response_one_hot, logits=response_outputs)

        sequence_mask = tf.sequence_mask(response_text_len,
                                         maxlen=response_embedding_shape[1],
                                         dtype=tf.float32)

        masked_lm_cost = tf.math.multiply(lm_cost, sequence_mask)

        final_lm_loss = tf.reduce_mean(
            tf.math.divide(tf.reduce_sum(masked_lm_cost, axis=1),
                           tf.cast(response_text_len, dtype=tf.float32)))

        perplexity = tf.exp(
            tf.math.divide(tf.reduce_sum(masked_lm_cost, axis=1),
                           tf.cast(response_text_len, dtype=tf.float32)))
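        # Note: `perplexity` is a per-example tensor, exp of that example's
        # mean per-token cross-entropy; `final_lm_loss` is the batch mean of
        # the same per-example quantity without the exp.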

    random_forward_embedding_shape = modeling.get_shape_list(
        random_forward_embedding, expected_rank=3)
    random_backward_embedding_shape = modeling.get_shape_list(
        random_backward_embedding, expected_rank=3)
    assert random_forward_embedding_shape[
        2] == random_backward_embedding_shape[2]
    random_forward_embedding = tf.transpose(random_forward_embedding,
                                            [1, 0, 2])
    random_backward_embedding = tf.transpose(random_backward_embedding,
                                             [1, 0, 2])
    random_forward_input_mask = tf.cast(
        tf.transpose(random_forward_input_mask, [1, 0]), tf.float32)
    random_backward_input_mask = tf.cast(
        tf.transpose(random_backward_input_mask, [1, 0]), tf.float32)

    swap_forward_embedding_shape = modeling.get_shape_list(
        swap_forward_embedding, expected_rank=3)
    swap_backward_embedding_shape = modeling.get_shape_list(
        swap_backward_embedding, expected_rank=3)
    assert swap_forward_embedding_shape[2] == swap_backward_embedding_shape[2]
    swap_forward_embedding = tf.transpose(swap_forward_embedding, [1, 0, 2])
    swap_backward_embedding = tf.transpose(swap_backward_embedding, [1, 0, 2])
    swap_forward_input_mask = tf.cast(
        tf.transpose(swap_forward_input_mask, [1, 0]), tf.float32)
    swap_backward_input_mask = tf.cast(
        tf.transpose(swap_backward_input_mask, [1, 0]), tf.float32)

    # generic_forward_embedding_shape = modeling.get_shape_list(generic_forward_embedding, expected_rank=3)
    # generic_backward_embedding_shape = modeling.get_shape_list(generic_backward_embedding, expected_rank=3)
    # assert generic_forward_embedding_shape[2] == generic_backward_embedding_shape[2]
    # generic_forward_embedding = tf.transpose(generic_forward_embedding, [1, 0, 2])
    # generic_backward_embedding = tf.transpose(generic_backward_embedding, [1, 0, 2])
    # generic_forward_input_mask = tf.cast(tf.transpose(generic_forward_input_mask, [1, 0]), tf.float32)
    # generic_backward_input_mask = tf.cast(tf.transpose(generic_backward_input_mask, [1, 0]), tf.float32)

    nli_forward_embedding_shape = modeling.get_shape_list(
        nli_forward_embedding, expected_rank=3)
    nli_backward_embedding_shape = modeling.get_shape_list(
        nli_backward_embedding, expected_rank=3)
    assert nli_forward_embedding_shape[2] == nli_backward_embedding_shape[2]
    nli_forward_embedding = tf.transpose(nli_forward_embedding, [1, 0, 2])
    nli_backward_embedding = tf.transpose(nli_backward_embedding, [1, 0, 2])
    nli_forward_input_mask = tf.cast(
        tf.transpose(nli_forward_input_mask, [1, 0]), tf.float32)
    nli_backward_input_mask = tf.cast(
        tf.transpose(nli_backward_input_mask, [1, 0]), tf.float32)

    model = HadeModel(
        x_random_forward=random_forward_embedding,
        x_random_mask_forward=random_forward_input_mask,
        x_random_length_forward=random_forward_text_len,
        x_random_backward=random_backward_embedding,
        x_random_mask_backward=random_backward_input_mask,
        x_random_length_backward=random_backward_text_len,
        y_random=random_labels,
        x_swap_forward=swap_forward_embedding,
        x_swap_mask_forward=swap_forward_input_mask,
        x_swap_length_forward=swap_forward_text_len,
        x_swap_backward=swap_backward_embedding,
        x_swap_mask_backward=swap_backward_input_mask,
        x_swap_length_backward=swap_backward_text_len,
        y_swap=swap_labels,
        # x_generic_forward=generic_forward_embedding,
        # x_generic_mask_forward=generic_forward_input_mask,
        # x_generic_length_forward=generic_forward_text_len,
        # x_generic_backward=generic_backward_embedding,
        # x_generic_mask_backward=generic_backward_input_mask,
        # x_generic_length_backward=generic_backward_text_len, y_generic=generic_labels,
        x_nli_forward=nli_forward_embedding,
        x_nli_mask_forward=nli_forward_input_mask,
        x_nli_length_forward=nli_forward_text_len,
        x_nli_backward=nli_backward_embedding,
        x_nli_mask_backward=nli_backward_input_mask,
        x_nli_length_backward=nli_backward_text_len,
        y_nli=nli_labels,
        embedding_dim=random_forward_embedding_shape[2],
        num_nli_labels=num_nli_labels,
        hidden_size=lstm_size,
        l2_reg_lambda=l2_reg_lambda,
        num_layers=num_layers,
        dropout_rate=dropout_rate,
        is_training=is_training)

    random_prob, swap_prob, nli_prob, total_cost = model.create_model()

    return random_prob, swap_prob, nli_prob, total_cost, final_lm_loss, perplexity
Example #59
def parameter_attention(x,
                        total_key_depth,
                        total_value_depth,
                        output_depth,
                        memory_rows,
                        num_heads,
                        dropout_rate,
                        name=None):
    """Attention over parameters.

    We use the same multi-headed attention as in the other layers, but the
    memory keys and values are model parameters. There is no linear
    transformation on the keys or values.

    We are also a bit more careful about memory usage, since the number of
    memory positions may be very large.

    Args:
      x: a Tensor with shape [batch, length_q, channels]
      total_key_depth: an integer
      total_value_depth: an integer
      output_depth: an integer
      memory_rows: an integer
      num_heads: an integer dividing total_key_depth and total_value_depth
      dropout_rate: a floating point number
      name: an optional string

    Returns:
      A Tensor.
    """
    with tf.variable_scope(name, default_name="parameter_attention",
                           values=[x]):
        head_size_k = total_key_depth // num_heads
        head_size_v = total_value_depth // num_heads
        var_shape_k = [num_heads, memory_rows, head_size_k]
        var_shape_v = [num_heads, memory_rows, head_size_v]
        k = tf.get_variable(
            "k", var_shape_k,
            initializer=tf.random_normal_initializer(
                0, output_depth ** -0.5)) * (num_heads ** 0.5)
        v = tf.get_variable(
            "v", var_shape_v,
            initializer=tf.random_normal_initializer(
                0, output_depth ** -0.5)) * (output_depth ** 0.5)
        batch_size = tf.shape(x)[0]
        length = tf.shape(x)[1]
        q = common_layers.conv1d(x, total_key_depth, 1, name="q_transform")
        if dropout_rate:
            # This is a cheaper form of attention dropout: we use the same
            # dropout decisions across batch elements and query positions,
            # but different decisions across heads and memory positions.
            v = tf.nn.dropout(v, 1.0 - dropout_rate,
                              noise_shape=[num_heads, memory_rows, 1])
        # query is [batch, length, hidden_size]
        # reshape and transpose it to [heads, batch * length, head_size]
        q = tf.reshape(q, [batch_size, length, num_heads, head_size_k])
        q = tf.transpose(q, [2, 0, 1, 3])
        q = tf.reshape(q, [num_heads, batch_size * length, head_size_k])
        weights = tf.matmul(q, k, transpose_b=True)
        weights = tf.nn.softmax(weights)
        y = tf.matmul(weights, v)
        y = tf.reshape(y, [num_heads, batch_size, length, head_size_v])
        y = tf.transpose(y, [1, 2, 0, 3])
        y = tf.reshape(y, [batch_size, length, total_value_depth])
        y.set_shape([None, None, total_value_depth])
        y = common_layers.conv1d(y, output_depth, 1, name="output_transform")
        return y
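A minimal usage sketch for parameter_attention, assuming tensor2tensor's common_layers is importable and variables are initialized; the shapes and the scope name are illustrative:

x = tf.random_normal([8, 20, 64])  # [batch, length_q, channels]
y = parameter_attention(x,
                        total_key_depth=64,
                        total_value_depth=64,
                        output_depth=64,
                        memory_rows=128,
                        num_heads=4,
                        dropout_rate=0.1,
                        name="param_attn")
# y: [8, 20, 64] after the output transform.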
Example #60
0
File: gan.py Project: fendaq/gan
    def _create_graph(self):
        """Creates the computational graph.

        :return: The computational graph.
        """

        eps = 1e-5
        with tf.Graph().as_default() as graph:
            tf.set_random_seed(
                self.seed
            )  # Fix the random seed for randomized tensorflow operations.

            self.x = tf.placeholder(tf.float32, shape=(None, 1))
            self.z = tf.placeholder(tf.float32, shape=(None, 1))
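            # x holds real (1-D) data samples; z holds the latent noise fed
            # to the generator.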

            with tf.variable_scope(
                    'generator'):  # Create generator operations.
                self.G = self._create_generator()
                self.xg = self.G(self.z)

            if self.model == 'gan':

                with tf.variable_scope(
                        'discriminator'):  # Create critic operations.
                    D = self._create_discriminator()  # D prob and D logit
                    self.D_real = tf.sigmoid(D(self.x))  # discriminate real
                    self.D_fake = tf.sigmoid(D(self.xg))  # discriminate fake

                    epsilon = tf.random_uniform(shape=tf.shape(self.x),
                                                minval=0.,
                                                maxval=1.)
                    interpolation = epsilon * self.x + (1 - epsilon) * self.xg
                    penalty = (
                        tf.norm(tf.gradients(D(interpolation),
                                             interpolation)[0],
                                axis=1) - 1)**2.0
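                    # NOTE: tf.gradients returns a list, hence the [0] above
                    # to get per-sample gradient norms. This penalty is
                    # computed but never added to loss_d in the plain-GAN
                    # branch; only 'wgangp' and 'gaan' below apply it.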

                # GAN losses.
                self.loss_d = tf.reduce_mean(-tf.log(self.D_real + eps) -
                                             tf.log(1 - self.D_fake +
                                                    eps)) * 0.5
                self.loss_g = tf.reduce_mean(-tf.log(self.D_fake + eps))

            elif self.model == 'rgan':
                with tf.variable_scope(
                        'encoder'):  # Create encoder operations.
                    self.E = self._create_encoder()
                    self.ze = self.E(self.x)
                    self.xr = self.G(self.ze)
                with tf.variable_scope(
                        'discriminator'):  # Create critic operations.
                    D = self._create_discriminator()  # D prob and D logit
                    self.D_real = tf.sigmoid(D(self.x))  # discriminate real
                    self.D_fake = tf.sigmoid(D(self.xg))  # discriminate fake
                    self.D_recon = tf.sigmoid(D(self.xr))  # discriminate recon
                    self.mse = tf.reduce_sum(tf.square(self.x - self.xr), 1)

                    lambda1 = 1e-2
                    lambda2 = 1e-2
                    self.loss_d = tf.reduce_mean(-tf.log(self.D_real + eps) -
                                                 tf.log(1 - self.D_fake + eps))
                    self.loss_e = tf.reduce_mean(lambda1 * self.mse +
                                                 lambda2 * self.D_recon)
                    self.loss_g = tf.reduce_mean(-tf.log(self.D_fake + eps) +
                                                 self.loss_e)
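                    # The encoder loss mixes reconstruction error with the
                    # critic's score on reconstructions; the generator loss
                    # adds this encoder loss to the usual GAN term.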

            elif self.model == 'mdgan':
                with tf.variable_scope(
                        'encoder'):  # Create encoder operations.
                    self.E = self._create_encoder()
                    self.ze = self.E(self.x)
                    self.xr = self.G(self.ze)
                with tf.variable_scope(
                        'discriminator1'):  # Create critic operations.
                    D1 = self._create_discriminator()  # D prob and D logit
                    self.D1_real = tf.sigmoid(D1(self.x))  # discriminate real
                    self.D1_recon = tf.sigmoid(D1(
                        self.xr))  # discriminate recon
                    self.mse = tf.reduce_sum(tf.square(self.x - self.xr), 1)

                    lambda1 = 1e-2
                    lambda2 = 1e-2
                    self.loss_d1 = tf.reduce_mean(-tf.log(self.D1_real + eps) -
                                                  tf.log(1 - self.D1_recon +
                                                         eps))
                    self.loss_g1 = tf.reduce_mean(self.mse -
                                                  lambda1 * self.D1_recon)

                with tf.variable_scope(
                        'discriminator'):  # Create critic operations.
                    D = self._create_discriminator()  # D prob and D logit
                    self.D_fake = tf.sigmoid(D(self.xg))  # discriminate fake
                    self.D_real = tf.sigmoid(D(self.xr))  # discriminate recon

                    self.loss_d = tf.reduce_mean(-tf.log(self.D_real + eps) -
                                                 tf.log(1 - self.D_fake + eps))
                    self.loss_g = tf.reduce_mean(-tf.log(self.D_fake + eps))
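                    # In this two-critic setup, D1 compares real data with
                    # reconstructions, while D compares generated samples
                    # with reconstructions rather than with real data.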

            elif self.model == 'vaegan':

                self.ep = tf.random_normal(shape=[tf.shape(self.x)[0], 1])

                with tf.variable_scope(
                        'encoder'):  # Create encoder operations.
                    self.Em, self.Es = self._create_encoder_vaegan()
                    self.ze_m = self.Em(self.x)
                    self.ze_s = self.Es(self.x)
                    self.ze_x = tf.add(self.ze_m,
                                       tf.sqrt(tf.exp(self.ze_s)) * self.ep)
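                    # The two lines above apply the reparameterization trick:
                    # z = mu + sigma * eps, where ze_s is the log-variance,
                    # so sigma = sqrt(exp(ze_s)).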
                    self.xr = self.G(self.ze_x)

                with tf.variable_scope('discriminator'):
                    D = self._create_discriminator()  # D prob and D logit.
                    self.D_real = tf.sigmoid(D(self.x))
                    self.D_recon = tf.sigmoid(D(self.xr))
                    self.D_fake = tf.sigmoid(D(self.xg))

                # If you want to use the gradient penalty:
                # epsilon = tf.random_uniform(shape=tf.shape(self.x), minval=0., maxval=1.)
                # interpolation = epsilon * self.x + (1 - epsilon) * self.xg
                # penalty = (tf.norm(tf.gradients(D(interpolation), interpolation), axis=1) - 1) ** 2.0

                # KL loss.
                self.kl = self.kl_loss(self.ze_m, self.ze_s)
                # gan loss
                self.ld = tf.reduce_mean(-tf.log(self.D_real + eps) -
                                         tf.log(1 - self.D_recon + eps) -
                                         tf.log(1 - self.D_fake + eps))
                # perceptual loss (feature loss or reconstruction loss)
                self.lr = tf.reduce_mean(self.nll_normal(self.xr, self.x))
                # encoder loss
                self.le = -self.lr + self.kl / (self.n_batch)
                # generator loss
                self.lg = tf.reduce_mean(-tf.log(self.D_fake + eps) -
                                         tf.log(self.D_recon +
                                                eps)) - 1e-6 * self.lr

                self.loss_d = self.ld  # + self.lambda_reg * penalty  # to use the penalty as in gaan
                self.loss_g = self.lg
                self.loss_e = self.le

                self.loss_d = tf.reshape(self.loss_d, [])  # convert to scalar

            elif self.model == 'wgangp':
                with tf.variable_scope(
                        'discriminator'):  # Create critic operations.
                    D = self._create_discriminator()  # D prob and D logit.
                    self.D_real = D(self.x)  # Criticize real data.
                    self.D_fake = D(self.xg)  # Criticize generated data.
                    diff = tf.abs(tf.reduce_mean(self.D_real - self.D_fake))
                    # Create the gradient penalty operations.
                    epsilon = tf.random_uniform(shape=tf.shape(self.x),
                                                minval=0.,
                                                maxval=1.)
                    interpolation = epsilon * self.x + (1 - epsilon) * self.xg
                    penalty = (
                        tf.norm(tf.gradients(D(interpolation),
                                             interpolation)[0],
                                axis=1) - 1)**2.0
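                    # The gradient penalty drives the critic's gradient norm
                    # toward 1 on interpolations between real and generated
                    # samples, softly enforcing the 1-Lipschitz constraint.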

                self.loss_d = tf.reduce_mean(self.D_fake - self.D_real +
                                             self.lambda_reg * penalty)
                self.loss_g = -tf.reduce_mean(self.D_fake)

            elif self.model == 'gaan':
                with tf.variable_scope(
                        'encoder'):  # Create generator operations.
                    self.E = self._create_encoder()
                    self.ze = self.E(self.x)
                    self.xr = self.G(self.ze)

                    # encode the generated sample xg
                    self.zg = self.E(self.xg)

                with tf.variable_scope(
                        'discriminator'):  # Create critic operations.
                    D = self._create_discriminator()  # D prob and D logit
                    self.D_real_logit = D(self.x)  # discriminate real
                    self.D_fake_logit = D(self.xg)  # discriminate fake
                    self.D_recon_logit = D(self.xr)  # discriminate recon
                    self.D_real = tf.sigmoid(
                        self.D_real_logit)  # discriminate real
                    self.D_fake = tf.sigmoid(
                        self.D_fake_logit)  # discriminate fake
                    self.D_recon = tf.sigmoid(
                        self.D_recon_logit)  # discriminate recon
                    diff = tf.abs(tf.reduce_mean(self.D_real - self.D_fake))
                    # Create the gradient penalty operations.
                    epsilon = tf.random_uniform(shape=tf.shape(self.x),
                                                minval=0.,
                                                maxval=1.)
                    interpolation = epsilon * self.x + (1 - epsilon) * self.xg
                    penalty = (
                        tf.norm(tf.gradients(D(interpolation),
                                             interpolation)[0],
                                axis=1) - 1)**2.0
                    #penalty = (tf.norm(tf.gradients(D(interpolation), interpolation), axis=1) - tf.minimum(diff,1.0)) ** 2.0

                self.recon = tf.reduce_mean(
                    tf.square(self.x - self.xr))  # reconstruction error
                self.loss_x = tf.reduce_mean(self.x - self.xg)
                self.loss_z = tf.reduce_mean(self.ze - self.z)
                self.reg = tf.square(self.loss_x - self.loss_z)

                self.ld = tf.reduce_mean(-0.5 * tf.log(self.D_real + eps) -
                                         0.5 * tf.log(self.D_recon + eps) -
                                         tf.log(1 - self.D_fake + eps))
                self.lg = tf.abs(tf.reduce_mean(self.D_real - self.D_fake))
                self.loss_d = self.ld + self.lambda_reg * penalty
                self.loss_r = self.recon + 0.1 * self.reg
                self.loss_g = self.lg

                self.loss_d = tf.reshape(self.loss_d, [])  # convert to scalar
                self.loss_g = tf.reshape(self.loss_g, [])  # convert to scalar
                self.loss_r = tf.reshape(self.loss_r, [])  # convert to scalar

            # Store the variables of the critic and the generator.
            self.vars_d = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope='discriminator')
            self.vars_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            scope='generator')

            # Create optimizer operations for critic and generator.
            self.opt_d = self._create_optimizer(self.loss_d, self.vars_d,
                                                self.learning_rate, self.beta1,
                                                self.beta2)
            self.opt_g = self._create_optimizer(self.loss_g, self.vars_g,
                                                self.learning_rate, self.beta1,
                                                self.beta2)

            if self.model == 'rgan':
                self.vars_e = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
                self.opt_e = self._create_optimizer(self.loss_e, self.vars_e,
                                                    self.learning_rate,
                                                    self.beta1, self.beta2)
            elif self.model == 'mdgan':
                self.vars_e = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
                self.vars_d1 = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator1')
                self.opt_d1 = self._create_optimizer(self.loss_d1,
                                                     self.vars_d1,
                                                     self.learning_rate,
                                                     self.beta1, self.beta2)
                self.opt_g1 = self._create_optimizer(self.loss_g1,
                                                     self.vars_g + self.vars_e,
                                                     self.learning_rate,
                                                     self.beta1, self.beta2)
            elif self.model == 'vaegan':

                self.vars_e = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')

                # Optimizer for D (RMSProp with gradient clipping).
                self.trainer_D = tf.train.RMSPropOptimizer(
                    learning_rate=self.learning_rate)
                self.gradients_D = self.trainer_D.compute_gradients(
                    self.loss_d, var_list=self.vars_d)
                self.clipped_gradients_D = [(tf.clip_by_value(grad, -1.0,
                                                              1.0), var)
                                            for grad, var in self.gradients_D]
                self.opti_D = self.trainer_D.apply_gradients(
                    self.clipped_gradients_D)

                # Optimizer for G.
                self.trainer_G = tf.train.RMSPropOptimizer(
                    learning_rate=self.learning_rate)
                self.gradients_G = self.trainer_G.compute_gradients(
                    self.loss_g, var_list=self.vars_g)
                self.clipped_gradients_G = [(tf.clip_by_value(grad, -1., 1.), var)
                                            for grad, var in self.gradients_G]
                self.opti_G = self.trainer_G.apply_gradients(
                    self.clipped_gradients_G)

                # Optimizer for E.
                self.trainer_E = tf.train.RMSPropOptimizer(
                    learning_rate=self.learning_rate)
                self.gradients_E = self.trainer_E.compute_gradients(
                    self.loss_e, var_list=self.vars_e)
                self.clipped_gradients_E = [(tf.clip_by_value(grad, -1., 1.), var)
                                            for grad, var in self.gradients_E]
                self.opti_E = self.trainer_E.apply_gradients(
                    self.clipped_gradients_E)
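                # Unlike the other models, which go through _create_optimizer,
                # vaegan builds RMSProp optimizers directly and clips all
                # gradients to [-1, 1] for D, G, and E.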

            elif self.model == 'gaan':
                self.vars_e = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
                self.opt_r = self._create_optimizer(self.loss_r,
                                                    self.vars_e + self.vars_g,
                                                    self.learning_rate,
                                                    self.beta1, self.beta2)
                self.opt_recon = self._create_optimizer(
                    self.recon, self.vars_e + self.vars_g, self.learning_rate,
                    self.beta1, self.beta2)

            summary_enc_1_params = self.get_weights('encoder/dense/kernel')
            summary_enc_2_params = self.get_weights('encoder/dense_1/kernel')
            tf.summary.histogram('enc_1_params', summary_enc_1_params)
            tf.summary.histogram('enc_2_params', summary_enc_2_params)

            summary_gen_1_params = self.get_weights('generator/dense/kernel')
            summary_gen_2_params = self.get_weights('generator/dense_1/kernel')
            tf.summary.histogram('gen_1_params', summary_gen_1_params)
            tf.summary.histogram('gen_2_params', summary_gen_2_params)

            summary_disc_1_params = self.get_weights(
                'discriminator/dense/kernel')
            summary_disc_2_params = self.get_weights(
                'discriminator/dense_1/kernel')
            tf.summary.histogram('disc_1_params', summary_disc_1_params)
            tf.summary.histogram('disc_2_params', summary_disc_2_params)

            # Create variable initialization operation.
            self.init = tf.global_variables_initializer()
            #graph.finalize()
        return graph
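The gradient-penalty construction recurs in the gan, wgangp, and gaan branches above. Here is a self-contained sketch on toy 1-D data, with a single dense layer standing in for the project's discriminator (an assumption for illustration only):

import tensorflow as tf

x_real = tf.placeholder(tf.float32, shape=(None, 1))
x_fake = tf.placeholder(tf.float32, shape=(None, 1))

def critic(h):
    # Stand-in critic: one dense layer, reused across calls.
    return tf.layers.dense(h, 1, name="critic", reuse=tf.AUTO_REUSE)

# Random point on the line between each real/fake pair.
epsilon = tf.random_uniform(shape=tf.shape(x_real), minval=0., maxval=1.)
interpolation = epsilon * x_real + (1 - epsilon) * x_fake
# tf.gradients returns a list; take the gradient tensor itself.
grads = tf.gradients(critic(interpolation), interpolation)[0]
penalty = tf.reduce_mean((tf.norm(grads, axis=1) - 1.0) ** 2.0)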