def testEmptyOutputShape1(self):
    indices = tf.zeros([2, 2, 2], tf.int32)
    updates = tf.zeros([2, 2, 2], tf.int32)
    shape = tf.constant([0, 3, 2], tf.int32)

    with self.assertRaisesWithPredicateMatch(
        ValueError, "Indices and updates specified for empty output shape"):
      tf.scatter_nd(indices, updates, shape)

  def testRank3InvalidShape2(self):
    indices = tf.zeros([2, 2, 1], tf.int32)
    updates = tf.zeros([2, 2], tf.int32)
    shape = np.array([2, 2, 2])
    with self.assertRaisesWithPredicateMatch(
        ValueError, "The inner \\d+ dimensions of output\\.shape="):
      tf.scatter_nd(indices, updates, shape)

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    with self.assertRaisesWithPredicateMatch(
        ValueError, "The inner \\d+ dimensions of ref\\.shape="):
      tf.scatter_nd_update(ref, indices, updates)

  def testEmptyOutputShape2(self):
    indices = tf.placeholder(tf.int32, shape=None)
    updates = tf.placeholder(tf.int32, shape=None)
    shape = tf.constant([0, 3, 2], tf.int32)

    with self.test_session():
      tf.scatter_nd(indices, updates, shape).eval(feed_dict={
          indices: np.zeros(
              [2, 2, 2], dtype=np.int32),
          updates: np.zeros(
              [2, 2, 2], dtype=np.int32)
      })
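
For reference, a minimal sketch (TF 1.x, made-up values) of the non-empty case: each row of indices names an output row, the matching row of updates is written there, and untouched entries stay zero.

import tensorflow as tf

indices = tf.constant([[0], [2]])         # write into rows 0 and 2
updates = tf.constant([[1, 1], [2, 2]])   # one update row per index
shape = tf.constant([4, 2])
scatter = tf.scatter_nd(indices, updates, shape)
with tf.Session() as sess:
    print(sess.run(scatter))  # [[1 1] [0 0] [2 2] [0 0]]
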
def hnet_loss(gt_pts, transformation_coeffcient, name):
    """
    
    :param gt_pts: 原始的标签点对 [x, y, 1] 
    :param transformation_coeffcient: 映射矩阵参数(6参数矩阵) [[a, b, c], [0, d, e], [0, f, 1]]
    :param name:
    :return: 
    """
    with tf.variable_scope(name):
        # First, project the original label points with H
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares fit for the polynomial coefficient matrix (cubic in Y)
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))
        # Evaluate the fitted positions and map them back to the original space to compute the loss
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

        loss = tf.reduce_mean(tf.pow(gt_pts[0, :] - x_transformation_back[0, :], 2))

    return loss
def hnet_transformation(gt_pts, transformation_coeffcient, name):
    """

    :param gt_pts:
    :param transformation_coeffcient:
    :param name:
    :return:
    """
    with tf.variable_scope(name):
        # First, project the original label points with H
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares fit for the polynomial coefficient matrix (cubic in Y)
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Evaluate the fitted positions from the polynomial coefficients
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        preds_fit = tf.stack([tf.squeeze(x_preds, -1), Y], axis=1)
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

    return x_transformation_back
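
A standalone sketch (made-up coefficient values) of the scatter_nd trick shared by both functions above: the six learned coefficients plus the fixed 1.0 are placed into the flattened 3x3 homography, leaving the structural zeros untouched.

import tensorflow as tf

coeffs = tf.constant([1., 2., 3., 4., 5., 6.])  # [a, b, c, d, e, f]
coeffs = tf.concat([coeffs, [1.0]], axis=-1)
H = tf.scatter_nd(tf.constant([[0], [1], [2], [4], [5], [7], [8]]),
                  coeffs, tf.constant([9]))
H = tf.reshape(H, shape=[3, 3])
with tf.Session() as sess:
    print(sess.run(H))  # [[1. 2. 3.] [0. 4. 5.] [0. 6. 1.]]
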
    def unpool_layer2x2_batch(self, bottom, argmax):
        bottom_shape = tf.shape(bottom)
        top_shape = [bottom_shape[0], bottom_shape[1] * 2, bottom_shape[2] * 2, bottom_shape[3]]

        batch_size = top_shape[0]
        height = top_shape[1]
        width = top_shape[2]
        channels = top_shape[3]

        argmax_shape = tf.to_int64([batch_size, height, width, channels])
        argmax = self.unravel_argmax(argmax, argmax_shape)

        t1 = tf.to_int64(tf.range(channels))
        t1 = tf.tile(t1, [batch_size * (width // 2) * (height // 2)])
        t1 = tf.reshape(t1, [-1, channels])
        t1 = tf.transpose(t1, perm=[1, 0])
        t1 = tf.reshape(t1, [channels, batch_size, height // 2, width // 2, 1])
        t1 = tf.transpose(t1, perm=[1, 0, 2, 3, 4])

        t2 = tf.to_int64(tf.range(batch_size))
        t2 = tf.tile(t2, [channels * (width // 2) * (height // 2)])
        t2 = tf.reshape(t2, [-1, batch_size])
        t2 = tf.transpose(t2, perm=[1, 0])
        t2 = tf.reshape(t2, [batch_size, channels, height // 2, width // 2, 1])

        t3 = tf.transpose(argmax, perm=[1, 4, 2, 3, 0])

        t = tf.concat([t2, t3, t1], 4)
        indices = tf.reshape(t, [(height // 2) * (width // 2) * channels * batch_size, 4])

        x1 = tf.transpose(bottom, perm=[0, 3, 1, 2])
        values = tf.reshape(x1, [-1])
        return tf.scatter_nd(indices, values, tf.to_int64(top_shape))
def update_slices(slices, indices, dense_tensor, head_dims):
  """Reconstitutes a tensor from slices and corresponding indices.

  Like _stack_tensor, but instead of setting missing slices to 0, sets them to
  what they were in the original tensor. The return value is reshaped to be
  the same as dense_tensor.

  Args:
    slices: a tensor. Shape [K, D_1, ...]
    indices: a 1-D integer tensor. Shape: [K]
    dense_tensor: the original tensor the slices were taken
      from. Shape: [D_0, D_1, ...]
    head_dims: True dimensions of the dense_tensor's first dimension.

  Returns:
    Reconstituted tensor. Shape: [D_0, D_1, ...]
  """
  # NOTE(siege): This cast shouldn't be necessary.
  indices = tf.cast(indices, tf.int32)

  tail_dims = tf.shape(dense_tensor)[1:]
  dense_shape = tf.concat([head_dims, tail_dims], 0)

  update_mask_vals = tf.fill(tf.shape(indices), 1)
  reshaped_indices = tf.expand_dims(indices, -1)
  update_mask = tf.equal(
      tf.scatter_nd(reshaped_indices, update_mask_vals, head_dims[:1]), 1)

  reshaped_dense_slices = tf.reshape(
      stack_tensor(slices, indices, dense_tensor, head_dims), dense_shape)
  reshaped_dense_tensor = tf.reshape(dense_tensor, dense_shape)

  return tf.reshape(
      tf.where(update_mask, reshaped_dense_slices, reshaped_dense_tensor),
      tf.shape(dense_tensor))
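
A hypothetical usage sketch for update_slices (all values made up): rows named in indices are replaced by the given slices, and every other row keeps its original value.

import tensorflow as tf

dense = tf.constant([[10., 10.], [20., 20.], [30., 30.]])
slices = tf.constant([[1., 1.]])   # replacement for row 1
indices = tf.constant([1])
head_dims = tf.constant([3])       # true size of the first dimension
result = update_slices(slices, indices, dense, head_dims)
# result == [[10. 10.], [1. 1.], [30. 30.]]
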
def stack_tensor(slices, indices, dense_tensor, head_dims):
  """Reconsititutes a tensor from slices and corresponding indices.

  This is an inverse operation to slice_tensor. Missing slices are set to 0.

  Args:
    slices: a tensor. Shape [K, D_1, ...]
    indices: a 1-D integer tensor. Shape: [K]
    dense_tensor: the original tensor the slices were taken
      from. Shape: [D_0, D_1, ...]
    head_dims: True dimensions of the dense_tensor's first dimension.

  Returns:
    Reconstituted tensor. Shape: [D_0, D_1, ...]
  """
  # NOTE(siege): This cast shouldn't be necessary.
  indices = tf.cast(indices, tf.int32)

  tail_dims = tf.shape(dense_tensor)[1:]
  dense_shape = tf.concat([head_dims, tail_dims], 0)

  slices = tf.reshape(slices, tf.concat([[-1], dense_shape[1:]], 0))
  indices = tf.expand_dims(indices, -1)

  return tf.reshape(tf.scatter_nd(indices, slices, dense_shape),
                    tf.shape(dense_tensor))
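
And a matching sketch for stack_tensor (values made up), where missing rows come back as zeros rather than the original values:

import tensorflow as tf

dense = tf.zeros([4, 2])           # only its shape is used here
slices = tf.constant([[1., 1.], [3., 3.]])
indices = tf.constant([0, 2])
result = stack_tensor(slices, indices, dense, tf.constant([4]))
# result == [[1. 1.], [0. 0.], [3. 3.], [0. 0.]]
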
 def compute_module(accum, module):
     mask = tf.equal(module, selection)
     reduced_mask = tf.reduce_any(mask, axis=-1)
     indices = tf.where(reduced_mask)
     affected_inp = tf.boolean_mask(inputs, reduced_mask)
     output = module_fnc(affected_inp, module)
     return accum + tf.scatter_nd(indices, output, tf.cast(output_shape, tf.int64))
      def hard_negative_mining():
        bboxes_per_batch = tf.unstack(bboxes)
        classification_loss_per_batch = tf.unstack(classification_loss)
        num_positives_per_batch = tf.unstack(tf.reduce_sum(positives, axis=-1))
        neg_class_loss_per_batch = tf.unstack(neg_class_loss_all)

        neg_class_losses = []
        total_negatives = []

        for bboxes_per_image, classification_loss_per_image, num_positives_per_image, neg_class_loss_per_image in \
            zip(bboxes_per_batch, classification_loss_per_batch, num_positives_per_batch, neg_class_loss_per_batch):
          min_negatives_keep = tf.maximum(self.neg_pos_ratio * num_positives_per_image, 3)
          num_negatives_keep = tf.minimum(min_negatives_keep,
                                          tf.count_nonzero(neg_class_loss_per_image, dtype=tf.float32))

          indices = tf.image.non_max_suppression(bboxes_per_image, classification_loss_per_image,
                                                 tf.to_int32(num_negatives_keep), iou_threshold=0.99)
          num_negatives = tf.size(indices)
          total_negatives.append(num_negatives)
          expanded_indexes = tf.expand_dims(indices, axis=1)  # shape: (num_negatives, 1)
          negatives_keep = tf.scatter_nd(expanded_indexes, updates=tf.ones_like(indices, dtype=tf.int32),
                                         shape=tf.shape(classification_loss_per_image))  # shape: (num_priors,)
          negatives_keep = tf.to_float(tf.reshape(negatives_keep, [num_priors]))  # shape: (num_priors,)
          neg_class_losses.append(tf.reduce_sum(classification_loss_per_image * negatives_keep, axis=-1))  # shape: (1,)

        return tf.stack(neg_class_losses), tf.reduce_sum(tf.stack(total_negatives))
  def call(self, x, padding=None):
    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if padding is not None:
      with tf.name_scope("remove_padding"):
        # Flatten padding to [batch_size*length]
        pad_mask = tf.reshape(padding, [-1])

        nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))

        # Reshape x to [batch_size*length, hidden_size] to remove padding
        x = tf.reshape(x, [-1, self.hidden_size])
        x = tf.gather_nd(x, indices=nonpad_ids)

        # Reshape x from 2 dimensions to 3 dimensions.
        x.set_shape([None, self.hidden_size])
        x = tf.expand_dims(x, axis=0)

    output = self.filter_dense_layer(x)
    if self.train:
      output = tf.nn.dropout(output, 1.0 - self.relu_dropout)
    output = self.output_dense_layer(output)

    if padding is not None:
      with tf.name_scope("re_add_padding"):
        output = tf.squeeze(output, axis=0)
        output = tf.scatter_nd(
            indices=nonpad_ids,
            updates=output,
            shape=[batch_size * length, self.hidden_size]
        )
        output = tf.reshape(output, [batch_size, length, self.hidden_size])
    return output
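
The remove_padding/re_add_padding pair above is a gather_nd/scatter_nd round trip; a standalone sketch with made-up shapes:

import tensorflow as tf

x = tf.constant([[1., 1.], [0., 0.], [2., 2.]])      # [length, hidden]; row 1 is padding
pad_mask = tf.constant([0., 1., 0.])
nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))  # [[0], [2]]
compact = tf.gather_nd(x, nonpad_ids)                # padding row removed
restored = tf.scatter_nd(nonpad_ids, compact, tf.shape(x))
# restored equals x with the padded row zeroed out
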
  def testScatterNdRepeatedIndicesAdd(self):
    indices = tf.zeros([100000, 1], tf.int32)
    values = np.random.randn(100000)
    shape = [1]
    with self.test_session():
      val = tf.scatter_nd(indices, values, shape).eval()
    self.assertAllClose([np.sum(values)], val)
def max_unpool(inputs, pooling_indices, output_shape=None, k_size=[1, 2, 2, 1]):
    # NOTE! this function is based on the implementation by kwotsin in
    # https://github.com/kwotsin/TensorFlow-ENet

    # inputs has shape [batch_size, height, width, channels]

    # pooling_indices: pooling indices of the previously max_pooled layer

    # output_shape: what shape the returned tensor should have

    pooling_indices = tf.cast(pooling_indices, tf.int32)
    input_shape = tf.shape(inputs, out_type=tf.int32)

    one_like_pooling_indices = tf.ones_like(pooling_indices, dtype=tf.int32)
    batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
    batch_range = tf.reshape(tf.range(input_shape[0], dtype=tf.int32), shape=batch_shape)
    b = one_like_pooling_indices*batch_range
    y = pooling_indices//(output_shape[2]*output_shape[3])
    x = (pooling_indices//output_shape[3]) % output_shape[2]
    feature_range = tf.range(output_shape[3], dtype=tf.int32)
    f = one_like_pooling_indices*feature_range

    inputs_size = tf.size(inputs)
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, inputs_size]))
    values = tf.reshape(inputs, [inputs_size])

    ret = tf.scatter_nd(indices, values, output_shape)

    return ret
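
A hypothetical usage sketch (shapes made up) pairing max_unpool with tf.nn.max_pool_with_argmax, whose argmax convention this snippet assumes:

import tensorflow as tf

inputs = tf.random_normal([1, 8, 8, 3])
pooled, argmax = tf.nn.max_pool_with_argmax(
    inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
unpooled = max_unpool(pooled, argmax, output_shape=[1, 8, 8, 3])
# each maximum is written back to its original position, zeros elsewhere
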
  def testEmptyOutputShape3(self):
    indices = tf.zeros([0], tf.int32)
    updates = tf.zeros([0], tf.int32)
    shape = tf.constant([0], tf.int32)
    scatter = tf.scatter_nd(indices, updates, shape)

    with self.test_session():
      self.assertEqual(scatter.eval().size, 0)
 def _unsparsify(var_x):
     if not isinstance(var_x, tf.IndexedSlices):
         return var_x
     assert var_x.dense_shape is not None, \
         "memory_saving_gradients encountered sparse gradients of unknown shape"
     indices = var_x.indices
     while indices.shape.ndims < var_x.values.shape.ndims:
         indices = tf.expand_dims(indices, -1)
     return tf.scatter_nd(indices, var_x.values, var_x.dense_shape)
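
A small sketch of what _unsparsify handles: gradients of an embedding lookup arrive as tf.IndexedSlices, and scatter_nd densifies them (made-up shapes):

import tensorflow as tf

params = tf.Variable(tf.ones([5, 3]))
loss = tf.reduce_sum(tf.nn.embedding_lookup(params, [0, 2]))
grad = tf.gradients(loss, params)[0]  # a tf.IndexedSlices
dense_grad = _unsparsify(grad)        # rows 0 and 2 are ones, the rest zeros
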
  def testRank3ValidShape(self):
    indices = tf.zeros([2, 2, 2], tf.int32)
    updates = tf.zeros([2, 2, 2], tf.int32)
    shape = np.array([2, 2, 2])
    self.assertAllEqual(
        tf.scatter_nd(indices, updates, shape).get_shape().as_list(), shape)

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    self.assertAllEqual(
        tf.scatter_nd_update(ref, indices, updates).get_shape().as_list(),
        shape)
  def get_shuffle_ind(self, size):
    if self.shuffle_ind is None:
      # put the shuffle in tf memory to make the eval jobs
      # re-entrant.
      shuffle_ind_val = np.random.permutation(size)
      self.shuffle_ind = tf.get_variable(
          name='shuffle_ind', dtype=tf.int64, initializer=shuffle_ind_val)
      self.unshuffle_ind = tf.scatter_nd(
          tf.reshape(self.shuffle_ind, [-1, 1]), tf.range(size), [size])

    return self.shuffle_ind, self.unshuffle_ind
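
The inversion trick above in isolation: scattering tf.range through a permutation yields the inverse permutation.

import tensorflow as tf

perm = tf.constant([2, 0, 1], dtype=tf.int64)
inv = tf.scatter_nd(tf.reshape(perm, [-1, 1]), tf.range(3), [3])
# inv == [1, 2, 0]; gathering with perm and then inv restores the order
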
  def testGradientsRank2SliceUpdate(self):
    indices = tf.constant([[1], [0]], dtype=tf.int32)
    updates = tf.constant([[3, 4], [1, 2]], dtype=tf.float64)
    shape = tf.constant([2, 2], dtype=tf.int32)
    outputs = tf.scatter_nd(indices, updates, shape)

    grad_vals = tf.constant([[3, 4], [1, 2]], dtype=tf.float64)
    grads = tf.gradients([outputs], [updates], [grad_vals])[0]
    expected_grads = np.array([[1, 2], [3, 4]], dtype=np.float64)
    with self.test_session():
      self.assertAllEqual(expected_grads, grads.eval())
def set_final(sequence, sequence_length, values, time_major=False):
  """Sets the final values in a batch of sequences, and clears those after."""
  sequence_batch_major = (
      sequence if not time_major else tf.transpose(sequence, [1, 0, 2]))
  final_index = _get_final_index(sequence_length, time_major=False)
  mask = tf.sequence_mask(
      tf.maximum(0, sequence_length - 1),
      maxlen=sequence_batch_major.shape[1],
      dtype=tf.float32)
  sequence_batch_major = (
      tf.expand_dims(mask, axis=-1) * sequence_batch_major +
      tf.scatter_nd(final_index, values, tf.shape(sequence_batch_major)))
  return (sequence_batch_major if not time_major else
          tf.transpose(sequence_batch_major, [1, 0, 2]))
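
A hypothetical usage sketch for set_final (values made up; assumes the _get_final_index helper from the same module):

import tensorflow as tf

sequence = tf.ones([2, 4, 3])              # [batch, time, depth]
sequence_length = tf.constant([2, 4])
values = tf.zeros([2, 3])                  # new values for each final step
result = set_final(sequence, sequence_length, values)
# batch 0: step 0 kept, step 1 overwritten with zeros, steps 2-3 cleared
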
  def call(self, x, padding=None):
    """Return outputs of the feedforward network.

    Args:
      x: tensor with shape [batch_size, length, hidden_size]
      padding: (optional) If set, the padding values are temporarily removed
        from x (provided self.allow_pad is set). The padding values are placed
        back in the output tensor in the same locations.
        shape [batch_size, length]

    Returns:
      Output of the feedforward network.
      tensor with shape [batch_size, length, hidden_size]
    """
    padding = None if not self.allow_pad else padding

    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if padding is not None:
      with tf.name_scope("remove_padding"):
        # Flatten padding to [batch_size*length]
        pad_mask = tf.reshape(padding, [-1])

        nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))

        # Reshape x to [batch_size*length, hidden_size] to remove padding
        x = tf.reshape(x, [-1, self.hidden_size])
        x = tf.gather_nd(x, indices=nonpad_ids)

        # Reshape x from 2 dimensions to 3 dimensions.
        x.set_shape([None, self.hidden_size])
        x = tf.expand_dims(x, axis=0)

    output = self.filter_dense_layer(x)
    if self.train:
      output = tf.nn.dropout(output, 1.0 - self.relu_dropout)
    output = self.output_dense_layer(output)

    if padding is not None:
      with tf.name_scope("re_add_padding"):
        output = tf.squeeze(output, axis=0)
        output = tf.scatter_nd(
            indices=nonpad_ids,
            updates=output,
            shape=[batch_size * length, self.hidden_size]
        )
        output = tf.reshape(output, [batch_size, length, self.hidden_size])
    return output
  def __call__(self, shape, dtype=None, partition_info=None):
    del partition_info  # unused
    assert len(shape) > 2, shape

    support = tuple(shape[:-2]) + (1, 1)
    indices = [[s // 2 for s in support]]
    updates = tf.constant([self.gain], dtype=dtype)
    kernel = tf.scatter_nd(indices, updates, support)

    assert shape[-2] == shape[-1], shape
    if shape[-1] != 1:
      kernel *= tf.eye(shape[-1], dtype=dtype)

    return kernel
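
A worked sketch of what the initializer above builds for shape (3, 3, 1, 1) with gain 1: a centered delta kernel, so convolving with it is the identity map.

import tensorflow as tf

support = (3, 3, 1, 1)
indices = [[s // 2 for s in support]]  # [[1, 1, 0, 0]], the center tap
kernel = tf.scatter_nd(indices, tf.constant([1.0]), support)
# for shape[-1] > 1 the class also multiplies by tf.eye to keep
# channels independent
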
    def compute(bse):
      batch = bse[0]
      start = bse[1]
      end   = bse[2]
      size  = end - start

      seg_ended    = input[batch,start:end,1]
      idx          = tf.where(tf.not_equal(seg_ended, 0))
      length_dist  = tf.scatter_nd(idx, end_distribution[:tf.shape(idx)[0]], (window,))
      length_dist += no_label_backup[window-size:2*window-size]
      length_dist  = length_dist / tf.reduce_sum(length_dist)
      length_dist  = tf.expand_dims(length_dist, -1)

      result = onehot[batch,start:start+window,:] * length_dist
      return result
    def compute(bse):
      batch = bse[0]
      start = bse[1]
      end   = bse[2]

      batch_cls   = classes[batch][start:end]
      cls_not_eq  = tf.not_equal(batch_cls[:-1], batch_cls[1:])
      cls_changed = tf.concat([cls_not_eq, [True]], axis=0)
      idx         = tf.where(cls_changed)
      count       = tf.squeeze(tf.concat([[idx[0] + 1], idx[1:] - idx[:-1]], axis=0), axis=1)
      freq        = tf.cast(count, dtype='float32')

      res = tf.scatter_nd(idx, tf.cast(count, dtype='float32') / tf.cast(end - start, dtype='float32'), (window,))

      return res
        def f2():
            # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that
            # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model
            # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest.

            # To do this, we reshape `neg_class_loss_all` to 1D...
            neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1]) # Tensor of shape (batch_size * n_boxes,)
            # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those...
            values, indices = tf.nn.top_k(neg_class_loss_all_1D, n_negative_keep, False) # We don't need sorting
            # ...and with these indices we'll create a mask...
            negatives_keep = tf.scatter_nd(tf.expand_dims(indices, axis=1), updates=tf.ones_like(indices, dtype=tf.int32), shape=tf.shape(neg_class_loss_all_1D)) # Tensor of shape (batch_size * n_boxes,)
            negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes])) # Tensor of shape (batch_size, n_boxes)
            # ...and use it to keep only those boxes and mask all other classification losses
            neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1) # Tensor of shape (batch_size,)
            return neg_class_loss
  def testExtraIndicesDimensions(self):
    indices = tf.zeros([1, 1, 2], tf.int32)
    updates = tf.zeros([1, 1], tf.int32)
    shape = np.array([2, 2])
    scatter = tf.scatter_nd(indices, updates, shape)
    self.assertAllEqual(scatter.get_shape().as_list(), shape)
    expected_result = np.zeros([2, 2], dtype=np.int32)
    with self.test_session():
      self.assertAllEqual(expected_result, scatter.eval())

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    scatter_update = tf.scatter_nd_update(ref, indices, updates)
    self.assertAllEqual(scatter_update.get_shape().as_list(), shape)

    with self.test_session():
      ref.initializer.run()
      self.assertAllEqual(expected_result, scatter_update.eval())
    def compute(bse):
      batch = bse[0]
      start = bse[1]
      end   = bse[2]
      size  = end - start

      cls_chg = cls_changed[batch][start:end]
      idx     = tf.where(cls_chg)
      res     = tf.scatter_nd(idx, end_distribution[:tf.shape(idx)[0]], (window,))
      if min_length > 1:
        res *= min_length_filter
      res    += no_label_backup[window-size:2*window-size]
      res     = res / tf.reduce_sum(res)
      if smoothing is not None:
        res = tf.tensordot(res, smoothing.read(size - 1), [[0], [0]])

      return res
  def restore(self, x):
    """Add padding back to the given tensor.

    Args:
      x (tf.Tensor): of shape [dim_compressed,...]

    Returns:
      a tensor of shape [dim_origin,...] with dim_compressed >= dim_origin. The
      dim is restored from the original reference tensor
    """
    with tf.name_scope("pad_reduce/restore"):
      x = tf.scatter_nd(
          indices=self.nonpad_ids,
          updates=x,
          shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0),
      )
    return x
def combine_loss_val(embedding, labels, w_init, out_num, margin_a, margin_m, margin_b, s):
    '''
    This code is contributed by RogerLo. Thanks for your contribution.

    :param embedding: the input embedding vectors
    :param labels: the input labels, the shape should be eg: (batch_size, 1)
    :param s: scalar value, default is 64
    :param out_num: output class num
    :param margin_a, margin_m, margin_b: the margin values
    :return: the final calculated output; this output is sent into tf.nn.softmax directly
    '''
    weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
                              initializer=w_init, dtype=tf.float32)
    weights_unit = tf.nn.l2_normalize(weights, axis=0)
    embedding_unit = tf.nn.l2_normalize(embedding, axis=1)
    cos_t = tf.matmul(embedding_unit, weights_unit)
    ordinal = tf.constant(list(range(0, embedding.get_shape().as_list()[0])), tf.int64)
    ordinal_y = tf.stack([ordinal, labels], axis=1)
    zy = cos_t * s
    sel_cos_t = tf.gather_nd(zy, ordinal_y)
    if margin_a != 1.0 or margin_m != 0.0 or margin_b != 0.0:
        if margin_a == 1.0 and margin_m == 0.0:
            s_m = s * margin_b
            new_zy = sel_cos_t - s_m
        else:
            cos_value = sel_cos_t / s
            t = tf.acos(cos_value)
            if margin_a != 1.0:
                t = t * margin_a
            if margin_m > 0.0:
                t = t + margin_m
            body = tf.cos(t)
            if margin_b > 0.0:
                body = body - margin_b
            new_zy = body * s
    updated_logits = tf.add(zy, tf.scatter_nd(ordinal_y, tf.subtract(new_zy, sel_cos_t), zy.get_shape()))
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=updated_logits))
    predict_cls = tf.argmax(updated_logits, 1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls, tf.int64), tf.cast(labels, tf.int64)), 'float'))
    predict_cls_s = tf.argmax(zy, 1)
    accuracy_s = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls_s, tf.int64), tf.cast(labels, tf.int64)), 'float'))
    return zy, loss, accuracy, accuracy_s, predict_cls_s
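
The logit-patching idiom above in isolation (made-up numbers): adding a scatter_nd of (new - old) deltas rewrites only the (i, label_i) entries of the logits.

import tensorflow as tf

logits = tf.constant([[1., 2.], [3., 4.]])
labels = tf.constant([1, 0], dtype=tf.int64)
idx = tf.stack([tf.range(2, dtype=tf.int64), labels], axis=1)
old = tf.gather_nd(logits, idx)  # [2., 3.]
new = old - 1.0                  # stand-in for the real margin update
patched = logits + tf.scatter_nd(idx, new - old,
                                 tf.shape(logits, out_type=tf.int64))
# patched == [[1., 1.], [2., 4.]]
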
    def build_step(self, signals):
        input = signals.gather(self.input_data)
        input = tf.reshape(input, (self.n_ops, -1))

        state = signals.gather(self.state_sig)

        # compute output
        if self.C is None:
            output = tf.zeros_like(input)
        else:
            output = state * self.C
            output = tf.reshape(
                output,
                (self.n_ops, -1, signals.minibatch_size * self.signal_d))
            output = tf.reduce_sum(output, axis=1)

        if self.D is not None:
            output += self.D * input

        signals.scatter(self.output_data, output)

        # update state
        if LooseVersion(tf.__version__) < LooseVersion("1.7.0"):
            mat_mul = gen_sparse_ops._sparse_tensor_dense_mat_mul
        else:
            mat_mul = gen_sparse_ops.sparse_tensor_dense_mat_mul
        r = mat_mul(self.A_indices, self.A, self.A_shape, state)

        with tf.control_dependencies([output]):
            state = r + tf.scatter_nd(self.offsets, input,
                                      self.state_sig.shape)
            # TODO: tensorflow does not yet support sparse_tensor_dense_add
            # on the GPU
            # state = gen_sparse_ops._sparse_tensor_dense_add(
            #     self.offsets, input, self.state_sig.shape, r)
        state.set_shape(self.state_sig.shape)

        signals.mark_gather(self.input_data)
        signals.mark_gather(self.state_sig)
        signals.scatter(self.state_sig, state)
    def _myForwardPass(self):
        cnn_feats = self._ph.cnn_feats
        pred_polys = self._ph.pred_polys
        pred_mask_imgs = self._ph.pred_mask_imgs
        last_cell_state_1 = self._ph.cells_1[:, -1, :, :, :]
        last_cell_state_2 = self._ph.cells_2[:, -1, :, :, :]
        weight_decay = 0.00001

        predicted_history = tf.zeros(shape=(self.batch_size, 28, 28, 1))

        # Drawing the canvas
        for i in range(self.seq_len):
            pred_polys_t = pred_polys[:, i]  # batch x
            indices = tf.concat(
                [tf.reshape(tf.range(0, self.batch_size), (self.batch_size, 1)), tf.cast(pred_polys_t, tf.int32)],
                axis=1)
            updates = tf.ones(shape=self.batch_size)
            pred_polys_t = tf.scatter_nd(indices, updates, shape=(self.batch_size, 28, 28))
            predicted_history = predicted_history + tf.expand_dims(pred_polys_t, axis=-1)

        xt = tf.concat([cnn_feats, predicted_history, pred_mask_imgs, last_cell_state_1, last_cell_state_2],
                       axis=3)

        with slim.arg_scope([slim.conv2d], kernel_size=[3, 3], stride=1,
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={"is_training": self.is_training, "decay": 0.99, "center": True,
                                               "scale": True},
                            weights_initializer=layers.variance_scaling_initializer(
                                factor=2.0, mode='FAN_IN',
                                uniform=False)
                            ):
            self._conv1 = slim.conv2d(xt, scope="conv1", num_outputs=16)
            self._conv2 = slim.conv2d(self._conv1, scope="conv2", num_outputs=1)

        output = layers.fully_connected(slim.flatten(self._conv2), 1, weights_regularizer=layers.l2_regularizer(1e-5),
                                        scope="FC")
        return output
def train(train_data, test_data=None):
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1], ))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders()
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      num_neg_samples=FLAGS.neg_sample_size,
                                      context_pairs=context_pairs)
    adj_info_ph = tf.compat.v1.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'gcn':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="gcn",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   concat=False,
                                   logging=True)

    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   identity_dim=FLAGS.identity_dim,
                                   aggregator_type="seq",
                                   model_size=FLAGS.model_size,
                                   logging=True)

    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="maxpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]

        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="meanpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)

    elif FLAGS.model == 'n2v':
        model = Node2VecModel(
            placeholders,
            features.shape[0],
            minibatch.deg,
            #2x because graphsage uses concat
            nodevec_dim=2 * FLAGS.dim_1,
            lr=FLAGS.learning_rate)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.compat.v1.ConfigProto(
        log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    #config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True

    # Initialize WandB experiment
    wandb.init(project='chengdu_GraphSAGE',
               save_code=True,
               tags=['unsupervised'])
    wandb.config.update(flags.FLAGS)

    # Initialize session
    sess = tf.compat.v1.Session(config=config)
    merged = tf.compat.v1.summary.merge_all()
    summary_writer = tf.compat.v1.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.compat.v1.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Init saver
    saver = tf.compat.v1.train.Saver(max_to_keep=8,
                                     keep_checkpoint_every_n_hours=1)

    # Train model
    train_shadow_mrr = None
    val_shadow_mrr = None

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    train_adj_info = tf.compat.v1.assign(adj_info, minibatch.adj)
    val_adj_info = tf.compat.v1.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([
                merged, model.opt_op, model.loss, model.ranks, model.aff_all,
                model.mrr, model.outputs1
            ],
                            feed_dict=feed_dict)
            train_cost = outs[2]
            train_mrr = outs[5]
            if train_shadow_mrr is None:
                train_shadow_mrr = train_mrr
            else:
                train_shadow_mrr -= (1 - 0.99) * (train_shadow_mrr - train_mrr)

            # Validation
            if iter % FLAGS.validate_iter == 0:
                sess.run(val_adj_info.op)
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess, model, minibatch, size=FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost
            if val_shadow_mrr is None:
                val_shadow_mrr = val_mrr
            else:
                val_shadow_mrr -= (1 - 0.99) * (val_shadow_mrr - val_mrr)

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Print results
            avg_time = (avg_time * total_steps + time.time() -
                        t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                print(
                    "[%03d/%03d]" % (epoch + 1, FLAGS.epochs),
                    "Iter:",
                    '%04d' % iter,
                    "train_loss =",
                    "{:.5f}".format(train_cost),
                    "train_mrr =",
                    "{:.5f}".format(train_mrr),
                    "train_mrr_ema =",
                    "{:.5f}".format(
                        train_shadow_mrr),  # exponential moving average
                    "val_loss =",
                    "{:.5f}".format(val_cost),
                    "val_mrr =",
                    "{:.5f}".format(val_mrr),
                    "val_mrr_ema =",
                    "{:.5f}".format(
                        val_shadow_mrr),  # exponential moving average
                    "time =",
                    "{:.5f}".format(avg_time))

            # W&B Logging
            if FLAGS.wandb_log and iter % FLAGS.wandb_log_iter == 0:
                wandb.log({'train_loss': train_cost, 'epoch': epoch})
                wandb.log({'train_mrr': train_mrr, 'epoch': epoch})
                wandb.log({'train_mrr_ema': train_shadow_mrr, 'epoch': epoch})
                wandb.log({'val_loss': val_cost, 'epoch': epoch})
                wandb.log({'val_mrr': val_mrr, 'epoch': epoch})
                wandb.log({'val_mrr_ema': val_shadow_mrr, 'epoch': epoch})
                wandb.log({'time': avg_time, 'epoch': epoch})

            iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                print('Max total steps reached!')
                break

        # Save embeddings
        if FLAGS.save_embeddings and epoch % FLAGS.save_embeddings_epoch == 0:
            save_val_embeddings(sess, model, minibatch,
                                FLAGS.validate_batch_size, log_dir())

            # # Also report classifier metric on the embedding
            # all_tr_res, all_ts_res = osm_evaluation.evaluate(FLAGS.train_prefix, log_dir, n_iter=FLAGS.classif_n_iter)
            # if FLAGS.wandb_log:
            #     wandb.log(all_tr_res)
            #     wandb.log(all_ts_res)

        # Save Model checkpoints
        if FLAGS.save_checkpoints and epoch % FLAGS.save_checkpoints_epoch == 0:
            # saver.save(sess, log_dir() + 'model', global_step=1000)
            print('Save model checkpoint:', wandb.run.dir, iter, total_steps,
                  epoch)
            saver.save(
                sess,
                os.path.join(wandb.run.dir,
                             "model-" + str(epoch + 1) + ".ckpt"))

        if total_steps > FLAGS.max_total_steps:
            print('Max total steps reached!')
            break

    print("Optimization Finished!")
    if FLAGS.save_embeddings:
        sess.run(val_adj_info.op)

        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size,
                            log_dir())

        if FLAGS.model == "n2v":
            # stopping the gradient for the already trained nodes
            train_ids = tf.constant(
                [[id_map[n]] for n in G.nodes_iter()
                 if not G.node[n]['val'] and not G.node[n]['test']],
                dtype=tf.int32)
            test_ids = tf.constant([[id_map[n]] for n in G.nodes_iter()
                                    if G.node[n]['val'] or G.node[n]['test']],
                                   dtype=tf.int32)
            update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                  tf.squeeze(test_ids))
            no_update_nodes = tf.nn.embedding_lookup(model.context_embeds,
                                                     tf.squeeze(train_ids))
            update_nodes = tf.scatter_nd(test_ids, update_nodes,
                                         tf.shape(model.context_embeds))
            no_update_nodes = tf.stop_gradient(
                tf.scatter_nd(train_ids, no_update_nodes,
                              tf.shape(model.context_embeds)))
            model.context_embeds = update_nodes + no_update_nodes
            sess.run(model.context_embeds)

            # run random walks
            from graphsage.utils import run_random_walks
            nodes = [
                n for n in G.nodes_iter()
                if G.node[n]["val"] or G.node[n]["test"]
            ]
            start_time = time.time()
            pairs = run_random_walks(G, nodes, num_walks=50)
            walk_time = time.time() - start_time

            test_minibatch = EdgeMinibatchIterator(
                G,
                id_map,
                placeholders,
                batch_size=FLAGS.batch_size,
                max_degree=FLAGS.max_degree,
                num_neg_samples=FLAGS.neg_sample_size,
                context_pairs=pairs,
                n2v_retrain=True,
                fixed_n2v=True)

            start_time = time.time()
            print("Doing test training for n2v.")
            test_steps = 0
            for epoch in range(FLAGS.n2v_test_epochs):
                test_minibatch.shuffle()
                while not test_minibatch.end():
                    feed_dict = test_minibatch.next_minibatch_feed_dict()
                    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                    outs = sess.run([
                        model.opt_op, model.loss, model.ranks, model.aff_all,
                        model.mrr, model.outputs1
                    ],
                                    feed_dict=feed_dict)
                    if test_steps % FLAGS.print_every == 0:
                        print("Iter:", '%04d' % test_steps, "train_loss=",
                              "{:.5f}".format(outs[1]), "train_mrr=",
                              "{:.5f}".format(outs[-2]))
                    test_steps += 1
            train_time = time.time() - start_time
            save_val_embeddings(sess,
                                model,
                                minibatch,
                                FLAGS.validate_batch_size,
                                log_dir(),
                                mod="-test")
            print("Total time: ", train_time + walk_time)
            print("Walk time: ", walk_time)
            print("Train time: ", train_time)
 def _scatter(indices, array, new_dimensions):
     indices = np.expand_dims(indices, 1)
     return tf.scatter_nd(indices, array, new_dimensions)
def word_distribution(decoder_logit_list,
                      decoder_output_list,
                      encoder_outputs,
                      encoder_embedding,
                      sentence_complex_input_placeholder,
                      obj_tensors,
                      model_config,
                      data,
                      segment_mask=None,
                      is_test=False):
    if model_config.architecture == 'ut2t':
        # attn_dists = obj_tensors[
        #     'model/transformer_decoder/decoder/universal_transformer_act/encdec_attention/multihead_attention/dot_product_attention']
        # attn_dists = attn_dists[:, 0, :, :]
        raise ValueError('Cannot use copy in ut2t')
    else:
        attn_dists = obj_tensors[
            'model/transformer_decoder/decoder/layer_%s/encdec_attention/multihead_attention/dot_product_attention'
            % (model_config.num_decoder_layers - 1)]
        attn_dists_stack = attn_dists[:, 0, :, :]
        if is_test:
            attn_dists = [attn_dists_stack[:, -1, :]]
            attn_dists_stack = tf.expand_dims(attn_dists[0], axis=1)
        else:
            attn_dists = tf.unstack(attn_dists_stack, axis=1)

    sentence_complex_input = tf.stack(sentence_complex_input_placeholder,
                                      axis=1)
    ignore_token_idx = data.vocab_simple.encode(constant.SYMBOL_UNK)
    if type(ignore_token_idx) == list:
        assert len(ignore_token_idx) == 1
        ignore_token_idx = ignore_token_idx[0]
    if segment_mask is not None:
        sentence_complex_input *= segment_mask
        sentence_complex_input += tf.to_int32(
            tf.to_float(tf.equal(sentence_complex_input, 0)) *
            ignore_token_idx)

    batch_nums = tf.range(0, limit=model_config.batch_size)
    batch_nums = tf.expand_dims(batch_nums, 1)
    batch_nums = tf.tile(batch_nums, [1, model_config.max_complex_sentence])
    indices = tf.stack((batch_nums, sentence_complex_input), axis=2)
    attn_dists_projected = [
        tf.scatter_nd(
            indices, copy_dist,
            [model_config.batch_size,
             data.vocab_simple.vocab_size()]) for copy_dist in attn_dists
    ]
    for attn_id, attn_dist in enumerate(attn_dists_projected):
        mask = tf.concat([
            tf.ones([model_config.batch_size, ignore_token_idx]),
            tf.zeros([model_config.batch_size, 1]),
            tf.ones([
                model_config.batch_size,
                data.vocab_simple.vocab_size() - ignore_token_idx - 1
            ])
        ],
                         axis=1)
        attn_dists_projected[attn_id] *= mask

    attn_dists_projected = tf.stack(attn_dists_projected, axis=1)
    attn_dists_projected = tf.stop_gradient(attn_dists_projected)

    decoder_logit = tf.stack(decoder_logit_list, axis=1)
    decoder_output = tf.stack(decoder_output_list, axis=1)

    context_vectors = tf.matmul(attn_dists_stack, encoder_outputs)
    context_emb_vectors = tf.matmul(attn_dists_stack, encoder_embedding)
    context_vectors = tf.stop_gradient(context_vectors)
    context_emb_vectors = tf.stop_gradient(context_emb_vectors)
    decoder_output = tf.stop_gradient(decoder_output)
    # decoder_logit = tf.stop_gradient(decoder_logit)
    evidence = tf.concat(
        [context_vectors, context_emb_vectors, decoder_output], axis=-1)
    gate = tf.layers.dense(evidence, 1, activation=tf.nn.sigmoid)
    if 'thres' in model_config.pointer_mode:
        output_logit = tf.cond(tf.greater_equal(gate, 0.5),
                               lambda: attn_dists_projected,
                               lambda: decoder_logit)
    elif 'fuse' in model_config.pointer_mode:
        output_logit = gate * attn_dists_projected + (1 - gate) * decoder_logit
    else:
        raise NotImplementedError('unknown output pointer')

    return tf.unstack(output_logit, axis=1)
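
The copy projection at the heart of word_distribution, in isolation (all sizes made up): per-position attention weights are scattered onto vocabulary ids, and duplicate source tokens accumulate.

import tensorflow as tf

batch_size, src_len, vocab_size = 2, 3, 5
src_ids = tf.constant([[1, 4, 1], [0, 2, 3]])           # token id per position
attn = tf.constant([[0.2, 0.5, 0.3], [0.1, 0.6, 0.3]])  # one dist per sample
batch_nums = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, src_len])
indices = tf.stack((batch_nums, src_ids), axis=2)       # [batch, src_len, 2]
vocab_dist = tf.scatter_nd(indices, attn, [batch_size, vocab_size])
# row 0: id 1 receives 0.2 + 0.3, id 4 receives 0.5
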
def sphereloss(inputs,
               label,
               classes,
               batch_size,
               fraction=1,
               scope='Logits',
               reuse=None,
               m=4,
               eplion=1e-8):
    """
    inputs tensor shape=[batch,features_num]
    labels tensor shape=[batch] each unit belong num_outputs

    """
    inputs_shape = inputs.get_shape().as_list()
    with tf.variable_scope(name_or_scope=scope):
        weight = tf.Variable(initial_value=tf.random_normal(
            (classes, inputs_shape[1])) * tf.sqrt(2 / inputs_shape[1]),
                             dtype=tf.float32,
                             name='weights')  # shape = [classes, features_num]
        print("weight shape = ", weight.get_shape().as_list())

    weight_unit = tf.nn.l2_normalize(weight, dim=1)
    print("weight_unit shape = ", weight_unit.get_shape().as_list())

    inputs_mo = tf.sqrt(tf.reduce_sum(tf.square(inputs), axis=1) +
                        eplion)  # shape = [batch]
    print("inputs_mo shape = ", inputs_mo.get_shape().as_list())

    inputs_unit = tf.nn.l2_normalize(inputs,
                                     dim=1)  #shape = [batch,features_num]
    print("inputs_unit shape = ", inputs_unit.get_shape().as_list())

    logits = tf.matmul(
        inputs, tf.transpose(weight_unit))  #shape = [batch,classes] x * w_unit
    print("logits shape = ", logits.get_shape().as_list())

    weight_unit_batch = tf.gather(weight_unit,
                                  label)  # shape = [batch, features_num]
    print("weight_unit_batch shape = ",
          weight_unit_batch.get_shape().as_list())

    logits_inputs = tf.reduce_sum(tf.multiply(inputs, weight_unit_batch),
                                  axis=1)  # shape = [batch]

    print("logits_inputs shape = ", logits_inputs.get_shape().as_list())

    cos_theta = tf.reduce_sum(tf.multiply(inputs_unit, weight_unit_batch),
                              axis=1)  # shape = [batch]
    print("cos_theta shape = ", cos_theta.get_shape().as_list())

    cos_theta_square = tf.square(cos_theta)
    cos_theta_biq = tf.pow(cos_theta, 4)
    sign0 = tf.sign(cos_theta)
    sign2 = tf.sign(2 * cos_theta_square - 1)
    sign3 = tf.multiply(sign2, sign0)
    sign4 = 2 * sign0 + sign3 - 3
    cos_far_theta = sign3 * (8 * cos_theta_biq - 8 * cos_theta_square +
                             1) + sign4
    print("cos_far_theta  = ", cos_far_theta.get_shape().as_list())

    logit_ii = tf.multiply(cos_far_theta, inputs_mo)  #shape = batch
    print("logit_ii shape = ", logit_ii.get_shape().as_list())

    index_range = tf.range(start=0,
                           limit=tf.shape(inputs, out_type=tf.int64)[0],
                           delta=1,
                           dtype=tf.int64)
    index_labels = tf.stack([index_range, label], axis=1)
    index_logits = tf.scatter_nd(index_labels,
                                 tf.subtract(logit_ii, logits_inputs),
                                 tf.shape(logits, out_type=tf.int64))
    print("index_logits shape = ", logit_ii.get_shape().as_list())

    logits_final = tf.add(logits, index_logits)
    logits_final = fraction * logits_final + (1 - fraction) * logits

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,
                                                       logits=logits_final))

    return logits_final, loss
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                         tf.nn.l2_loss(b_answer2))

    # The regressed word. This isn't an actual word yet;
    #    we still have to find the closest match.
    logit = tf.expand_dims(
        tf.matmul(tf.matmul(a0, w_answer1) + b_answer1, w_answer2) + b_answer2,
        1)

    # Make a mask over which words exist.
    with tf.variable_scope("ending"):
        all_ends = tf.reshape(input_sentence_endings, [-1, 2])
        range_ends = tf.range(tf.shape(all_ends)[0])
        ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
        ind = tf.reduce_max(tf.scatter_nd(
            ends_indices, all_ends[:, 1],
            [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                            axis=-1)
        range_ind = tf.range(tf.shape(ind)[0])
        mask_ends = tf.cast(
            tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                          tf.ones_like(range_ind),
                          [tf.reduce_max(ind) + 1,
                           tf.shape(ind)[0]]), bool)
        # A bit of a trick. With the locations of the ends of the mask (the last periods in
        #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
        #  (starting from all 1). For each context in the batch, this will result in 1s
        #  up until the marker (the location of that last period) and 0s afterwards.
        mask = tf.scan(tf.logical_xor, mask_ends,
                       tf.ones_like(range_ind, dtype=bool))
    def step(self, time, inputs, state, name=None):
        """Perform a decoding step.
        Args:
        time: scalar `int32` tensor.
        inputs: A (structure of) input tensors.
        state: A (structure of) state tensors and TensorArrays.
        name: Name scope for any created operations.
        Returns:
        `(outputs, next_state, next_inputs, finished)`.
        """
        with ops.name_scope(name, "PGDecoderStep", (time, inputs, state)):
            cell_outputs, cell_state = self._cell(inputs, state)
            # the first cell state contains attention, which is context
            attention = cell_state[0].attention
            att_cell_state = cell_state[0].cell_state
            alignments = cell_state[0].alignments

            with tf.variable_scope('calculate_pgen'):
                p_gen = _linear([attention, inputs, att_cell_state], 1, True)
                p_gen = tf.sigmoid(p_gen)

            if self._output_layer is not None:
                cell_outputs = self._output_layer(cell_outputs)

            vocab_dist = tf.nn.softmax(cell_outputs) * p_gen

            # z = tf.reduce_sum(alignments,axis=1)
            # z = tf.reduce_sum(tf.cast(tf.less_equal(alignments, 0),tf.int32))
            alignments = alignments * (1 - p_gen)

            # x = tf.reduce_sum(tf.cast(tf.less_equal((1-p_gen), 0),tf.int32))
            # y = tf.reduce_sum(tf.cast(tf.less_equal(alignments[3], 0),tf.int32))

            # this is only for debug
            # alignments2 =  tf.Print(alignments2,[tf.shape(inputs),x,y,alignments[2][9:12]],message="zeros in vocab dist and alignments")

            # since we have OOV words, we need expand the vocab dist
            vocab_size = tf.shape(vocab_dist)[-1]
            extended_vsize = vocab_size + self.source_oov_words
            batch_size = tf.shape(vocab_dist)[0]
            extra_zeros = tf.zeros((batch_size, self.source_oov_words))
            # batch * extend vocab size
            vocab_dists_extended = tf.concat(axis=-1,
                                             values=[vocab_dist, extra_zeros])
            # vocab_dists_extended = tf.Print(vocab_dists_extended,[tf.shape(vocab_dists_extended),self.source_oov_words],message='vocab_dists_extended size')

            batch_nums = tf.range(0, limit=batch_size)  # shape (batch_size)
            batch_nums = tf.expand_dims(batch_nums, 1)  # shape (batch_size, 1)
            attn_len = tf.shape(self.source_extend_tokens)[
                1]  # number of states we attend over
            batch_nums = tf.tile(batch_nums,
                                 [1, attn_len])  # shape (batch_size, attn_len)
            indices = tf.stack((batch_nums, self.source_extend_tokens),
                               axis=2)  # shape (batch_size, enc_t, 2)
            shape = [batch_size, extended_vsize]
            attn_dists_projected = tf.scatter_nd(indices, alignments, shape)

            final_dists = attn_dists_projected + vocab_dists_extended
            # final_dists = tf.Print(final_dists,[tf.reduce_sum(tf.cast(tf.less_equal(final_dists[0],0),tf.int32))],message='final dist')
            # note: sample_ids will contains OOV words
            sample_ids = self._helper.sample(time=time,
                                             outputs=final_dists,
                                             state=cell_state)

            (finished, next_inputs,
             next_state) = self._helper.next_inputs(time=time,
                                                    outputs=cell_outputs,
                                                    state=cell_state,
                                                    sample_ids=sample_ids)

            outputs = tf.contrib.seq2seq.BasicDecoderOutput(
                final_dists, sample_ids)
            return (outputs, next_state, next_inputs, finished)
def scatter_nd(*args, **kwargs):
    """ See https://www.tensorflow.org/api_docs/python/tf/scatter_nd .
    """
    return tensorflow.scatter_nd(*args, **kwargs)
    def __call__(self, y_t_1, s_t_1, encoder_outputs, encoder_feature,
                 enc_padding_mask, extra_zeros, enc_batch_extend_vocab,
                 coverage, step):

        if step == 0:
            c_t, _, coverage_next = self.attention(s_t_1, encoder_outputs,
                                                   encoder_feature,
                                                   enc_padding_mask, coverage)

            coverage = coverage_next

        y_t_1_embd = self.embedding(y_t_1)  # batch_size x 1 x embedding_dim
        y_t_1_embd = self.dropout(y_t_1_embd)
        y_t_1_embd = tf.expand_dims(y_t_1_embd, 1)
        gru_out, s_t = self.gru(
            y_t_1_embd, s_t_1
        )  # batch_size x 1 x embedding_dim   batch_size x embedding_dim
        gru_out = self.dropout(gru_out)

        s_t_hat = s_t  # batch_size x embedding_dim
        c_t, attn_dist, coverage_next = self.attention(s_t_hat,
                                                       encoder_outputs,
                                                       encoder_feature,
                                                       enc_padding_mask,
                                                       coverage)

        if step > 0:
            coverage = coverage_next

        p_gen = None
        if self.pointer_gen:
            y_t_1_embd = tf.reshape(y_t_1_embd, [-1, self.embedding_dim])
            p_gen_input = tf.concat((c_t, s_t_hat, y_t_1_embd),
                                    1)  # B x (2*2*dec_units + emb_dim)
            p_gen = self.p_gen_linear(p_gen_input)
            p_gen = tf.sigmoid(p_gen)

        output = tf.concat((tf.reshape(gru_out, [-1, self.dec_units]), c_t),
                           1)  # B x dec_units * 3
        output = self.out1(output)  # B x dec_units

        # output = F.relu(output)

        output = self.out2(output)  # B x vocab_size
        vocab_dist = tf.nn.softmax(output, axis=1)

        if self.pointer_gen:
            vocab_dist_ = p_gen * vocab_dist
            attn_dist_ = (1 - p_gen) * attn_dist
            if extra_zeros is not None:
                vocab_dist_ = tf.concat([vocab_dist_, extra_zeros], 1)
            shape_ = vocab_dist_.shape[1]
            enc_batch_extend_vocab = tf.expand_dims(enc_batch_extend_vocab, 2)
            attn_vocab_dist_ = tf.convert_to_tensor([
                tf.scatter_nd(indices, updates, [shape_])
                for (indices,
                     updates) in zip(enc_batch_extend_vocab, attn_dist_)
            ])
            final_dist = vocab_dist_ + attn_vocab_dist_
        else:
            final_dist = vocab_dist

        return final_dist, s_t, c_t, attn_dist, p_gen, coverage
def dense_from_coo(shape, conns, dtype=tf.float64):
    idxs, weights = conns
    if len(idxs) == 0:
        return tf.zeros(shape, dtype=dtype)
    rows, cols = np.array(idxs).transpose()
    return tf.scatter_nd(tf.stack([rows, cols], -1), tf.convert_to_tensor(weights, preferred_dtype=dtype), shape)
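A quick usage sketch for dense_from_coo, assuming a tiny 3x3 network whose connections are given in COO form as a pair of ((row, col) index tuples, weights):

import numpy as np
import tensorflow as tf

conns = ([(0, 1), (2, 0)], [0.5, -1.0])
dense = dense_from_coo((3, 3), conns)
# -> [[ 0.   0.5  0. ]
#     [ 0.   0.   0. ]
#     [-1.   0.   0. ]]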
def multiple_dot_attention(query,
                           key,
                           value,
                           query_length=None,
                           query_mask=None,
                           memory_length=None,
                           memory_mask=None,
                           name=None):
    """
    Attention method for given queries, keys and values, which in each sample we have multiple queries.
    (a sequence of queries)

    Args:
        query: a Tensor of shape [batch_size, q_length, query_dim]
        key: a Tensor of shape [batch_size, seq_length, query_dim]
        value: a Tensor of shape [batch_size, seq_length, value_dim]
        query_length: (optional) an integer Tensor of shape [batch_size] which specify length of
                        queries for each sample
        query_mask: (optional) a bool Tensor of shape [batch_size, query_length] for specifying  the true
                    elements of queries in the condition that query_length is not given
        memory_length: (optional) an integer Tensor of shape [batch_size] which specify length of
                        memory (key and values) for each sample
        memory_mask: (optional) a bool Tensor of shape [batch_size, seq_length] for specifying the true elements of
                     keys and values in the condition that memory_length is not given
        name: (optional)

    Returns:
        a Tensor of shape [batch_size, q_length, value_dim] which is the result of attention mechanism

    """
    if name is None:
        name = "multiple_dot_attention"
    with tf.name_scope(name):
        if query_length is not None and query_mask is not None:
            raise AttributeError(
                "Only one of query_length and query_mask can be specified")
        if memory_length is not None and memory_mask is not None:
            raise AttributeError(
                "Only one of memory_length and memory_mask can be specified")
        query_shape = tf.shape(query)
        key_shape = tf.shape(key)
        value_shape = tf.shape(value)
        batch_size = query_shape[0]
        q_length = query_shape[1]
        seq_length = key_shape[1]
        query_dim = query_shape[2]
        value_dim = value_shape[2]
        if query_length is not None:
            query_mask = mask_length(query_length, q_length)
        if query_mask is None:
            query_mask = tf.fill([batch_size, q_length], True)
        if memory_length is not None:
            memory_mask = mask_length(memory_length, seq_length)
        if memory_mask is None:
            memory_mask = tf.fill([batch_size, seq_length], True)
        indices = tf.where(query_mask)
        query = tf.boolean_mask(query, query_mask)
        key = tf.gather(key, indices[:, 0])
        value = tf.gather(value, indices[:, 0])
        memory_mask = tf.gather(memory_mask, indices[:, 0])
        attention = simple_dot_attention(query,
                                         key,
                                         value,
                                         memory_mask=memory_mask)
    return tf.scatter_nd(indices,
                         attention, [batch_size, q_length, value_dim],
                         name=name)
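A hedged usage sketch, assuming the companion helpers mask_length and simple_dot_attention referenced above are available in the same module:

import tensorflow as tf

query = tf.random_normal([2, 4, 8])     # batch=2, q_length=4, query_dim=8
key = tf.random_normal([2, 5, 8])       # seq_length=5
value = tf.random_normal([2, 5, 16])    # value_dim=16
query_length = tf.constant([4, 2])      # second sample has only 2 real queries
context = multiple_dot_attention(query, key, value, query_length=query_length)
# context: [2, 4, 16]; the final tf.scatter_nd writes attention results back
# only at true query positions, so rows for padded queries stay zero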
Example #41
def compute_vertex_normal(vertices: tf.Tensor, indices: tf.Tensor):
    """
        Compute vertex normal by weighted average of nearby face normals using Nelson Max's algorithm.
        See `Weights for Computing Vertex Normals from Facet Vectors <https://escholarship.org/content/qt7657d8h3/qt7657d8h3.pdf?t=ptt283>`_.

        Args
        ====
        vertices: tf.Tensor
            3D position of vertices
            float32 tensor with size num_vertices x 3
        indices: tf.Tensor
            vertex indices of triangle faces.
            int32 tensor with size num_triangles x 3

        Returns
        =======
        tf.Tensor
            per-vertex normal, float32 Tensor with size num_vertices x 3
    """
    def dot(v1, v2):
        return tf.math.reduce_sum(v1 * v2, axis=1)

    def squared_length(v):
        return tf.math.reduce_sum(v * v, axis=1)

    def length(v):
        return tf.sqrt(squared_length(v))

    def safe_asin(v):
        # Hack: the derivative of asin blows up at 1, so clamp the input to
        # keep the contribution finite
        return tf.asin(tf.clip_by_value(v, 0, 1 - 1e-6))

    # Nelson Max, "Weights for Computing Vertex Normals from Facet Vectors", 1999
    normals = tf.zeros(vertices.shape, dtype=tf.float32)

    # NOTE: Try tf.TensorArray()
    v = [
        tf.gather(vertices, indices[:, 0]),
        tf.gather(vertices, indices[:, 1]),
        tf.gather(vertices, indices[:, 2])
    ]

    for i in range(3):
        v0 = v[i]
        v1 = v[(i + 1) % 3]
        v2 = v[(i + 2) % 3]
        e1 = v1 - v0
        e2 = v2 - v0
        e1_len = length(e1)
        e2_len = length(e2)
        side_a = e1 / tf.reshape(e1_len, [-1, 1])
        side_b = e2 / tf.reshape(e2_len, [-1, 1])
        if i == 0:
            n = tf.linalg.cross(side_a, side_b)
            n = tf.where(\
                tf.broadcast_to(tf.reshape(length(n) > 0, (-1, 1)), tf.shape(n)),
                n / tf.reshape(length(n), (-1, 1)),
                tf.zeros(tf.shape(n), dtype=n.dtype))

        angle = tf.where(
            dot(side_a, side_b) < 0,
            math.pi - 2.0 * safe_asin(0.5 * length(side_a + side_b)),
            2.0 * safe_asin(0.5 * length(side_b - side_a)))
        sin_angle = tf.sin(angle)

        e1e2 = e1_len * e2_len
        # contrib is 0 when e1e2 is 0
        contrib = tf.reshape(\
            tf.where(e1e2 > 0, sin_angle / e1e2, tf.zeros(tf.shape(e1e2), dtype=e1e2.dtype)), (-1, 1))
        contrib = n * tf.broadcast_to(
            contrib, [tf.shape(contrib)[0], 3])  # In torch, `expand(-1, 3)`
        normals += tf.scatter_nd(tf.reshape(indices[:, i], [-1, 1]),
                                 contrib,
                                 shape=tf.shape(normals))

    degenerate_normals = tf.constant((0.0, 0.0, 1.0))
    degenerate_normals = tf.broadcast_to(
        tf.reshape(degenerate_normals, (1, 3)), tf.shape(normals))
    normals = tf.where(
        tf.broadcast_to(tf.reshape(length(normals) > 0, (-1, 1)),
                        tf.shape(normals)),
        normals / tf.reshape(length(normals), (-1, 1)), degenerate_normals)
    return normals
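A usage sketch for compute_vertex_normal with a single right triangle in the z = 0 plane; every vertex should receive the face normal (0, 0, 1):

import tensorflow as tf

vertices = tf.constant([[0., 0., 0.],
                        [1., 0., 0.],
                        [0., 1., 0.]])
indices = tf.constant([[0, 1, 2]], dtype=tf.int32)
normals = compute_vertex_normal(vertices, indices)
# -> [[0., 0., 1.],
#     [0., 0., 1.],
#     [0., 0., 1.]]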
Example #42
def knot_weights(positions,
                 num_knots,
                 degree,
                 cyclical,
                 sparse_mode=False,
                 name=None):
  """Function that converts cardinal B-spline positions to knot weights.

  Note:
    In the following, A1 to An are optional batch dimensions.

  Args:
    positions: A tensor with shape `[A1, .. An]`. Positions must be between
      `[0, C - D)` for non-cyclical and `[0, C)` for cyclical splines, where `C`
      is the number of knots and `D` is the spline degree.
    num_knots: A strictly positive `int` describing the number of knots in the
      spline.
    degree: An `int` describing the degree of the spline, which must be smaller
      than `num_knots`.
    cyclical: A `bool` describing whether the spline is cyclical.
    sparse_mode: A `bool` describing whether to return a result only for the
      knots with nonzero weights. If set to True, the function returns the
      weights of only the `degree` + 1 knots that are non-zero, as well as the
      indices of the knots.
    name: A name for this op. Defaults to "bspline_knot_weights".

  Returns:
    A tensor with dense weights for each control point, with the shape
    `[A1, ... An, C]` if `sparse_mode` is False.
    Otherwise, returns a tensor of shape `[A1, ... An, D + 1]` that contains the
    non-zero weights, and a tensor with the indices of the knots, with the type
    tf.int32.

  Raises:
    ValueError: If degree is greater than 4 or num_knots - 1, or less than 0.
    InvalidArgumentError: If positions are not in the right range.
  """
  with tf.compat.v1.name_scope(name, "bspline_knot_weights", [positions]):
    positions = tf.convert_to_tensor(value=positions)

    if degree > 4 or degree < 0:
      raise ValueError("Degree should be between 0 and 4.")
    if degree > num_knots - 1:
      raise ValueError("Degree cannot be >= number of knots.")
    if cyclical:
      positions = asserts.assert_all_in_range(positions, 0.0, float(num_knots))
    else:
      positions = asserts.assert_all_in_range(positions, 0.0,
                                              float(num_knots - degree))

    all_basis_functions = {
        # Maps valid degrees to functions.
        Degree.CONSTANT: _constant,
        Degree.LINEAR: _linear,
        Degree.QUADRATIC: _quadratic,
        Degree.CUBIC: _cubic,
        Degree.QUARTIC: _quartic
    }
    basis_functions = all_basis_functions[degree]

    if not cyclical and num_knots - degree == 1:
      # In this case all weights are non-zero and we can just return them.
      if not sparse_mode:
        return basis_functions(positions)
      else:
        shift = tf.zeros_like(positions, dtype=tf.int32)
        return basis_functions(positions), shift

    # shape_batch = positions.shape.as_list()
    shape_batch = tf.shape(input=positions)
    positions = tf.reshape(positions, shape=(-1,))

    # Calculate the nonzero weights from the decimal parts of positions.
    shift = tf.floor(positions)
    sparse_weights = basis_functions(positions - shift)
    shift = tf.cast(shift, tf.int32)

    if sparse_mode:
      # Returns just the weights and the shift amounts, so that tf.gather_nd on
      # the knots can be used to sparsely activate knots if needed.
      shape_weights = tf.concat(
          (shape_batch, tf.constant((degree + 1,), dtype=tf.int32)), axis=0)
      sparse_weights = tf.reshape(sparse_weights, shape=shape_weights)
      shift = tf.reshape(shift, shape=shape_batch)
      return sparse_weights, shift

    num_positions = tf.size(input=positions)
    ind_row, ind_col = tf.meshgrid(
        tf.range(num_positions, dtype=tf.int32),
        tf.range(degree + 1, dtype=tf.int32),
        indexing="ij")

    tiled_shifts = tf.reshape(
        tf.tile(tf.expand_dims(shift, axis=-1), multiples=(1, degree + 1)),
        shape=(-1,))
    ind_col = tf.reshape(ind_col, shape=(-1,)) + tiled_shifts
    if cyclical:
      ind_col = tf.math.mod(ind_col, num_knots)
    indices = tf.stack((tf.reshape(ind_row, shape=(-1,)), ind_col), axis=-1)
    shape_indices = tf.concat((tf.reshape(
        num_positions, shape=(1,)), tf.constant(
            (degree + 1, 2), dtype=tf.int32)),
                              axis=0)
    indices = tf.reshape(indices, shape=shape_indices)
    shape_scatter = tf.concat((tf.reshape(
        num_positions, shape=(1,)), tf.constant((num_knots,), dtype=tf.int32)),
                              axis=0)
    weights = tf.scatter_nd(indices, sparse_weights, shape_scatter)
    shape_weights = tf.concat(
        (shape_batch, tf.constant((num_knots,), dtype=tf.int32)), axis=0)
    return tf.reshape(weights, shape=shape_weights)
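The final scatter in knot_weights is the reusable part: each position owns degree + 1 consecutive knot weights starting at its integer shift, and tf.scatter_nd turns those (row, column) pairs into a dense [num_positions, num_knots] matrix. A self-contained sketch of the same pattern, with made-up weights:

import tensorflow as tf

num_positions, degree, num_knots = 2, 1, 5
sparse_weights = tf.constant([[0.25, 0.75],   # weights of knots shift..shift+degree
                              [0.40, 0.60]])
shift = tf.constant([1, 3])                   # first active knot per position
ind_row, ind_col = tf.meshgrid(tf.range(num_positions),
                               tf.range(degree + 1), indexing="ij")
ind_col = ind_col + tf.expand_dims(shift, -1)
indices = tf.stack((ind_row, ind_col), axis=-1)
dense = tf.scatter_nd(indices, sparse_weights, [num_positions, num_knots])
# -> [[0.   0.25 0.75 0.   0.  ]
#     [0.   0.   0.   0.40 0.60]]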
    def _body(x_adv, epoch, pixel_mask):
        ybar = model(x_adv)

        y_target = tf.slice(ybar, [0, yi], [-1, 1])
        dy_dx, = tf.gradients(ybar, x_adv)

        dt_dx, = tf.gradients(y_target, x_adv)
        do_dx = dy_dx - dt_dx

        ind = tf.where(pixel_mask)
        n = tf.shape(ind)
        n = n[0]

        ind2 = tf.range(n)
        batch_size = tf.constant(100)

        def _maxpair_batch_cond(i0, j0, v0, start):
            return tf.less(start, n)

        def _maxpair_batch_body(i0, j0, v0, start):
            count = tf.reduce_min([batch_size, n - start])
            ind3 = tf.slice(ind2, [start], [count])

            # Selection C(n, 2): e.g., for n=4, a=[0 0 1 0 1 2] and
            # b=[1 2 2 3 3 3]; corresponding elements of the two arrays form a
            # pair, i.e., the pair indices are stored separately.  A special
            # case is when there is only one pixel left.
            a, b = tf.meshgrid(ind3, ind3)
            c = tf.cond(tf.greater(count, 1), lambda: tf.less(a, b),
                        lambda: tf.less_equal(a, b))
            c = tf.where(c)
            a, b = tf.gather_nd(a, c), tf.gather_nd(b, c)

            # ii, jj contains indices to pixels
            ii, jj = tf.gather(ind, a), tf.gather(ind, b)

            ti, oi = tf.gather_nd(dt_dx, ii), tf.gather_nd(do_dx, ii)
            tj, oj = tf.gather_nd(dt_dx, jj), tf.gather_nd(do_dx, jj)

            # the gradient of each pair is the sum of individuals
            t, o = ti + tj, oi + oj

            # increase target probability while decrease others
            c = tf.logical_and(t >= 0, o <= 0)
            not_empty = tf.reduce_any(c)

            # ensure that c is not empty
            c = tf.cond(not_empty, lambda: c,
                        lambda: tf.ones_like(c, dtype=bool))
            c = tf.where(c)

            t, o = tf.gather_nd(t, c), tf.gather_nd(o, c)
            ii, jj = tf.gather_nd(ii, c), tf.gather_nd(jj, c)

            # saliency score
            score = tf.cond(not_empty, lambda: tf.multiply(t, tf.abs(o)),
                            lambda: t - o)

            # find the max pair in current batch
            p = tf.argmax(score, axis=0)
            v = tf.reduce_max(score, axis=0)
            i, j = tf.gather(ii, p), tf.gather(jj, p)
            i, j = tf.to_int32(i), tf.to_int32(j)

            i1, j1, v1 = tf.cond(tf.greater(v, v0), lambda: (i, j, v), lambda:
                                 (i0, j0, v0))
            return i1, j1, v1, start + batch_size

        i = tf.to_int32(tf.gather(ind, 0))
        j = tf.to_int32(tf.gather(ind, 1))

        # Find the max saliency pair in batches.  A naive iteration through the
        # pairs takes O(n^2) time.  A vectorized implementation could speed
        # this up significantly, at the expense of O(n^2) space.  So instead we
        # find the max pair batch by batch, using the vectorized implementation
        # within each batch.
        i, j, _, _ = tf.while_loop(_maxpair_batch_cond,
                                   _maxpair_batch_body, (i, j, -1., 0),
                                   back_prop=False)

        dx = tf.scatter_nd([i], [eps], tf.shape(x_adv)) +\
             tf.scatter_nd([j], [eps], tf.shape(x_adv))

        x_adv = tf.stop_gradient(x_adv + dx)
        x_adv = tf.clip_by_value(x_adv, clip_min, clip_max)
        epoch += 1
        pixel_mask = tf.cond(tf.greater(eps,
                                        0), lambda: tf.less(x_adv, clip_max),
                             lambda: tf.greater(x_adv, clip_min))

        return x_adv, epoch, pixel_mask
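The perturbation at the end of _body is itself a small scatter pattern: write eps at the two chosen coordinates and leave everything else untouched. In isolation, on a flat input for readability:

import tensorflow as tf

eps = 0.5
x_adv = tf.zeros([6])
i, j = 2, 5                                   # indices of the chosen pixel pair
dx = tf.scatter_nd([[i]], [eps], tf.shape(x_adv)) + \
     tf.scatter_nd([[j]], [eps], tf.shape(x_adv))
# dx -> [0., 0., 0.5, 0., 0., 0.5]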
Example #44
def radialSpectrumMC(points, fourierSetup):

    with tf.name_scope('radialSpectrumMC'):

        #-------------------------------------------

        def sampleSpectrum(input):
            freqSamples = input[0]
            points = input[1]
            pointCount = tf.cast(tf.shape(points)[0], tf.float32)
            dotProduct = tf.tensordot(freqSamples, points, [[2], [1]])
            twoPi = 2.0 * math.pi
            real = tf.cos(twoPi * dotProduct)
            imag = tf.sin(twoPi * dotProduct)
            sumReal = tf.reduce_sum(real, -1)
            sumImag = tf.reduce_sum(imag, -1)
            power = (sumReal * sumReal + sumImag * sumImag) / pointCount
            return power

        #-------------------------------------------

        def ceilAwayFromZero(input):
            return tf.sign(input) * tf.ceil(tf.abs(input))

        #-------------------------------------------
         
        batchSize, _, dimCount = points.shape
        freqRes = fourierSetup.resolution
        freqStep = fourierSetup.freqStep
        mcSampleCount = fourierSetup.mcSamplesPerShell

        # generate normal samples
        normDst = tf.distributions.Normal(
            loc=np.full((dimCount,), 0.),
            scale=np.full((dimCount,), 1.))
        mcSamples = tf.cast(normDst.sample([batchSize, freqRes, mcSampleCount]), tf.float32)

        # project samples to unit hypersphere
        # https://dl.acm.org/citation.cfm?id=377946
        shellSamples = tf.nn.l2_normalize(mcSamples, axis=-1, epsilon=SQRT_EPS)
        
        # scale shells by frequencies
        frequencies = tf.range(
            start = 0,
            limit = freqRes * freqStep, 
            delta = freqStep, 
            dtype = tf.float32)
        
        shellSamples *= tf.reshape(frequencies, [1, freqRes, 1, 1])
        #shellSamples = tf.round(shellSamples)
        shellSamples = ceilAwayFromZero(shellSamples)
        
        # power spectrum for each frequency sample
        spectrum = tf.map_fn(
            lambda b: sampleSpectrum(b), 
            (shellSamples, points),
            dtype=tf.float32)
        
        # radial and batch average
        avg = tf.reduce_mean(spectrum, [0, -1])

        if fourierSetup.cancelDC:
            dcComp = avg[0]
            avg -= tf.scatter_nd([[0]], [dcComp], avg.shape)

        return avg
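The DC cancellation above subtracts the first bin from itself via a one-element scatter; reduced to its essentials:

import tensorflow as tf

avg = tf.constant([4., 1., 2., 3.])
avg -= tf.scatter_nd([[0]], [avg[0]], avg.shape)
# -> [0., 1., 2., 3.]   only the DC bin is zeroed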
Example #45
 def vec_to_tri_vector(vector):
     # `indices` and `N` are closed over from the enclosing scope
     return tf.scatter_nd(indices=indices, shape=[N, N], updates=vector)
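Because vec_to_tri_vector closes over indices and N, here is a self-contained equivalent that packs a length-6 vector into the lower triangle of a 3x3 matrix (np.tril_indices plays the role of the captured indices):

import numpy as np
import tensorflow as tf

N = 3
indices = np.stack(np.tril_indices(N), axis=-1)   # (0,0), (1,0), (1,1), ...
vector = tf.constant([1., 2., 3., 4., 5., 6.])
tri = tf.scatter_nd(indices=indices, shape=[N, N], updates=vector)
# -> [[1. 0. 0.]
#     [2. 3. 0.]
#     [4. 5. 6.]]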
    def build_model():
        with tf.name_scope('placeholders'):
            real_data_int = tf.placeholder(
                tf.uint8,
                [None, picture_size])  # uint8 with int values in [0, 255]
            x_true = tf.cast(real_data_int,
                             tf.float32) / 255.  # float with values in [0,1]
            z = tf.placeholder(tf.float32, [None, input_dim])
            if use_JL:
                JL = tf.placeholder(tf.float32, [picture_size, JL_dim])
                P_non_normalized = tf.placeholder(tf.float32,
                                                  [JL_dim, n_projections])
                P_non_normalized_SWD = tf.placeholder(
                    tf.float32, [picture_size, n_projections])
            else:
                JL = None
                P_non_normalized = tf.placeholder(
                    tf.float32, [picture_size, n_projections])
                P_non_normalized_SWD = None

        x_generated = generator(
            z,
            n_features_first=n_features_first,
            n_features_reduction_factor=n_features_reduction_factor,
            min_features=min_features,
            BN=BN,
            power=power,
            init_method=init_method)

        # define loss (big part taken from SWG)
        with tf.name_scope('loss'):
            # apply the Johnson-Lindenstrauss map, if wanted, to the flattened arrays
            if use_JL:
                JL_true = tf.matmul(x_true, JL)
                JL_gen = tf.matmul(x_generated, JL)
            else:
                JL_true = x_true
                JL_gen = x_generated

            # next project the samples (images). After being transposed, we have tensors
            # of the format: [[projected_image1_proj1, projected_image2_proj1, ...],
            #                 [projected_image1_proj2, projected_image2_proj2, ...],
            #                 ...]
            # Each row has the projections along one direction. This makes it easier for the sorting that follows.
            # first normalize the random normal vectors to lie in the sphere
            P = tf.nn.l2_normalize(P_non_normalized, axis=0)

            projected_true = tf.transpose(tf.matmul(JL_true, P))
            projected_fake = tf.transpose(tf.matmul(JL_gen, P))

            sorted_true, true_indices = tf.nn.top_k(input=projected_true,
                                                    k=batch_size)
            sorted_fake, fake_indices = tf.nn.top_k(input=projected_fake,
                                                    k=batch_size)

            # For faster gradient computation, we do not use sorted_fake to compute
            # loss. Instead we re-order the sorted_true so that the samples from the
            # true distribution go to the correct sample from the fake distribution.

            # It is less expensive (memory-wise) to rearrange arrays in TF.
            # Flatten the sorted_true from dim [n_projections, batch_size].
            flat_true = tf.reshape(sorted_true, [-1])

            # Modify the indices to reflect this transition to an array.
            # new index = row + index
            rows = np.asarray([
                batch_size * np.floor(i * 1.0 / batch_size)
                for i in range(n_projections * batch_size)
            ])
            rows = rows.astype(np.int32)
            flat_idx = tf.reshape(fake_indices, [-1, 1]) + np.reshape(
                rows, [-1, 1])

            # The scatter operation takes care of reshaping to the rearranged matrix
            shape = tf.constant([batch_size * n_projections])
            rearranged_true = tf.reshape(
                tf.scatter_nd(flat_idx, flat_true, shape),
                [n_projections, batch_size])

            generator_loss = tf.reduce_mean(
                tf.square(projected_fake - rearranged_true))

            # get for JLSWGN the sliced Wasserstein distance (SWD) (since SWD and JLSWD are not comparable)
            if use_JL:
                P_SWD = tf.nn.l2_normalize(P_non_normalized_SWD, axis=0)

                projected_true_SWD = tf.transpose(tf.matmul(x_true, P_SWD))
                projected_fake_SWD = tf.transpose(tf.matmul(
                    x_generated, P_SWD))

                sorted_true_SWD, true_indices_SWD = tf.nn.top_k(
                    input=projected_true_SWD, k=batch_size)
                sorted_fake_SWD, fake_indices_SWD = tf.nn.top_k(
                    input=projected_fake_SWD, k=batch_size)

                flat_true_SWD = tf.reshape(sorted_true_SWD, [-1])
                flat_idx_SWD = tf.reshape(fake_indices_SWD,
                                          [-1, 1]) + np.reshape(rows, [-1, 1])

                rearranged_true_SWD = tf.reshape(
                    tf.scatter_nd(flat_idx_SWD, flat_true_SWD, shape),
                    [n_projections, batch_size])

                SWD = tf.reduce_mean(
                    tf.square(projected_fake_SWD - rearranged_true_SWD))
            else:
                SWD = generator_loss

        with tf.name_scope('optimizer'):
            generator_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
            g_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                                 beta1=0.5)
            g_train = g_optimizer.minimize(generator_loss,
                                           var_list=generator_vars)

        # initialize variables using init_method
        session.run(tf.global_variables_initializer())

        return real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train
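The rearrangement trick in build_model (reordering sorted_true by fake_indices with a single scatter over the flattened matrix) is easy to verify on a toy case:

import numpy as np
import tensorflow as tf

n_projections, batch_size = 2, 3
sorted_true = tf.constant([[9., 5., 1.],
                           [8., 4., 2.]])
fake_indices = tf.constant([[2, 0, 1],
                            [1, 2, 0]])
rows = np.repeat(np.arange(n_projections, dtype=np.int32) * batch_size,
                 batch_size)
flat_idx = tf.reshape(fake_indices, [-1, 1]) + np.reshape(rows, [-1, 1])
rearranged = tf.reshape(
    tf.scatter_nd(flat_idx, tf.reshape(sorted_true, [-1]),
                  [n_projections * batch_size]),
    [n_projections, batch_size])
# row 0: 9 lands in column 2, 5 in column 0, 1 in column 1 -> [5., 1., 9.]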
Example #47
def seq2seq(train_or_infer,
            x_id,
            y_id,
            keep_prob,
            batch_size,
            x_id_extended,
            y_id_extended,
            vocab_size_extend,
            word_embd_dim,
            dim_rnn,
            use_same_word_embd=False,
            encoder_word_embd_pretrain=None,
            encoder_vocab_size=None,
            decoder_word_embd_pretrain=None,
            decoder_vocab_size=None,
            target_seq_len_max=None):
    with tf.variable_scope('encoder') as scope_encoder:
        # encoder[0] [batch_size, source_seq_max_len]: source sequence, not yet embedded, as np.array
        encoder = [x_id]
        # batch_size = encoder[0].shape[0].value
        encoder_seq_max_len = encoder[0].shape[1].value
        encoder_seq_len = tf.cast(tf.reduce_sum(tf.sign(encoder[0]), axis=1),
                                  tf.int32)
        # encoder[1] [batch_size, source_seq_max_len, word_embd_dim]: embedding of the source sequence
        encoder_word_embd, encoder_vocab_size, word_embd_dim \
            = creat_word_embd(encoder_word_embd_pretrain, encoder_vocab_size, word_embd_dim, name='encoder_word_embd_matrix')
        encoder_w2v = tf.nn.embedding_lookup(encoder_word_embd, encoder[0])
        encoder.append(encoder_w2v)
        # encoder[2] ([batch_size, source_seq_max_len, dim_rnn*2], state_shape): build the encoder and run it with bidirectional_dynamic_rnn
        encoder_cell_fw_raw = tf.nn.rnn_cell.BasicLSTMCell(num_units=dim_rnn,
                                                           state_is_tuple=True)
        encoder_cell_fw = tf.nn.rnn_cell.DropoutWrapper(
            cell=encoder_cell_fw_raw, output_keep_prob=keep_prob)
        encoder_cell_bw_raw = tf.nn.rnn_cell.BasicLSTMCell(num_units=dim_rnn,
                                                           state_is_tuple=True)
        encoder_cell_bw = tf.nn.rnn_cell.DropoutWrapper(
            cell=encoder_cell_bw_raw, output_keep_prob=keep_prob)
        encoder_outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=encoder_cell_fw,
            cell_bw=encoder_cell_bw,
            inputs=encoder[1],
            sequence_length=encoder_seq_len,
            dtype=tf.float32)
        memory = tf.concat(encoder_outputs, axis=2)
        init_state = init_state_reconstruct(encoder_state=state,
                                            encoder_state_type='bilstm',
                                            decoder_state_type='lstm',
                                            fill_zero=False)
        encoder.append((memory, init_state))
    with tf.variable_scope('decoder') as scope_decoder:
        # decoder[0] [batch_size, target_seq_len_max]: target sequence, not yet embedded, as np.array
        if 'train' == train_or_infer:
            decoder = [y_id]
            # target_seq carries both '<SOS>' and '<EOS>', so subtract 1 when
            # computing the effective length
            decoder_seq_len = tf.cast(
                tf.reduce_sum(tf.sign(decoder[0]), axis=1) - 1, tf.int32)
        elif 'infer' == train_or_infer:
            # in inference mode, initialize decoder[0] as a single <sos> step, where the <sos> id is 1
            decoder = [tf.ones(shape=[batch_size, 1], dtype=tf.int32)]
        else:
            print('parameter train_or_infer has not been defined!')
            decoder = [tf.ones(shape=[batch_size, 1], dtype=tf.int32)]
        # decoder[1] [batch_size, target_seq_max_len, word_embd_dim]: embedding of the target sequence
        if use_same_word_embd is True:
            decoder_word_embd = encoder_word_embd
            decoder_vocab_size = encoder_vocab_size
        else:
            decoder_word_embd, decoder_vocab_size, word_embd_dim \
                = creat_word_embd(decoder_word_embd_pretrain, decoder_vocab_size, word_embd_dim, name='decoder_word_embd_matrix')
        decoder_w2v = tf.nn.embedding_lookup(decoder_word_embd, decoder[0])
        decoder.append(decoder_w2v)
        # decoder[2] build the decoder model
        # outputs: list of [batch_size, dim_rnn] by length target_seq_len_max
        # aligns: list of [batch_size, encoder_step_len] by length target_seq_len_max
        # p_gens: list of [batch_size] by length target_seq_len_max
        with tf.variable_scope('dynamic_decoder') as scope_dynamic_decoder:
            decoder_cell_raw = tf.nn.rnn_cell.BasicLSTMCell(
                num_units=dim_rnn, state_is_tuple=True)
            decoder_cell = tf.nn.rnn_cell.DropoutWrapper(
                cell=decoder_cell_raw, output_keep_prob=keep_prob)
            outputs, aligns, p_gens, cell_state = dynamic_decoder(
                cell=decoder_cell,
                memory=memory,
                memory_seq_len=encoder_seq_len,
                init_state=init_state,
                train_or_infer=train_or_infer,
                decoder_seq_len_max=target_seq_len_max,
                target_seq_embd=decoder[1],
                decoder_word_embd=decoder_word_embd)
        decoder.append((outputs, aligns, p_gens))
        # decoder[3] compute the outputs of Generator_Network and Pointer_Network
        # vocab_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by length target_seq_len_max
        # attention_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by len target_seq_len_max
        # p_gens: list of [batch_size] by length target_seq_len_max
        with tf.variable_scope('Generator_Network') as scope_Generator_Network:
            weight = tf.get_variable(
                name='weight',
                trainable=True,
                initializer=tf.truncated_normal(
                    [decoder_cell.output_size, decoder_vocab_size],
                    stddev=math.sqrt(
                        6 / (decoder_cell.output_size + decoder_vocab_size)),
                    dtype=tf.float32))
            bias = tf.get_variable(name='bias',
                                   trainable=True,
                                   initializer=tf.truncated_normal(
                                       [decoder_vocab_size],
                                       stddev=0.1,
                                       dtype=tf.float32))
            vocab_scores = []
            for step_num, output in enumerate(outputs):
                if step_num > 0:
                    tf.get_variable_scope().reuse_variables()
                vocab_scores.append(tf.nn.xw_plus_b(
                    output, weight, bias))  # apply the linear layer
            # vocab_distributions: list [batch_size, decoder_vocab_size] length of decoder_step_len
            vocab_distributions = [
                tf.nn.softmax(score) for score in vocab_scores
            ]
            # vocab_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by length decoder_step_len
            vocab_dist_extendeds = [
                tf.pad(vocab_dist, paddings=[[0, 0], [0, vocab_size_extend]])
                for vocab_dist in vocab_distributions
            ]

        with tf.variable_scope('Pointer_Network') as scope_Pointer_Network:
            index_batch_num = tf.range(batch_size)
            index_batch_num = tf.expand_dims(index_batch_num, 1)
            index_batch_num = tf.tile(index_batch_num,
                                      [1, encoder_seq_max_len])
            index = tf.stack((index_batch_num, x_id_extended), axis=2)
            # attention_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by len decoder_step_len
            attention_dist_extendeds = [
                tf.scatter_nd(
                    index, align,
                    [batch_size, encoder_vocab_size + vocab_size_extend])
                for align in aligns
            ]
            if use_same_word_embd is not True:
                # todo: this actually discards the probabilities of in-vocabulary
                # source words and keeps only the OOV probabilities; needs a proper fix later.
                attention_dist_extendeds = [
                    tf.concat([
                        tf.zeros(shape=[batch_size, decoder_vocab_size],
                                 dtype=tf.float32),
                        att_dist[:, encoder_vocab_size:]
                    ],
                              axis=1) for att_dist in attention_dist_extendeds
                ]
        decoder.append(
            (vocab_dist_extendeds, attention_dist_extendeds, aligns))
        # decoder[4] compute the model's final output
        # final_distributions: list of [batch_size, decoder_vocab_size+vocab_size_extend] by len target_seq_len_max
        with tf.variable_scope('Switching_Network') as scope_Switching_Network:
            # todo do not use copynet
            if False:
                final_distributions = vocab_dist_extendeds
            else:
                final_distributions = [
                    vocab_dist * p_gen + attn_dist * (1 - p_gen)
                    for (p_gen, vocab_dist, attn_dist) in zip(
                        p_gens, vocab_dist_extendeds, attention_dist_extendeds)
                ]

        decoder.append(final_distributions)
    # todo: introduce a non-linearity
    if 'train' == train_or_infer:
        return encoder + decoder
    else:
        return tf.concat([
            tf.expand_dims(step_output, axis=1) for step_output in decoder[-1]
        ],
                         axis=1)
Example #48
def likelihood_ratio_filter(node_pairs,
                            modified_adjacency,
                            original_adjacency,
                            d_min,
                            threshold=0.004):
    """
    Filter the input node pairs based on the likelihood ratio test proposed by Zügner et al. 2018, see
    https://dl.acm.org/citation.cfm?id=3220078. In essence, for each node pair return 1 if adding/removing the edge
    between the two nodes does not violate the unnoticeability constraint, and return 0 otherwise. Assumes unweighted
    and undirected graphs.

    Parameters
    ----------
    node_pairs: tf.Tensor, shape (e, 2) dtype int
        The e node pairs to consider, where each node pair consists of the two indices of the nodes.

    modified_adjacency: tf.Tensor shape (N,N) dtype int
        The input (modified) adjacency matrix. Assumed to be unweighted and symmetric.

    original_adjacency: tf.Tensor shape (N,N) dtype int
        The input (original) adjacency matrix. Assumed to be unweighted and symmetric.

    d_min: int
        The minimum degree considered in the Powerlaw distribution.

    threshold: float, default 0.004
        Cutoff value for the unnoticeability constraint. Smaller means stricter constraint. 0.004 corresponds to a
        p-value of 0.95 in the Chi-square distribution with one degree of freedom.

    Returns
    -------
    allowed_mask: tf.Tensor, shape (e,), dtype bool
        For each node pair p return True if adding/removing the edge p does not violate the
        cutoff value, False otherwise.

    current_ratio: tf.Tensor, shape (), dtype float
        The current value of the log likelihood ratio.

    """

    N = int(modified_adjacency.shape[0])

    original_degree_sequence = tf.cast(
        tf.reduce_sum(original_adjacency, axis=1), tf.float32)
    current_degree_sequence = tf.cast(
        tf.reduce_sum(modified_adjacency, axis=1), tf.float32)

    # Concatenate the degree sequences
    concat_degree_sequence = tf.concat(
        (current_degree_sequence[None, :], original_degree_sequence[None, :]),
        axis=1)
    # Compute the log likelihood values of the original, modified, and combined degree sequences.
    ll_orig, alpha_orig, n_orig, sum_log_degrees_original = degree_sequence_log_likelihood(
        original_degree_sequence, d_min)
    ll_current, alpha_current, n_current, sum_log_degrees_current = degree_sequence_log_likelihood(
        current_degree_sequence, d_min)
    ll_comb, alpha_comb, n_comb, sum_log_degrees_combined = degree_sequence_log_likelihood(
        concat_degree_sequence, d_min)
    # Compute the log likelihood ratio
    current_ratio = -2 * ll_comb + 2 * (ll_orig + ll_current)

    # Compute new log likelihood values that would arise if we add/remove the edges corresponding to each node pair.
    new_lls, new_alphas, new_ns, new_sum_log_degrees = updated_log_likelihood_for_edge_changes(
        node_pairs, tf.cast(modified_adjacency, tf.float32), d_min)

    # Combination of the original degree distribution with the distributions corresponding to each node pair.
    n_combined = n_orig + new_ns
    new_sum_log_degrees_combined = sum_log_degrees_original + new_sum_log_degrees
    alpha_combined = compute_alpha(n_combined, new_sum_log_degrees_combined,
                                   d_min)
    new_ll_combined = compute_log_likelihood(n_combined, alpha_combined,
                                             new_sum_log_degrees_combined,
                                             d_min)
    new_ratios = -2 * new_ll_combined + 2 * (new_lls + ll_orig)

    # Allowed edges are only those for which the resulting likelihood ratio measure is < than the threshold
    allowed_edges = new_ratios < threshold
    filtered_edges = tf.boolean_mask(node_pairs, allowed_edges)

    # Get the flattened indices for the allowed edges [e,2] -> [e,], similar to np.ravel_multi_index
    flat_ixs = ravel_multiple_indices(tf.cast(filtered_edges, tf.int32),
                                      modified_adjacency.shape)
    # Also for the reverse direction (we assume unweighted graphs).
    flat_ixs_reverse = ravel_multiple_indices(
        tf.reverse(tf.cast(filtered_edges, tf.int32), [1]),
        modified_adjacency.shape)

    # Construct a [N * N] array with ones at the admissible node pair locations and 0 everywhere else.
    indices_1 = tf.scatter_nd(flat_ixs[:, None],
                              tf.ones_like(flat_ixs, dtype=tf.float32),
                              shape=[N * N])
    indices_2 = tf.scatter_nd(flat_ixs_reverse[:, None],
                              tf.ones_like(flat_ixs_reverse, dtype=tf.float32),
                              shape=[N * N])

    # Add both directions
    allowed_mask = tf.clip_by_value(indices_1 + indices_2, 0, 1)

    return allowed_mask, current_ratio
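Constructing allowed_mask boils down to scattering ones at the flattened (i, j) positions for both edge directions and clipping the sum; for a 3-node graph with one admissible edge (0, 2):

import tensorflow as tf

N = 3
flat_ixs = tf.constant([0 * N + 2])           # edge (0, 2), flattened
flat_ixs_reverse = tf.constant([2 * N + 0])   # reverse direction (2, 0)
indices_1 = tf.scatter_nd(flat_ixs[:, None],
                          tf.ones_like(flat_ixs, dtype=tf.float32),
                          shape=[N * N])
indices_2 = tf.scatter_nd(flat_ixs_reverse[:, None],
                          tf.ones_like(flat_ixs_reverse, dtype=tf.float32),
                          shape=[N * N])
allowed_mask = tf.clip_by_value(indices_1 + indices_2, 0, 1)
# positions 2 and 6 of the length-9 mask are 1, all others 0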
Example #49
    def __init__(self,
                 params,
                 prior_embeddings=None,
                 initializer_nvdm=None,
                 topic_coherence_embeddings=None):

        self.vocab_size = params.TM_vocab_length
        self.n_hidden = params.hidden_size_TM
        self.n_topic = n_topic
        self.n_sample = n_sample
        self.non_linearity = non_linearity
        self.learning_rate = params.learning_rate
        self.batch_size = params.batch_size

        self.x = tf.placeholder(tf.float32, [None, self.vocab_size], name='x')
        self.mask = tf.placeholder(tf.float32, [None],
                                   name='mask')  # mask paddings
        if params.use_sent_topic_rep:
            self.x_sent = tf.placeholder(tf.float32,
                                         [None, None, self.vocab_size],
                                         name='x_sent')

        if params.use_topic_embedding:
            self.x_doc_mask = tf.placeholder(tf.float32,
                                             [None, self.vocab_size],
                                             name='x_doc_mask')

        #self.input_batch_size = tf.placeholder(tf.int32, (), name='input_batch_size')
        self.input_batch_size = tf.shape(self.x)[0]
        if params.use_sent_topic_rep:
            self.input_batch_size_sent = tf.shape(self.x_sent)[0]
            self.input_batch_len_sent = tf.shape(self.x_sent)[1]
            self.batch_size_sent = self.input_batch_size_sent * self.input_batch_len_sent

        # encoder
        with tf.variable_scope('TM_encoder', reuse=tf.AUTO_REUSE):
            self.enc_vec = utils.mlp(self.x, [self.n_hidden],
                                     self.non_linearity)
            #self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden], self.non_linearity)
            self.mean = utils.nvdm_linear(
                self.enc_vec,
                self.n_topic,
                scope='mean',
                #matrix_initializer=initializer_nvdm[1][0],
                matrix_initializer=None,
                #bias_initializer=initializer_nvdm[1][1])
                bias_initializer=None)
            self.logsigm = utils.nvdm_linear(
                self.enc_vec,
                self.n_topic,
                bias_start_zero=True,
                matrix_start_zero=True,
                scope='logsigm',
                #matrix_initializer=initializer_nvdm[2][0],
                matrix_initializer=None,
                #bias_initializer=initializer_nvdm[2][1])
                bias_initializer=None)
            self.kld = -0.5 * tf.reduce_sum(
                1 - tf.square(self.mean) + 2 * self.logsigm -
                tf.exp(2 * self.logsigm), 1)
            #self.kld = self.mask*self.kld  # mask paddings
            self.kld = tf.multiply(self.mask, self.kld,
                                   name='kld')  # mask paddings

            if params.use_sent_topic_rep:
                self.x_sent_reshape = tf.reshape(self.x_sent,
                                                 [-1, self.vocab_size])
                self.enc_vec_sent = utils.mlp(self.x_sent_reshape,
                                              [self.n_hidden],
                                              self.non_linearity)
                self.mean_sent = utils.nvdm_linear(self.enc_vec_sent,
                                                   self.n_topic,
                                                   scope='mean')
                self.logsigm_sent = utils.nvdm_linear(self.enc_vec_sent,
                                                      self.n_topic,
                                                      bias_start_zero=True,
                                                      matrix_start_zero=True,
                                                      scope='logsigm')

            if params.prior_emb_for_topics or params.topic_coherence_reg:
                W_prior = tf.get_variable('embeddings_TM_prior',
                                          dtype=tf.float32,
                                          initializer=prior_embeddings,
                                          trainable=False)

        with tf.variable_scope('TM_decoder', reuse=tf.AUTO_REUSE):
            if self.n_sample == 1:
                eps = tf.random_normal((self.input_batch_size, self.n_topic),
                                       mean=0.0,
                                       stddev=1.0,
                                       seed=seed)
                #doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
                self.doc_vec = tf.add(tf.multiply(tf.exp(self.logsigm), eps),
                                      self.mean,
                                      name='doc_hidden')
                if GSM:
                    self.doc_vec = tf.nn.softmax(self.doc_vec, axis=1)
                self.last_h = self.doc_vec
                logits_projected, self.decoding_matrix = utils.nvdm_linear(
                    self.doc_vec,
                    self.vocab_size,
                    scope='projection',
                    get_matrix=True,
                    #matrix_initializer=initializer_nvdm[3][0],
                    matrix_initializer=None,
                    #bias_initializer=initializer_nvdm[3][1])
                    bias_initializer=None)
                logits = tf.nn.log_softmax(logits_projected)
                self.recons_loss = -tf.reduce_sum(tf.multiply(logits, self.x),
                                                  1)
            else:
                #eps = tf.random_normal((self.n_sample*self.batch_size, self.n_topic), mean=0.0, stddev=1.0)
                eps = tf.random_normal(
                    (self.n_sample * self.input_batch_size, self.n_topic),
                    mean=0.0,
                    stddev=1.0,
                    seed=seed)
                eps_list = tf.split(eps, self.n_sample, 0)
                recons_loss_list = []
                doc_vec_list = []
                for i in range(self.n_sample):
                    if i > 0: tf.get_variable_scope().reuse_variables()
                    curr_eps = eps_list[i]
                    doc_vec = tf.add(
                        tf.multiply(tf.exp(self.logsigm), curr_eps), self.mean)
                    if GSM:
                        doc_vec = tf.nn.softmax(doc_vec, axis=1)
                    doc_vec_list.append(doc_vec)
                    logits, self.decoding_matrix = utils.nvdm_linear(
                        doc_vec,
                        self.vocab_size,
                        scope='projection',
                        get_matrix=True,
                        matrix_initializer=None,
                        bias_initializer=None)
                    logits = tf.nn.log_softmax(logits)
                    recons_loss_list.append(
                        -tf.reduce_sum(tf.multiply(logits, self.x), 1))
                self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample
                self.doc_vec = tf.add_n(doc_vec_list) / self.n_sample
                self.last_h = self.doc_vec

            # TOPIC EMBEDDING CODE

            if params.use_topic_embedding:
                topics_masked = tf.multiply(tf.expand_dims(self.x_doc_mask,
                                                           axis=1),
                                            tf.expand_dims(
                                                self.decoding_matrix, axis=0),
                                            name='topics_masked')
                self.top_k = tf.nn.top_k(topics_masked,
                                         k=params.use_k_topic_words,
                                         sorted=False)
                if params.prior_emb_for_topics:
                    self.top_k_embeddings = tf.nn.embedding_lookup(
                        W_prior, self.top_k.indices)
                    if concat_topic_emb_and_prop:
                        self.topic_emb_size = prior_embeddings.shape[
                            1] + self.n_topic
                    else:
                        self.topic_emb_size = prior_embeddings.shape[1]
                else:
                    self.top_k_embeddings = tf.nn.embedding_lookup(
                        tf.transpose(self.decoding_matrix), self.top_k.indices)
                    if concat_topic_emb_and_prop:
                        self.topic_emb_size = self.n_topic * 2
                    else:
                        self.topic_emb_size = self.n_topic
                self.topic_embeddings = tf.reduce_mean(self.top_k_embeddings,
                                                       axis=2,
                                                       name='topic_embeddings')

                if params.use_k_topics > 0:
                    # Masking document topic proportion vector
                    top_k_h_values, top_k_h_indices = tf.nn.top_k(
                        self.last_h,
                        k=params.use_k_topics,
                        sorted=False,
                        name='top_k_h')
                    row_numbers = tf.tile(tf.expand_dims(
                        tf.range(0, self.input_batch_size), 1),
                                          [1, params.use_k_topics],
                                          name='row_numbers')
                    full_indices = tf.concat([
                        tf.expand_dims(row_numbers, -1),
                        tf.expand_dims(top_k_h_indices, -1)
                    ],
                                             axis=2)
                    full_indices = tf.reshape(full_indices, [-1, 2],
                                              name='full_indices')
                    last_h_softmax = tf.scatter_nd(
                        full_indices,
                        tf.reshape(tf.nn.softmax(top_k_h_values, axis=1),
                                   [-1]),
                        #tf.ones([self.input_batch_size * params.use_k_topics], dtype=tf.float32),
                        [self.input_batch_size, self.n_topic],
                        name='last_h_softmax')
                else:
                    last_h_softmax = tf.nn.softmax(self.last_h,
                                                   axis=1,
                                                   name='last_h_softmax')

                self.last_h_topic_emb = tf.squeeze(tf.matmul(
                    tf.expand_dims(last_h_softmax, axis=1),
                    self.topic_embeddings),
                                                   axis=1,
                                                   name='last_h_topic_emb')
                if concat_topic_emb_and_prop:
                    self.last_h_topic_emb = tf.concat(
                        [self.last_h_topic_emb, self.last_h],
                        axis=1,
                        name='last_h_topic_emb_concat')

            # Code segment for Sentence-level topical discourse

            if params.use_sent_topic_rep:
                if self.n_sample == 1:
                    eps_sent = tf.random_normal(
                        (self.batch_size_sent, self.n_topic),
                        mean=0.0,
                        stddev=1.0,
                        seed=seed)
                    self.last_h_sent = tf.add(tf.multiply(
                        tf.exp(self.logsigm_sent), eps_sent),
                                              self.mean_sent,
                                              name='sent_hidden')
                else:
                    eps_sent = tf.random_normal(
                        (self.n_sample * self.batch_size_sent, self.n_topic),
                        mean=0.0,
                        stddev=1.0,
                        seed=seed)
                    eps_sent_list = tf.split(eps_sent, self.n_sample, 0)
                    recons_loss_list = []
                    sent_vec_list = []
                    for i in range(self.n_sample):
                        if i > 0: tf.get_variable_scope().reuse_variables()
                        curr_eps = eps_sent_list[i]
                        sent_vec = tf.add(
                            tf.multiply(tf.exp(self.logsigm_sent), curr_eps),
                            self.mean_sent)
                        if GSM:
                            sent_vec = tf.nn.softmax(sent_vec, axis=1)
                        sent_vec_list.append(sent_vec)
                    self.last_h_sent = tf.add_n(sent_vec_list) / self.n_sample
                self.last_h_sent = tf.reshape(self.last_h_sent, [
                    self.input_batch_size_sent, self.input_batch_len_sent,
                    self.n_topic
                ])

                if params.use_topic_embedding:
                    if params.use_k_topics > 0:
                        # Masking sentence topic proportion vector
                        top_k_h_sent_values, top_k_h_sent_indices = tf.nn.top_k(
                            self.last_h_sent,
                            k=params.use_k_topics,
                            sorted=False,
                            name='top_k_h_sent')
                        row_numbers_sent = tf.tile(tf.expand_dims(
                            tf.range(0, self.batch_size_sent), 1),
                                                   [1, params.use_k_topics],
                                                   name='row_numbers_sent')
                        full_indices_sent = tf.concat([
                            tf.expand_dims(row_numbers_sent, -1),
                            tf.expand_dims(top_k_h_sent_indices, -1)
                        ],
                                                      axis=2)
                        full_indices_sent = tf.reshape(
                            full_indices_sent, [-1, 2],
                            name='full_indices_sent')
                        last_h_softmax_sent = tf.scatter_nd(
                            full_indices_sent,
                            tf.reshape(
                                tf.nn.softmax(top_k_h_sent_values, axis=1),
                                [-1]), [self.batch_size_sent, self.n_topic],
                            name='last_h_softmax_sent')
                    else:
                        last_h_softmax_sent = tf.nn.softmax(
                            self.last_h_sent,
                            axis=2,
                            name='last_h_softmax_sent')

                    self.last_h_topic_emb_sent = tf.matmul(
                        last_h_softmax_sent,
                        self.topic_embeddings,
                        name='last_h_topic_emb_sent')
                    if concat_topic_emb_and_prop:
                        self.last_h_topic_emb_sent = tf.concat(
                            [self.last_h_topic_emb_sent, self.last_h_sent],
                            axis=2,
                            name='last_h_topic_emb_sent_concat')

        #self.objective_TM = self.recons_loss + self.kld
        #self.objective_TM = tf.add(self.recons_loss, self.kld, name='TM_loss_unnormed')
        self.final_loss = tf.add(self.recons_loss,
                                 self.kld,
                                 name='TM_loss_unnormed')
        self.objective_TM = tf.reduce_mean(self.final_loss)

        if params.TM_uniqueness_loss:
            ## TCNLM topic uniqueness loss
            normed_topic_matrix = self.decoding_matrix / tf.reduce_sum(
                self.decoding_matrix, axis=1, keepdims=True)
            l2_normalized_topic_matrix = tf.nn.l2_normalize(
                normed_topic_matrix, axis=1)
            cosine_similarity = tf.matmul(l2_normalized_topic_matrix,
                                          l2_normalized_topic_matrix,
                                          transpose_a=False,
                                          transpose_b=True)
            cosine_distance = tf.subtract(1.0, cosine_similarity)
            mean_cosine_distance = tf.reduce_mean(cosine_distance)
            variance = tf.reduce_mean(
                tf.square(tf.subtract(cosine_distance, mean_cosine_distance)))
            #uniqueness_loss = mean_cosine_distance - variance
            uniqueness_loss = -mean_cosine_distance + variance
            self.objective_TM += params.alpha_uniqueness * uniqueness_loss
            #self.objective_TM += 0.01 * uniqueness_loss

        if params.topic_coherence_reg:
            #E_normalized = W_prior / tf.reduce_sum(W_prior, axis=1, keepdims=True)
            E_normalized = tf.nn.l2_normalize(W_prior,
                                              axis=1,
                                              name='E_normalized')
            #W_normalized = self.decoding_matrix / tf.reduce_sum(self.decoding_matrix, axis=1, keepdims=True)
            W_normalized = tf.nn.l2_normalize(self.decoding_matrix,
                                              axis=1,
                                              name='W_normalized')
            topic_vectors = tf.transpose(tf.matmul(W_normalized, E_normalized),
                                         [1, 0],
                                         name='topic_vectors')
            #topic_vectors_normalized = topic_vectors / tf.reduce_sum(topic_vectors, axis=1, name='topic_vectors_normalized')
            topic_vectors_normalized = tf.nn.l2_normalize(
                topic_vectors, axis=0, name='topic_vectors_normalized')
            cos_sim_matrix = tf.transpose(tf.matmul(E_normalized,
                                                    topic_vectors_normalized),
                                          [1, 0],
                                          name='cos_sim_matrix')
            coherence_loss = -tf.reduce_sum(tf.multiply(
                cos_sim_matrix, W_normalized),
                                            name="coherence_loss")
            self.objective_TM += params.beta_coherence * coherence_loss

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        #fullvars = tf.trainable_variables()

        #enc_vars = utils.variable_parser(fullvars, 'TM_encoder')
        enc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope='TM_encoder')
        #dec_vars = utils.variable_parser(fullvars, 'TM_decoder')
        dec_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope='TM_decoder')

        enc_grads = tf.gradients(self.objective_TM, enc_vars)
        dec_grads = tf.gradients(self.objective_TM, dec_vars)

        self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
        self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
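The last_h_softmax construction above (keep the top-k topic logits, softmax them, zero the rest) is another scatter idiom worth isolating; a minimal sketch:

import tensorflow as tf

batch_size, n_topic, k = 2, 5, 2
last_h = tf.constant([[0.1, 2.0, 0.3, 1.5, 0.0],
                      [1.0, 0.2, 0.4, 0.3, 3.0]])
values, topk_indices = tf.nn.top_k(last_h, k=k, sorted=False)
rows = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, k])
full_indices = tf.reshape(tf.stack([rows, topk_indices], axis=-1), [-1, 2])
last_h_softmax = tf.scatter_nd(full_indices,
                               tf.reshape(tf.nn.softmax(values, axis=1), [-1]),
                               [batch_size, n_topic])
# each row holds a softmax over its two largest logits, zeros elsewhere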
Example #50
def mask_adaptive_logsoftmax(hidden,
                             target,
                             n_token,
                             d_embed,
                             d_proj,
                             cutoffs,
                             params,
                             tie_projs,
                             initializer=None,
                             proj_initializer=None,
                             div_val=1,
                             scope='adaptive_softmax',
                             proj_same_dim=True,
                             return_mean=True,
                             **kwargs):
    def _logit(x, W, b, proj):
        y = x
        if proj is not None:
            y = tf.einsum('ibd,ed->ibe', y, proj)
        return tf.einsum('ibd,nd->ibn', y, W) + b

    params_W, params_projs = params[0], params[1]

    def _gather_logprob(logprob, target):
        lp_size = tf.shape(logprob)
        r = tf.range(lp_size[0])
        idx = tf.stack([r, target], 1)
        return tf.gather_nd(logprob, idx)

    with tf.variable_scope(scope):
        if len(cutoffs) == 0:
            softmax_b = tf.get_variable('bias', [n_token],
                                        initializer=tf.zeros_initializer())
            output = _logit(hidden, params_W, softmax_b, params_projs)
            nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target,
                                                                 logits=output)
        else:
            cutoff_ends = [0] + cutoffs + [n_token]
            nll = tf.zeros_like(target, dtype=tf.float32)
            for i in range(len(cutoff_ends) - 1):
                with tf.variable_scope('cutoff_{}'.format(i)):
                    l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1]
                    mask = (target >= l_idx) & (target < r_idx)
                    mask_idx = tf.where(mask)
                    cur_target = tf.boolean_mask(target, mask) - l_idx
                    cur_d_embed = d_embed // (div_val**i)

                    if div_val == 1:
                        cur_W = params_W[l_idx:r_idx]
                    else:
                        cur_W = params_W[i]
                    cur_b = tf.get_variable('b', [r_idx - l_idx],
                                            initializer=tf.zeros_initializer())
                    if tie_projs[i]:
                        if div_val == 1:
                            cur_proj = params_projs
                        else:
                            cur_proj = params_projs[i]
                    else:
                        if (div_val == 1 or
                                not proj_same_dim) and d_proj == cur_d_embed:
                            cur_proj = None
                        else:
                            cur_proj = tf.get_variable(
                                'proj', [cur_d_embed, d_proj],
                                initializer=proj_initializer)
                    if i == 0:
                        cluster_W = tf.get_variable(
                            'cluster_W', [len(cutoffs), d_embed],
                            initializer=tf.zeros_initializer())
                        cluster_b = tf.get_variable(
                            'cluster_b', [len(cutoffs)],
                            initializer=tf.zeros_initializer())
                        cur_W = tf.concat([cur_W, cluster_W], 0)
                        cur_b = tf.concat([cur_b, cluster_b], 0)

                        head_logit = _logit(hidden, cur_W, cur_b, cur_proj)
                        head_logprob = tf.nn.log_softmax(head_logit)
                        cur_head_logprob = tf.boolean_mask(head_logprob, mask)
                        cur_logprob = _gather_logprob(cur_head_logprob,
                                                      cur_target)
                    else:
                        cur_head_logprob = tf.boolean_mask(head_logprob, mask)
                        cur_hidden = tf.boolean_mask(hidden, mask)
                        tail_logit = tf.squeeze(
                            _logit(cur_hidden[None], cur_W, cur_b, cur_proj),
                            0)
                        tail_logprob = tf.nn.log_softmax(tail_logit)
                        cur_logprob = (
                            cur_head_logprob[:, cutoff_ends[1] + i - 1] +
                            _gather_logprob(tail_logprob, cur_target))
                    nll += tf.scatter_nd(mask_idx, -cur_logprob,
                                         tf.to_int64(tf.shape(nll)))
    if return_mean:
        nll = tf.reduce_mean(nll)
    return nll
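
The masked scatter at the end of the loop is the standard inverse of tf.boolean_mask: tf.where(mask) supplies the row indices at which per-subset results are written back into a full-size tensor (note that scatter_nd wants the shape in the same dtype as the int64 indices, hence the to_int64 above). A minimal round trip of that pattern, assuming TensorFlow 1.x; names and values are illustrative:

import tensorflow as tf

values = tf.constant([10., 20., 30., 40.])
mask = tf.constant([True, False, True, False])

selected = tf.boolean_mask(values, mask)              # [10., 30.]
doubled = selected * 2.0                              # computation on the subset
restored = tf.scatter_nd(tf.where(mask),              # int64 indices of True entries
                         doubled,
                         tf.shape(values, out_type=tf.int64))

with tf.Session() as sess:
    print(sess.run(restored))                         # [20.  0. 60.  0.]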
Example #51
  def call(self, x, padding=None):
    """Return outputs of the feedforward network.

    Args:
      x: tensor with shape [batch_size, length, hidden_size]
      padding: (optional) Tensor with shape [batch_size, length]. If set, the
        padding values are temporarily removed from x (provided self.allow_pad
        is set) and placed back in the output tensor in the same locations.

    Returns:
      Output of the feedforward network.
      tensor with shape [batch_size, length, hidden_size]
    """
    padding = None if not self.allow_pad else padding

    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if padding is not None:
      with tf.name_scope("remove_padding"):
        # Flatten padding to [batch_size*length]
        pad_mask = tf.reshape(padding, [-1])

        nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))

        # Reshape x to [batch_size*length, hidden_size] to remove padding
        x = tf.reshape(x, [-1, self.hidden_size])
        x = tf.gather_nd(x, indices=nonpad_ids)

        # Reshape x from 2 dimensions to 3 dimensions.
        x.set_shape([None, self.hidden_size])
        x = tf.expand_dims(x, axis=0)

    output = self.filter_dense_layer(x)

    if self.train:
      output = tf.nn.dropout(output, 1.0 - self.relu_dropout)

    output = self.output_dense_layer(output)

    if padding is not None:
      with tf.name_scope("re_add_padding"):
        output = tf.squeeze(output, axis=0)
        output = tf.scatter_nd(
            indices=nonpad_ids,
            updates=output,
            shape=[batch_size * length, self.output_size]
        )
        output = tf.reshape(output, [batch_size, length, self.output_size])
    return output
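
The remove_padding/re_add_padding pair above is a throughput optimization: gather only the non-pad positions, run the dense layers on the packed tensor, then scatter_nd the results back, with padded slots reappearing as zeros. A toy round trip of the same idea, assuming TensorFlow 1.x (the shapes and the *10 stand-in computation are illustrative):

import tensorflow as tf

x = tf.reshape(tf.range(12, dtype=tf.float32), [2, 3, 2])  # [batch=2, length=3, hidden=2]
padding = tf.constant([[0., 0., 1.],                       # 1 marks padded positions
                       [0., 1., 1.]])

pad_mask = tf.reshape(padding, [-1])
nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))        # indices of real tokens

flat = tf.reshape(x, [-1, 2])
packed = tf.gather_nd(flat, nonpad_ids)                    # [n_real, 2]

processed = packed * 10.0                                  # stand-in for the FFN

restored = tf.scatter_nd(indices=nonpad_ids,
                         updates=processed,
                         shape=[2 * 3, 2])
restored = tf.reshape(restored, [2, 3, 2])                 # padded rows come back as zeros

with tf.Session() as sess:
    print(sess.run(restored))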
Example #52
def projection(XYZ, K, Extrinsic, H=224, W=224, reuse=False): #[B,N,3],[B,3,3],[B,4,4]
    """
    Apply projection to the point cloud
    Params:
    -- XYZ       : Point cloud. Tensor [batch_size, point_number, 3]
    -- K         : Internal parameters. Tensor [batch_size, 3, 3]
    -- Extrinsic : External parameters. Tensor [batch_size, 4, 4]
    -- H         : Downsampled height
    -- W         : Downsampled width

    Returns:
    -- newDepth  : Depth image. Tensor [batch_size, H, W]
    -- Cloud_mask: Front(visible) points mask. Tensor[batch_size, point_number]
    """


    XYZ = tf.transpose(XYZ, [0,2,1])
    batchSize = tf.shape(XYZ)[0]
    downscale_H = 224.0/H
    downscale_W = 224.0/W
    K = K*np.array([[1.0/downscale_H], [1.0/downscale_W], [1]], dtype=np.float32)
    N = tf.shape(XYZ)[2]
    H = tf.constant(H)
    W = tf.constant(W)
    bg = pow(2,16) - 1
    with tf.variable_scope("transform_render2D") as scope:
        if reuse:
            scope.reuse_variables()
        
        # ------ use camera calibration to compute new XYZ ------
        ones = tf.ones([batchSize, 1, N])
        XYZ = tf.concat([XYZ,ones], axis=1)# [B,4,N]
        XYZtemp = tf.matmul(Extrinsic, XYZ)# [B,4,N] = [B,4,4]*[B,4,N]
        XYZtemp = XYZtemp[:,:3,:]
        XYZnew = tf.matmul(K, XYZtemp)# [B,3,N] = [B,3,3]*[B,3,N]
        XYZnew = tf.transpose(XYZnew, [0,2,1]) # [B,N,3]
        eps = 1e-12
        X = tf.reshape(tf.to_int32(tf.round(tf.div(XYZnew[:,:,0], XYZnew[:,:,2] + eps))), [-1]) #[B*N,]
        Y = tf.reshape(tf.to_int32(tf.round(tf.div(XYZnew[:,:,1], XYZnew[:,:,2] + eps))), [-1]) #[B*N,]
        YX = tf.stack([Y,X], axis=1) #[B*N,2]
        Batch = tf.range(0, batchSize, 1)
        Batch = tf.tile(tf.expand_dims(Batch, axis=1),[1,N]) 
        Batch = tf.reshape(Batch, [batchSize*N, 1])
        scatterIndex = tf.concat([Batch, YX], axis=1) #[B*N,3]         
        scatterZ = tf.reshape(XYZnew[:,:,2],[-1]) #[B*N,]
        
        # ------ delete invalid points ------
        _, Y_Index, X_Index = tf.split(scatterIndex, 3, axis=1) #[B*N,1]
        X_Index = tf.squeeze(X_Index)
        Y_Index = tf.squeeze(Y_Index)
        Cloud_mask_pre = tf.range(0,batchSize*N,1)
        mask_inside = (X_Index >= 0)&(X_Index < W)&(Y_Index >= 0)&(Y_Index < H)&(scatterZ >=0)&(scatterZ <=10)
        mask_inside.set_shape([None])
        Cloud_mask_pre = tf.boolean_mask(Cloud_mask_pre,mask_inside)
        scatterIndex = tf.boolean_mask(scatterIndex, mask_inside)
        scatterZ = depthToint16(tf.boolean_mask(scatterZ, mask_inside)) #[B*N,]

        # ------ select front (visible) points ------
        seg_id = scatterIndex[:,0]*H*W + scatterIndex[:,1]*W + scatterIndex[:,2]
        seg_min = tf.unsorted_segment_max(-scatterZ, seg_id, batchSize*H*W) #[B*H*W,]
        seg_mask = tf.gather_nd(-seg_min, tf.expand_dims(seg_id, axis=1)) #[B*N,]
        mask = ((scatterZ - seg_mask) <= 0)
        Cloud_mask_pre = tf.boolean_mask(Cloud_mask_pre, mask)
        scatterIndex = tf.boolean_mask(scatterIndex, mask)

        # ------ compute depth images ------
        scatterZ = tf.boolean_mask(scatterZ, mask)
        scatterZ = scatterZ - bg
        newDepth = tf.scatter_nd(scatterIndex, scatterZ, shape=[batchSize, H, W]) #[B,H,W]
        newDepth = newDepth + bg
  
        # ------ compute front mask given extrinsic ------
        Cloud_mask = tf.scatter_nd(tf.expand_dims(Cloud_mask_pre, axis=1), tf.ones_like(Cloud_mask_pre), shape=[batchSize*N])
        Cloud_mask = (Cloud_mask > 0)
        Cloud_mask = tf.reshape(Cloud_mask, [batchSize,N])
   
        return newDepth, Cloud_mask
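
The front-point selection above is a z-buffer built from segment ops: negating the depths turns tf.unsorted_segment_max into a per-pixel minimum, and a point is kept exactly when its depth equals that minimum. The trick in isolation, assuming TensorFlow 1.x (pixel ids and depths are illustrative):

import tensorflow as tf

# Three points land on two pixels; pixel 0 receives depths 5 and 2.
pixel_ids = tf.constant([0, 0, 1])
depths = tf.constant([5.0, 2.0, 7.0])

num_pixels = 2
min_depth = -tf.unsorted_segment_max(-depths, pixel_ids, num_pixels)  # per-pixel minimum
nearest = tf.gather(min_depth, pixel_ids)
visible = depths <= nearest                                           # front points only

with tf.Session() as sess:
    print(sess.run(visible))   # [False  True  True]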
Example #53
    def call(self, seq1, seq2):
        """Creates targets for pairwise sequence alignment task from proj. MSA rows.

    Given a pair of projected rows from an MSA (i.e., with positions at which
    both rows have a gap removed), the ground-truth alignment targets are
    obtained by:
    1) Each position in the projected MSA is classified as _MATCH, _GAP_IN_X or
       _GAP_IN_Y.
    2) The positions of match states are retrieved, as well as the starting
       position of each sequence in the ground-truth (local) alignment.
    3) Positions before the first match state or after the last match state are
       discarded, as these do not belong to the local ground-truth alignment.
    4) For each pair of consecutive match states, where consecutive here is to
       be understood when ignoring non-match states, it is checked whether there
       are BOTH _GAP_IN_X and _GAP_IN_Y states in between.
    5) For each pair of consecutive match states with both _GAP_IN_X and
       _GAP_IN_Y states in between, these states are canonically sorted to
       ensure all _GAP_IN_X states occur first, being followed by all _GAP_IN_Y
       states.
    6) We encode transitions, that is, ordered tuples (s_old, s_new) of states
       using the 9 hidden state model described in `look_up` (c.f. `init`), with
       initial transition (_START, _MATCH) encoded as in `self._init_trans`.
     7) Given the new sequence of states, we reconstruct the positions in each
       sequence where those states would occur.
    8) Finally, optionally, if any special tokens are to be prepended to the
       sequences after this transformation, the ground-truth alignment targets
       will be adjusted accordingly. Note, however, that tokens being appended
       require no further modification.

    Args:
      seq1: A tf.Tensor<int>[len], representing the first proj. row of the MSA.
      seq2: A tf.Tensor<int>[len], representing the second proj. row of the MSA.

    Returns:
      A tf.Tensor<int>[3, tar_len] with three stacked tf.Tensor<int>[tar_len],
      pos1, pos2 and enc_trans, such that (pos1[i], pos2[i], enc_trans[i])
      represents the i-th transition in the ground-truth alignment. For example,
        (pos1[0], pos2[0], enc_trans[0]) = (1, 1, 3)
      would represent that the first transition in the ground-truth alignment is
      from the start state _START to the _MATCH(1,1) state whereas
        (pos1[2], pos2[2], enc_trans[2]) = (2, 5, 4)
      would represent that the third transition in the ground-truth alignment is
      from the match state _MATCH(2, 4) to the gap in X state _GAP_IN_X(2, 5).
      Both pos1 and pos2 use one-based indexing, reserving the use of the value
      zero for padding. In rare cases where the sequence pair has no aligned
      characters, tar_len will be zero.
    """
        keep_indices1 = tf.cast(
            self._vocab.compute_mask(seq1, self._gap_token), tf.int32)
        keep_indices2 = tf.cast(
            self._vocab.compute_mask(seq2, self._gap_token), tf.int32)
        states = keep_indices1 - keep_indices2
        m_states = tf.cast(tf.reshape(tf.where(states == self._MATCH), [-1]),
                           tf.int32)
        n_matches = len(m_states)
        if n_matches == 0:
            return tf.zeros([3, 0], tf.int32)
        start, end = m_states[0], m_states[-1]
        offset1 = tf.reduce_sum(keep_indices1[:start])
        offset2 = start - offset1
        offset1 += self._n_prepend_tokens
        offset2 += self._n_prepend_tokens
        states = states[start:end + 1]
        keep_indices1 = keep_indices1[start:end + 1]
        keep_indices2 = keep_indices2[start:end + 1]
        m_states -= start
        segment_ids = tf.cumsum(
            tf.scatter_nd(m_states[1:, tf.newaxis],
                          tf.ones(n_matches - 1, dtype=tf.int32),
                          shape=[len(states)]))
        aux1 = tf.math.segment_sum(1 - keep_indices1, segment_ids)[:-1]
        aux2 = tf.math.segment_max(1 - keep_indices2, segment_ids)[:-1]
        gap_gap_trans_m_states_indices = tf.reshape(tf.where(aux1 * aux2),
                                                    [-1])
        if len(gap_gap_trans_m_states_indices) > 0:  # pylint: disable=g-explicit-length-test
            for idx in gap_gap_trans_m_states_indices:
                s_i, e_i = m_states[idx] + 1, m_states[idx + 1]
                m_i = s_i + aux1[idx]
                v_x = tf.fill([aux1[idx]], self._GAP_IN_X)
                v_y = tf.fill([e_i - m_i], self._GAP_IN_Y)
                states = tf.raw_ops.TensorStridedSliceUpdate(input=states,
                                                             begin=[s_i],
                                                             end=[m_i],
                                                             strides=[1],
                                                             value=v_x)
                states = tf.raw_ops.TensorStridedSliceUpdate(input=states,
                                                             begin=[m_i],
                                                             end=[e_i],
                                                             strides=[1],
                                                             value=v_y)
        # Builds transitions.
        enc_trans = tf.gather(self._trans_encoder,
                              self._hash_fn(states[:-1], states[1:]))
        enc_trans = tf.concat([self._init_trans, enc_trans], 0)
        # Positions such that (pos1[i], pos2[i]) for i = 0, ..., align_len - 1
        # describes the alignment "path".
        pos1 = offset1 + tf.cumsum(tf.cast(states >= self._MATCH, tf.int32))
        pos2 = offset2 + tf.cumsum(tf.cast(states <= self._MATCH, tf.int32))
        return tf.stack([pos1, pos2, enc_trans])
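
Step 4 of the docstring leans on a compact idiom: scattering ones at the match positions and taking a cumulative sum yields segment ids that assign every position to the preceding match state, so tf.math.segment_sum / tf.math.segment_max can inspect each inter-match stretch. The idiom on its own, with illustrative positions (TensorFlow 1.x):

import tensorflow as tf

length = 7
m_states = tf.constant([0, 3, 5])          # positions of match states
n_matches = 3

segment_ids = tf.cumsum(
    tf.scatter_nd(m_states[1:, tf.newaxis],
                  tf.ones([n_matches - 1], dtype=tf.int32),
                  shape=[length]))

with tf.Session() as sess:
    print(sess.run(segment_ids))           # [0 0 0 1 1 2 2]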
Example #54
    def beam_decode(self,
                    init_h=None,
                    encoder_outputs=None,
                    input_valid_length=None,
                    decode=False):
        """
        Args:
            encoder_outputs (Variable, FloatTensor): [batch_size, source_length, hidden_size]
            input_valid_length (Variable, LongTensor): [batch_size] (optional)
            init_h (variable, FloatTensor): [batch_size, hidden_size] (optional)
        Return:
            out   : [batch_size, seq_len]
        """
        batch_size = self.batch_size(h=init_h)

        # [batch_size x beam_size]
        x = self.init_token(batch_size * self.beam_size, SOS_ID)

        # [num_layers, batch_size x beam_size, hidden_size]
        h = tf.tile(self.init_h(batch_size, hidden=init_h),
                    [1, self.beam_size, 1])

        # batch_position [batch_size]
        #   [0, beam_size, beam_size * 2, .., beam_size * (batch_size-1)]
        #   Points where batch starts in [batch_size x beam_size] tensors
        #   Ex. position_idx[5]: when 5-th batch starts
        batch_position = tf.range(0, batch_size,
                                  dtype=tf.int32) * self.beam_size

        # Initialize scores of sequence
        # [batch_size x beam_size]
        # Ex. batch_size: 5, beam_size: 3
        # [0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf]
        indice = tf.reshape(batch_position, [-1, 1])
        shape = tf.constant([batch_size * self.beam_size])
        updates = tf.constant([1] * batch_size)
        score = tf.cast((tf.scatter_nd(indice, updates, shape) - 1),
                        tf.float32) * float(9999999999)

        # Initialize Beam that stores decisions for backtracking
        beam = Beam(batch_size, self.hidden_size, self.vocab_size,
                    self.beam_size, self.max_unroll, batch_position)

        for i in range(self.max_unroll):
            # x: [batch_size x beam_size]; (token index)
            # =>
            # out: [batch_size x beam_size, vocab_size]
            # h: [num_layers, batch_size x beam_size, hidden_size]
            out, h = self.forward_step(x,
                                       h,
                                       encoder_outputs=encoder_outputs,
                                       input_valid_length=input_valid_length)
            # log_prob: [batch_size x beam_size, vocab_size]
            log_prob = tf.nn.log_softmax(out, axis=1)

            # [batch_size x beam_size]
            # => [batch_size x beam_size, vocab_size]
            score = tf.reshape(score, [-1, 1]) + log_prob

            # Select `beam size` transitions out of `vocab size` combinations

            # [batch_size x beam_size, vocab_size]
            # => [batch_size, beam_size x vocab_size]
            # Cutoff and retain candidates with top-k scores
            # score: [batch_size, beam_size]
            # top_k_idx: [batch_size, beam_size]
            #       each element of top_k_idx [0 ~ beam x vocab)

            score, top_k_idx = tf.math.top_k(
                tf.reshape(score, [batch_size, -1]), self.beam_size)

            # Get token ids: top_k_idx modulo vocab_size
            # Each element is among [0, vocab_size)
            # Ex. Index of token 3 in beam 4
            # (4 * vocab size) + 3 => 3
            # x: [batch_size x beam_size]
            x = tf.reshape((top_k_idx % self.vocab_size), [-1])

            # top-k-pointer [batch_size x beam_size]
            #       Points top-k beam that scored best at current step
            #       Later used as back-pointer at backtracking
            #       Each element is beam index: 0 ~ beam_size
            #                     + position index: 0 ~ beam_size x (batch_size-1)
            beam_idx = top_k_idx // self.vocab_size  # [batch_size, beam_size]
            top_k_pointer = tf.reshape(
                (beam_idx + tf.expand_dims(batch_position, 1)), [-1])

            # Select next h (size doesn't change)
            # [num_layers, batch_size * beam_size, hidden_size]
            h = tf.gather(h, top_k_pointer, axis=1)

            # Update sequence scores at beam
            beam.update(score, top_k_pointer, x)  # , h)

            # Erase scores for EOS so that they are not expanded
            # [batch_size, beam_size]
            eos_idx = tf.reshape(tf.math.equal(x, EOS_ID),
                                 [batch_size, self.beam_size])
            score = tf.where(eos_idx,
                             tf.fill(tf.shape(score), -float('inf')),
                             score)

        # prediction ([batch, k, max_unroll])
        #     A list of Tensors containing predicted sequence
        # final_score [batch, k]
        #     A list containing the final scores for all top-k sequences
        # length [batch, k]
        #     A list specifying the length of each sequence in the top-k candidates
        prediction, final_score, length = beam.backtrack()

        return prediction, final_score, length
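
The score initialization near the top of this method is itself a small scatter_nd trick: scatter a one at the first beam slot of every batch entry, subtract one, and scale by a large constant, so beam 0 starts at score 0 while all other beams start effectively at -inf and lose the first top-k. Reproduced standalone with illustrative sizes (TensorFlow 1.x):

import tensorflow as tf

batch_size, beam_size = 2, 3
batch_position = tf.range(0, batch_size, dtype=tf.int32) * beam_size  # [0, 3]

indice = tf.reshape(batch_position, [-1, 1])
shape = tf.constant([batch_size * beam_size])
updates = tf.constant([1] * batch_size)
score = tf.cast(tf.scatter_nd(indice, updates, shape) - 1,
                tf.float32) * float(9999999999)

with tf.Session() as sess:
    print(sess.run(score))  # approx. [0. -1e10 -1e10 0. -1e10 -1e10]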
Example #55
    def measure_homodyne(self, phi, mode, select=None, **kwargs):
        """
            Measures `mode` in the basis of quadrature eigenstates (rotated by phi)
            and updates the remaining modes conditioned on this result.
            After measurement, the state in `mode` is reset to the vacuum.

            Args:
                phi (float): phase angle of quadrature to measure
                mode (int): which mode to measure.
                select (float): user-specified measurement value (used instead of random sampling)
                **kwargs: can be used to pass a session or a feed_dict. Otherwise a temporary session
                and no feed_dict will be used.

        Returns:
            The measured value (or a list of measured values when running in batch mode).
        """

        if not isinstance(mode, int):
            raise ValueError("Specified modes are not valid.")
        else:
            if mode < 0 or mode >= self._num_modes:
                raise ValueError("Specified modes are not valid.")

        m_omega_over_hbar = 1 / self._hbar
        if self._state_is_pure:
            mode_size = 1
        else:
            mode_size = 2
        if self._batched:
            batch_offset = 1
            batch_size = self._batch_size
        else:
            batch_offset = 0
            batch_size = 1

        with self.graph.as_default():
            phi = tf.cast(phi, ops.def_type)
            phi = self._maybe_batch(phi)

            evaluate_results, session, feed_dict, close_session = ops._check_for_eval(
                kwargs)

            if select is not None:
                meas_result = self._maybe_batch(select)
                homodyne_sample = tf.cast(meas_result,
                                          tf.float64,
                                          name="Meas_result")
            else:
                # create reduced state on mode to be measured
                reduced_state = ops.reduced_density_matrix(
                    self._state, mode, self._state_is_pure, self._batched)

                # rotate to homodyne basis
                # pylint: disable=invalid-unary-operand-type
                reduced_state = ops.phase_shifter(-phi, 0, reduced_state,
                                                  self._cutoff_dim, False,
                                                  self._batched)

                # create pdf for homodyne measurement
                # We use the following quadrature wavefunction for the Fock states:
                # \psi_n(x) = 1/sqrt[2^n n!](\frac{m \omega}{\pi \hbar})^{1/4}
                #             \exp{-\frac{m \omega}{2\hbar} x^2} H_n(\sqrt{\frac{m \omega}{\pi}} x)
                # where H_n(x) is the (physicists) nth Hermite polynomial
                if "max" in kwargs:
                    q_mag = kwargs["max"]
                else:
                    q_mag = 10
                if "num_bins" in kwargs:
                    num_bins = kwargs["num_bins"]
                else:
                    num_bins = 100000
                if "q_tensor" in self._cache:
                    # use cached q_tensor
                    q_tensor = self._cache["q_tensor"]
                else:
                    q_tensor = tf.constant(np.linspace(-q_mag, q_mag,
                                                       num_bins))
                    self._cache["q_tensor"] = q_tensor
                x = np.sqrt(m_omega_over_hbar) * q_tensor
                if "hermite_polys" in self._cache:
                    # use cached polynomials
                    hermite_polys = self._cache["hermite_polys"]
                else:
                    H0 = 0 * x + 1.0
                    H1 = 2 * x
                    hermite_polys = [H0, H1]
                    Hn = H1
                    Hn_m1 = H0
                    for n in range(1, self._cutoff_dim - 1):
                        Hn_p1 = ops.H_n_plus_1(Hn, Hn_m1, n, x)
                        hermite_polys.append(Hn_p1)
                        Hn_m1 = Hn
                        Hn = Hn_p1
                    self._cache["hermite_polys"] = hermite_polys

                number_state_indices = [
                    k for k in product(range(self._cutoff_dim), repeat=2)
                ]
                terms = [
                    1 / np.sqrt(2**n * factorial(n) * 2**m * factorial(m)) *
                    hermite_polys[n] * hermite_polys[m]
                    for n, m in number_state_indices
                ]
                hermite_matrix = tf.scatter_nd(
                    number_state_indices, terms,
                    [self._cutoff_dim, self._cutoff_dim, num_bins])
                hermite_terms = tf.multiply(
                    tf.expand_dims(reduced_state, -1),
                    tf.expand_dims(tf.cast(hermite_matrix, ops.def_type), 0))
                rho_dist = tf.cast(tf.reduce_sum(hermite_terms, axis=[1, 2]), tf.float64) \
                                     * (m_omega_over_hbar / np.pi) ** 0.5 \
                                     * tf.exp(- x ** 2) \
                                     * (q_tensor[1] - q_tensor[0]) # Delta_q for normalization (only works if the bins are equally spaced)

                # use tf.multinomial to sample
                logprobs = tf.log(rho_dist)
                samples_idx = tf.multinomial(logprobs, 1)
                homodyne_sample = tf.gather(q_tensor, samples_idx)
                homodyne_sample = tf.squeeze(homodyne_sample)

            if evaluate_results:
                meas_result = homodyne_sample.eval(feed_dict, session)
                if close_session:
                    session.close()
            else:
                meas_result = tf.identity(homodyne_sample, name="Meas_result")

            # project remaining modes into conditional state
            if self._num_modes == 1:
                # in this case, all modes were measured and we put everything into vacuum
                self.reset(pure=self._state_is_pure)
            else:
                # only some modes were measured: put unmeasured modes in the conditional state, while resetting measured modes to vacuum
                inf_squeezed_vac = tf.convert_to_tensor(
                    [(-0.5)**(m // 2) * np.sqrt(factorial(m)) /
                     factorial(m // 2) if m % 2 == 0 else 0.
                     for m in range(self._cutoff_dim)],
                    dtype=ops.def_type)
                if self._batched:
                    inf_squeezed_vac = tf.tile(
                        tf.expand_dims(inf_squeezed_vac, 0), [batch_size, 1])
                displacement_size = tf.stack(
                    tf.convert_to_tensor(meas_result *
                                         np.sqrt(m_omega_over_hbar / 2)))
                quad_eigenstate = ops.displacement(displacement_size, 0,
                                                   inf_squeezed_vac,
                                                   self._cutoff_dim, True,
                                                   self._batched)
                homodyne_eigenstate = ops.phase_shifter(
                    phi, 0, quad_eigenstate, self._cutoff_dim, True,
                    self._batched)

                conditional_state = ops.conditional_state(
                    self._state,
                    homodyne_eigenstate,
                    mode,
                    self._state_is_pure,
                    batched=self._batched)

                # normalize
                if self._state_is_pure:
                    norm = tf.norm(tf.reshape(conditional_state,
                                              [batch_size, -1]),
                                   axis=1)
                else:
                    # calculate norm of conditional_state
                    # cheap hack since tensorflow doesn't allow einsum equation for trace:
                    r = conditional_state
                    for _ in range(self._num_modes - 2):
                        r = ops.partial_trace(r, 0, False, self._batched)
                    norm = tf.trace(r)

                # for broadcasting
                norm_reshape = [1] * len(
                    conditional_state.shape[batch_offset:])
                if self._batched:
                    norm_reshape = [self._batch_size] + norm_reshape

                normalized_conditional_state = conditional_state / tf.reshape(
                    norm, norm_reshape)

                # reset measured modes into vacuum
                meas_mode_vac = self._single_mode_pure_vac if self._state_is_pure else self._single_mode_mixed_vac
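                # NOTE: `indices` below is assumed to be a module-level string of
                # einsum subscript letters (e.g. string.ascii_lowercase); it is not
                # defined in this snippet.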
                batch_index = indices[:batch_offset]
                meas_mode_indices = indices[batch_offset:batch_offset +
                                            mode_size]
                conditional_indices = indices[batch_offset +
                                              mode_size:batch_offset +
                                              mode_size * self._num_modes]
                eqn_lhs = batch_index + meas_mode_indices + "," + batch_index + conditional_indices
                eqn_rhs = ''
                meas_ctr = 0
                cond_ctr = 0
                for m in range(self._num_modes):
                    if m == mode:
                        # use measured_indices
                        eqn_rhs += meas_mode_indices[mode_size *
                                                     meas_ctr:mode_size *
                                                     (meas_ctr + 1)]
                        meas_ctr += 1
                    else:
                        # use conditional indices
                        eqn_rhs += conditional_indices[mode_size *
                                                       cond_ctr:mode_size *
                                                       (cond_ctr + 1)]
                        cond_ctr += 1
                eqn = eqn_lhs + "->" + batch_index + eqn_rhs
                new_state = tf.einsum(eqn, meas_mode_vac,
                                      normalized_conditional_state)

                self._update_state(new_state)

            return meas_result
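
The sampling step above discretizes the homodyne pdf on a grid and draws a bin with tf.multinomial, which expects a 2-D tensor of log-probabilities with a leading batch dimension. The same mechanism on a hand-made distribution, assuming TensorFlow 1.x (in later releases tf.multinomial became tf.random.categorical):

import tensorflow as tf
import numpy as np

q_tensor = tf.constant(np.linspace(-1.0, 1.0, 5))
probs = tf.constant([[0.05, 0.2, 0.5, 0.2, 0.05]])  # batch dimension of 1

logprobs = tf.log(probs)
samples_idx = tf.multinomial(logprobs, 1)           # [1, 1] sampled bin index
sample = tf.squeeze(tf.gather(q_tensor, samples_idx))

with tf.Session() as sess:
    print(sess.run(sample))  # a grid value, most often 0.0 (the center bin)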
Example #56
def tensormol_acsf(xyzs, Zs, elements, element_pairs, radial_cutoff,
                   angular_cutoff, radial_rs, angular_rs, theta_s, zeta, eta):
    """
    This function uses the tensormol atom centred symmetry functions.

    :param xyzs: tensor of shape (n_samples, n_atoms, 3)
    :param Zs: tensor of shape (n_samples, n_atoms)
    :param elements: np.array of shape (n_elements,)
    :param element_pairs: np.array of shape (n_elementpairs, 2)
    :param radial_cutoff: scalar float
    :param angular_cutoff: scalar float
    :param radial_rs: np.array of shape (n_rad_rs,)
    :param angular_rs: np.array of shape (n_ang_rs,)
    :param theta_s: np.array of shape (n_thetas,)
    :param zeta: scalar float
    :param eta: scalar float
    :return: a tf tensor of shape (n_samples, n_atoms, n_rad_rs * n_elements + n_ang_rs * n_thetas * n_elementpairs)
    """

    # The data
    with tf.name_scope("Params"):
        elements = tf.constant(elements, dtype=tf.int32)
        element_pairs = tf.constant(np.flip(element_pairs, axis=1),
                                    dtype=tf.int32)

        radial_cutoff = tf.constant(radial_cutoff, dtype=tf.float32)
        angular_cutoff = tf.constant(angular_cutoff, dtype=tf.float32)
        radial_rs = tf.constant(radial_rs, dtype=tf.float32)
        angular_rs = tf.constant(angular_rs, dtype=tf.float32)
        theta_s = tf.constant(theta_s, dtype=tf.float32)
        zeta = tf.constant(zeta, dtype=tf.float32)
        eta = tf.constant(eta, dtype=tf.float32)

        num_molecules = Zs.get_shape().as_list()[0]
        num_elements = elements.get_shape().as_list()[0]
        num_element_pairs = element_pairs.get_shape().as_list()[0]

    with tf.name_scope("Radial"):
        radial_embedding, pair_indices_rad, pair_elements = tf_symmetry_functions_radial_grid(
            xyzs, Zs, radial_cutoff, radial_rs, eta)
    with tf.name_scope("Angular"):
        angular_embedding, triples_indices, triples_element, sorted_triples_element_pairs = tf_symmetry_function_angular_grid(
            xyzs, Zs, angular_cutoff, angular_rs, theta_s, zeta, eta)

    with tf.name_scope("Sum_rad"):
        pair_element_indices = tf.cast(
            tf.where(
                tf.equal(tf.expand_dims(pair_elements[:, 1], axis=-1),
                         tf.expand_dims(elements, axis=0))), tf.int32)[:, 1]
        triples_elements_indices = tf.cast(
            tf.where(
                tf.reduce_all(tf.equal(
                    tf.expand_dims(sorted_triples_element_pairs, axis=-2),
                    element_pairs),
                              axis=-1)), tf.int32)[:, 1]

        radial_scatter_indices = tf.concat(
            [pair_indices_rad,
             tf.expand_dims(pair_element_indices, axis=1)],
            axis=1)
        angular_scatter_indices = tf.concat([
            triples_indices,
            tf.expand_dims(triples_elements_indices, axis=1)
        ],
                                            axis=1)

        radial_molecule_embeddings = tf.dynamic_partition(
            radial_embedding, pair_indices_rad[:, 0], num_molecules)
        radial_atom_indices = tf.dynamic_partition(
            radial_scatter_indices[:, 1:], pair_indices_rad[:, 0],
            num_molecules)
        angular_molecule_embeddings = tf.dynamic_partition(
            angular_embedding, triples_indices[:, 0], num_molecules)
        angular_atom_indices = tf.dynamic_partition(
            angular_scatter_indices[:, 1:], triples_indices[:, 0],
            num_molecules)

    with tf.name_scope("Sum_ang"):
        embeddings = []
        mol_atom_indices = []
        for molecule in range(num_molecules):
            atom_indices = tf.cast(tf.where(tf.not_equal(Zs[molecule], 0)),
                                   tf.int32)
            molecule_atom_elements = tf.gather_nd(Zs[molecule], atom_indices)
            num_atoms = tf.shape(molecule_atom_elements)[0]
            radial_atom_embeddings = tf.reshape(
                tf.reduce_sum(tf.scatter_nd(
                    radial_atom_indices[molecule],
                    radial_molecule_embeddings[molecule], [
                        num_atoms, num_atoms, num_elements,
                        tf.shape(radial_rs)[0]
                    ]),
                              axis=1), [num_atoms, -1])
            angular_atom_embeddings = tf.reshape(
                tf.reduce_sum(tf.scatter_nd(
                    angular_atom_indices[molecule],
                    angular_molecule_embeddings[molecule], [
                        num_atoms, num_atoms, num_atoms, num_element_pairs,
                        tf.shape(angular_rs)[0] * tf.shape(theta_s)[0]
                    ]),
                              axis=[1, 2]), [num_atoms, -1])
            embeddings.append(
                tf.concat([radial_atom_embeddings, angular_atom_embeddings],
                          axis=1))
            mol_atom_indices.append(
                tf.concat([tf.fill([num_atoms, 1], molecule), atom_indices],
                          axis=1))

        embeddings = tf.concat(embeddings, axis=0)
        mol_atom_indices = tf.concat(mol_atom_indices, axis=0)
        atom_Zs = tf.cast(tf.gather_nd(Zs, tf.where(tf.not_equal(Zs, 0))),
                          dtype=tf.int32)
        atom_Z_indices = tf.cast(
            tf.where(
                tf.equal(tf.expand_dims(atom_Zs, axis=1),
                         tf.expand_dims(elements, axis=0)))[:, 1], tf.int32)

    with tf.name_scope("Result"):
        element_embeddings = tf.dynamic_partition(embeddings, atom_Z_indices,
                                                  num_elements)
        mol_indices = tf.dynamic_partition(mol_atom_indices, atom_Z_indices,
                                           num_elements)

    return embeddings
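
Both embedding sums in this function depend on the fact that tf.scatter_nd accumulates updates that share an index, so scattering per-pair (or per-triple) terms into an atom-indexed grid and reducing over the neighbor axes adds up every contribution. The duplicate-index behavior in miniature (TensorFlow 1.x; values illustrative):

import tensorflow as tf

indices = tf.constant([[0], [0], [2]])     # index 0 appears twice
updates = tf.constant([1.0, 2.0, 5.0])

summed = tf.scatter_nd(indices, updates, shape=[3])

with tf.Session() as sess:
    print(sess.run(summed))  # [3. 0. 5.] -- duplicate indices are accumulated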
Example #57
def render_nd_bboxes_tf_spreading(elems, target_shape, ndim=2):
    """
    elems: tensor of size [..., n_boxes, 2*ndim + val_dim], where in the last dimension,
     there are packed edge coordinates and values (of val_dim) to be filled in the specified box.
    target_shape: list/tuple of ndim entries.
    returns: rendered image of size [elems(...), target_shape..., val_dim]
    ('elems(...)' usually means batch_size)
    """
    assert_shape_ndim = tf.Assert(tf.equal(tf.size(target_shape), ndim),
                                  [target_shape])
    assert_nonempty_data = tf.Assert(tf.greater(tf.shape(elems)[-1], 2 * ndim),
                                     [elems])

    with tf.control_dependencies([assert_shape_ndim, assert_nonempty_data]):
        """
        +1 ...... -1      ++++++      ++++++
        ...........       ......      ++++++
        ...........    -> ......   -> ++++++
        ...........       ------      ++++++
        -1        +1
        in 3d there must be another wall of minuses. looking like that:

        -   +
        .....
        +   -

        so when mapping corner ids in {0, 1} to ltrb coordinates: a corner is '+' when it contains an even number of 1s, '-' when odd (matching sum(corner) % 2 == 0 below).
        """
        el_ndim = len(elems.shape)
        # The number of dimensions is read at graph-construction time rather than
        # at TensorFlow runtime, since it must be known statically here.

        assert el_ndim >= 2 and el_ndim <= 3, "elements should be in the form of [batch, n, coordinates] or [n, " \
                                              "coordinates]"
        if el_ndim == 3:  # we use batch_size dimension also!
            bboxes_per_batch = tf.shape(elems)[1]
            batch_size = tf.shape(elems)[0]  # should be the same as image_input.shape[0]
            index_to_batch = tf.tile(tf.expand_dims(tf.range(batch_size), -1),
                                     (1, bboxes_per_batch))
            index_to_batch = tf.reshape(index_to_batch, (-1, 1))
        else:
            index_to_batch = None

        val_vector_size = tf.shape(elems)[-1] - 2 * ndim

        corner_ids = list(itertools.product([0, 1], repeat=ndim))
        corners_lists = []
        corners_values = []
        for corner in corner_ids:
            plus = sum(corner) % 2 == 0
            id_from_corner = [
                i + ndim * c for i, c in enumerate(corner)
            ]  # indexes a corner into [left, top, right, bottom] notation
            corner_coord = tf.gather(elems[..., 0:2 * ndim],
                                     id_from_corner,
                                     axis=-1)
            corner_value = elems[..., 2 * ndim:] * (
                1 if plus else -1)  # last dimension is == val_vector_size
            if index_to_batch is not None:
                # if the operation is called in batches, reshape everything into one
                # long list for scatter_nd and concatenate the batch ids
                corner_coord = tf.concat(
                    [index_to_batch,
                     tf.reshape(corner_coord, (-1, 2))],
                    axis=-1)
                corner_value = tf.reshape(corner_value, (-1, val_vector_size))
            corners_lists.append(corner_coord)
            corners_values.append(corner_value)

        indices = tf.concat(corners_lists, axis=0)
        updates = tf.concat(corners_values, axis=0)
        shape = tf.concat(
            [tf.shape(elems)[:-2], target_shape, [val_vector_size]], axis=0)

        dense_orig = tf.scatter_nd(
            indices,
            updates,
            shape=shape,
        )

        dense = dense_orig
        for dim in range(ndim):
            # we want to start from the axis before the last one. The last one is the value dimension, and
            # the first dimensions hidden in the '...' might be the batched dimensions
            dense = tf.cumsum(dense,
                              axis=-2 - dim,
                              exclusive=False,
                              reverse=False,
                              name=None)

        return dense
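
The ASCII diagram in the comment above is the whole algorithm: scatter +1/-1 impulses at the box corners (sign given by corner parity) and integrate with cumsum along each spatial axis, which fills every box interior in one pass regardless of the number of boxes. A 2-D toy version of that corner trick (TensorFlow 1.x; the single box is illustrative):

import tensorflow as tf

# One box spanning rows/cols 1..3 (corner 4 is exclusive) on a 5x5 canvas,
# rendered without any per-box loop.
indices = tf.constant([[1, 1],   # +1 at top-left
                       [1, 4],   # -1 at top-right
                       [4, 1],   # -1 at bottom-left
                       [4, 4]])  # +1 at bottom-right
updates = tf.constant([1, -1, -1, 1])

sparse = tf.scatter_nd(indices, updates, shape=[5, 5])
filled = tf.cumsum(tf.cumsum(sparse, axis=0), axis=1)

with tf.Session() as sess:
    print(sess.run(filled))  # ones inside rows/cols 1..3, zeros elsewhere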
Example #58
    def model_fn(features, labels, mode, params):
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        # Inputs
        tokens = features['features']  # (N, L)
        token_lengths = features['feature_length']  # (N,)
        sequence_mask = tf.sequence_mask(maxlen=tf.shape(tokens)[1], lengths=token_lengths)  # (N,L)
        n = tf.shape(tokens)[0]
        L = tf.shape(tokens)[1]

        with tf.control_dependencies([
            tf.assert_greater_equal(params.flat_length, token_lengths, message="Tokens longer than tree size"),
            tf.assert_greater(vocab_size, tokens, message="Tokens larger than vocab"),
            tf.assert_greater_equal(tokens, 0, message="Tokens less than 0")
        ]):
            tokens = tf.identity(tokens)

        if params.l2 > 0:
            weights_regularizer = slim.l2_regularizer(params.l2)
        else:
            weights_regularizer = None

        # Encoder
        mu_t, logsigma_t = vae_flat_encoder_simple(
            tokens=tokens,
            token_lengths=token_lengths,
            vocab_size=vocab_size,
            params=params,
            n=n,
            weights_regularizer=weights_regularizer
        )  # (L,N,D)
        mu = tf.transpose(mu_t, (1, 0, 2))  # (N,L,D)
        logsigma = tf.transpose(logsigma_t, (1, 0, 2))  # (N,L,D)

        # Sampling
        idx = tf.where(sequence_mask)
        with tf.name_scope("kl"):
            selected_mu = tf.gather_nd(params=mu, indices=idx)
            selected_logsigma = tf.gather_nd(params=logsigma, indices=idx)
            latent_sample_values, latent_prior_sample_values = kl(
                mu=selected_mu,
                logsigma=selected_logsigma,
                params=params,
                n=n)
            latent_sample = tf.scatter_nd(
                updates=latent_sample_values,
                indices=idx,
                shape=(n, L, latent_sample_values.shape[-1].value)
            )  # (N,L,D)
            latent_prior_sample = tf.scatter_nd(
                updates=latent_prior_sample_values,
                indices=idx,
                shape=(n, L, latent_prior_sample_values.shape[-1].value)
            )  # (N,L,D)

        # Decoder
        with tf.variable_scope('vae_decoder') as decoder_scope:
            logits, penalty = vae_decoder_dag(
                latent=latent_sample,
                vocab_size=vocab_size,
                sequence_lengths=token_lengths,
                params=params,
                weights_regularizer=weights_regularizer,
                n=n,
                is_training=is_training
            )
        with tf.name_scope("dag_penalty"):
            penalty_scale = get_penalty_scale_logistic(params)
            dag_penalty_raw = tf.reduce_mean(tf.square(penalty))
            weighted_dag_penalty = penalty_scale * dag_penalty_raw
            tf.losses.add_loss(loss=weighted_dag_penalty, loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
            tf.summary.scalar('dag_penalty_scale', penalty_scale)
            tf.summary.scalar('dag_penalty_raw', dag_penalty_raw)
            tf.summary.scalar('dag_penalty_weighted', weighted_dag_penalty)

        # Loss calculation
        logits_values = tf.gather_nd(params=logits, indices=idx)
        labels_values = tf.gather_nd(params=tokens, indices=idx)
        onehot_labels_values = tf.one_hot(indices=labels_values, depth=vocab_size)
        loss_values = tf.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels_values,
            logits=logits_values,
            reduction=tf.losses.Reduction.NONE,
            loss_collection=None
        )
        loss_arr = tf.scatter_nd(updates=loss_values, indices=idx, shape=(n, L))
        loss_n = tf.reduce_sum(loss_arr, axis=-1)
        loss = tf.reduce_mean(loss_n)
        tf.losses.add_loss(loss)
        tf.summary.scalar("softmax_cross_entropy", loss)

        total_loss = tf.losses.get_total_loss()

        # Generated data
        with tf.variable_scope(decoder_scope, reuse=True):
            glogits, _ = vae_decoder_dag(
                latent=latent_prior_sample,
                vocab_size=vocab_size,
                sequence_lengths=token_lengths,
                params=params,
                weights_regularizer=weights_regularizer,
                n=n,
                is_training=is_training
            )

        # Hooks
        autoencode_hook = DAGHook(
            logits=logits,
            true=tokens,
            vocab=vocab,
            path=os.path.join(run_config.model_dir, "autoencoded", "autoencoded-{:08d}.csv"),
            name="Autoencoded",
            idx=idx
        )
        generate_hook = DAGHook(
            logits=glogits,
            true=tokens,
            vocab=vocab,
            path=os.path.join(run_config.model_dir, "generated", "generated-{:08d}.csv"),
            name="Generated",
            idx=idx
        )
        evaluation_hooks = [autoencode_hook, generate_hook]

        # Train
        optimizer = tf.train.AdamOptimizer(params.lr)
        train_op = slim.learning.create_train_op(
            total_loss,
            optimizer,
            clip_gradient_norm=params.clip_gradient_norm)
        eval_metric_ops = {
            'cross_entropy_eval': tf.metrics.mean(loss_n),
            'token_lengths_eval': tf.metrics.mean(token_lengths)
        }

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops=eval_metric_ops,
            evaluation_hooks=evaluation_hooks,
            train_op=train_op)
Example #59
TF_B_map = tf.constant(np.squeeze(B_map))
OPD_mask_flat = tf.Variable(
    np.reshape(OPD_mask, OPD_mask.shape[0] * OPD_mask.shape[1]))

# Placeholder for the learning rate
TF_lr = tf.placeholder(tf.float32, shape=[])
TF_lambda_TV = tf.placeholder(tf.float32, shape=[])
TF_epsC = tf.placeholder(tf.float32, shape=[])

# This is the matlab reconstruction (Minimum Norm) #TODO: We want to compute this in Python too!
TF_opd = tf.Variable(myinitopd)

# We only want to update the inner part of the mask (where OPD_mask is greater than 0)
updates = tf.boolean_mask(TF_opd, OPD_mask > 0)  # TF_opd*OPD_mask
indexes = tf.cast(tf.where(OPD_mask > 0), tf.int32)
TF_opd_masked = tf.scatter_nd(indexes, updates, tf.shape(OPD_mask))

# Compute the "Guess" based on the Variable OPD
TF_R_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(R_fit_func.coeffs))
TF_G_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(G_fit_func.coeffs))
TF_B_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(B_fit_func.coeffs))
''' formulate cost-fct 1: '''
# We want to add a smoothness constraint on the result coming from the L2 minimization.
# This is done by adding a TV regularizer on the indexed image.
# The term below reduces the L2 distance between the measured RGB pixels and the ones
# predicted from the RGB-OPD lookup table.
TF_mySqrError = tf.reduce_mean(
    ((TF_R_guess - TF_R_exp)**2 + (TF_G_guess - TF_G_exp)**2 +
     (TF_B_guess - TF_B_exp)**2))

# in order to have a smooth phase without discontinuities we want to have a small TV norm
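
The TF_opd_masked construction above (tf.boolean_mask for the values, tf.where for the indices, tf.scatter_nd to reassemble) is what confines the optimization to the interior of the mask: pixels outside OPD_mask never enter the forward model, so they receive no gradient. The same construction on a toy array, assuming TensorFlow 1.x (mask and values illustrative):

import tensorflow as tf
import numpy as np

mask = np.array([[0, 1], [1, 0]], dtype=np.float32)
var = tf.Variable([[1.0, 2.0], [3.0, 4.0]])

updates = tf.boolean_mask(var, mask > 0)                 # values inside the mask
indexes = tf.cast(tf.where(mask > 0), tf.int32)
masked = tf.scatter_nd(indexes, updates, tf.shape(tf.constant(mask)))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(masked))  # [[0. 2.] [3. 0.]] -- outside-mask entries stay zero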
Example #60
    def inference(self,
                  inputs,
                  nb_classes,
                  bias_mat,
                  hid_units,
                  n_heads,
                  activation=tf.nn.elu,
                  residual=False,
                  k=0.5):

        select_num = tf.cast(inputs.shape[1].value * k, dtype=tf.int32)

        p = tf.Variable(
            tf.truncated_normal([int(inputs.shape[-1]), 1], stddev=0.1))
        mean_sum = tf.reshape(
            tf.matmul(inputs, p) / tf.reduce_sum(tf.square(p)),
            [-1, int(inputs.shape[1])])

        a_top, a_top_idx = tf.nn.top_k(mean_sum, select_num)

        a_top_1, a_top_idx_1 = tf.nn.top_k(mean_sum, inputs.shape[1])

        a_shape = tf.shape(mean_sum)
        a_top_sm = a_top * 0 + 1

        a_row_idx = tf.tile(
            tf.range(a_shape[0])[:, tf.newaxis], (1, select_num))
        """
        a_row_idx = [array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
       ...
       [15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]],
      dtype=int32)]
        """
        scatter_idx = tf.stack([a_row_idx, a_top_idx], axis=-1)
        result = tf.scatter_nd(scatter_idx, a_top_sm, a_shape)
        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, inputs.shape[-1]))
        inputs = a_index * inputs

        attns = []
        for _ in range(n_heads[0]):
            attns.append(
                attn_head(inputs,
                          bias_mat=bias_mat,
                          out_sz=hid_units[0],
                          activation=activation,
                          residual=False))
        h_1 = tf.concat(attns, axis=-1)
        for i in range(1, len(hid_units)):
            attns = []
            for _ in range(n_heads[i]):
                attns.append(
                    attn_head(h_1,
                              bias_mat=bias_mat,
                              out_sz=hid_units[i],
                              activation=activation,
                              residual=residual))
            h_1 = tf.concat(attns, axis=-1)

        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, h_1.shape[-1]))
        h_1 = a_index * h_1

        logits = tf.layers.dense(inputs=h_1,
                                 units=nb_classes,
                                 activation=tf.nn.leaky_relu)
        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, logits.shape[-1]))
        logits = a_index * logits
        return a_index, h_1, logits, inputs, select_num, a_top_idx_1
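
The top-k gating at the start of inference is one more scatter_nd mask construction: pair each row index with its top-k column indices, scatter ones at those pairs, and multiply the resulting 0/1 mask into the features so non-selected nodes are zeroed. Reduced to essentials (TensorFlow 1.x; scores and k are illustrative):

import tensorflow as tf

scores = tf.constant([[0.1, 0.9, 0.4],
                      [0.8, 0.2, 0.5]])
k = 2

a_top, a_top_idx = tf.nn.top_k(scores, k)
a_shape = tf.shape(scores)
a_row_idx = tf.tile(tf.range(a_shape[0])[:, tf.newaxis], (1, k))
scatter_idx = tf.stack([a_row_idx, a_top_idx], axis=-1)        # [rows, k, 2]
keep_mask = tf.scatter_nd(scatter_idx, a_top * 0 + 1, a_shape) # ones at top-k slots

with tf.Session() as sess:
    print(sess.run(keep_mask))  # [[0. 1. 1.] [1. 0. 1.]]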