Example #1
0
def add_volume_iou_metrics(inputs, outputs):
  """Computes the per-instance volume IOU.

  Voxels are binarized at 0.5 and mapped onto three class ids so that only
  the "occupied" class can overlap between labels and predictions:
    * occupied voxel     -> class 0 (labels and predictions)
    * empty label voxel  -> class 1
    * empty predicted voxel -> class 2
  Classes 1 and 2 can never match each other, so the only class with a
  non-zero IOU is class 0; the mean IOU is multiplied by 3.0 to undo the
  averaging over the three classes.

  Args:
    inputs: Input dictionary of the voxel generation model.
    outputs: Output dictionary returned by the voxel generation model.

  Returns:
    names_to_values: metrics->values (dict).
    names_to_updates: metrics->ops (dict).

  """
  names_to_values = dict()
  names_to_updates = dict()
  labels = tf.greater_equal(inputs['voxels'], 0.5)
  predictions = tf.greater_equal(outputs['voxels_1'], 0.5)
  # Class ids passed to mean_iou must lie in [0, num_classes), hence the
  # zero-based mapping: True -> 0, False -> 1 for labels ...
  labels = 1 - tf.to_int32(labels)
  # ... and True -> 0, False -> 2 for predictions.
  predictions = 2 - tf.to_int32(predictions) * 2
  tmp_values, tmp_updates = tf.metrics.mean_iou(
      labels=labels,
      predictions=predictions,
      num_classes=3)
  names_to_values['volume_iou'] = tmp_values * 3.0
  names_to_updates['volume_iou'] = tmp_updates
  return names_to_values, names_to_updates
Example #2
0
def crossentropy(logits, targets, sequence_length):
    """ Sums the masked per-token cross entropy over a batch.

    The loss returned here is a plain sum over all non-padded positions; it
    is NOT divided by the batch size. The batch size is returned alongside
    it so that the caller can normalize.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The length of `targets`, [batch_size, ]

    Returns: Loss sum and weight sum (the batch size as a float).
    """
    # Per-token losses, [timesteps, batch_size].
    token_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)

    # sequence_mask is batch-major, so transpose it to [timesteps, batch_size].
    batch_major_mask = tf.sequence_mask(
        lengths=tf.to_int32(sequence_length),
        maxlen=tf.to_int32(tf.shape(targets)[0]),
        dtype=tf.float32)
    time_major_mask = tf.transpose(batch_major_mask, [1, 0])

    masked_total = tf.reduce_sum(token_losses * time_major_mask)
    batch_size = tf.to_float(tf.shape(sequence_length)[0])
    return masked_total, batch_size
Example #3
0
def pad_to_multiple(tensor, multiple):
  """Zero-pads height and width up to the next multiple of `multiple`.

  Zeros are appended at the end of dimensions 1 and 2 (height and width)
  until both sizes are multiples of `multiple`. For example, a tensor of
  shape [1, 3, 5, 1] padded to a multiple of 4 ends up with shape
  [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
            tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
  static_dims = tensor.get_shape()
  batch_size = static_shape.get_batch_size(static_dims)
  height = static_shape.get_height(static_dims)
  width = static_shape.get_width(static_dims)
  depth = static_shape.get_depth(static_dims)

  def _size_and_padded_size(dim, axis):
    """Returns (size, size rounded up to `multiple`) for one spatial axis."""
    if dim is not None:
      # Statically known: do the arithmetic in Python.
      return dim, int(math.ceil(float(dim) / multiple) * multiple)
    # Unknown at graph construction time: fall back to the dynamic shape.
    dynamic_dim = tf.shape(tensor)[axis]
    num_multiples = tf.ceil(tf.to_float(dynamic_dim) / tf.to_float(multiple))
    return dynamic_dim, tf.to_int32(num_multiples) * multiple

  if batch_size is None:
    batch_size = tf.shape(tensor)[0]
  height, padded_height = _size_and_padded_size(height, 1)
  width, padded_width = _size_and_padded_size(width, 2)
  if depth is None:
    depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
  if padded_height != height:
    extra_rows = tf.zeros([batch_size, padded_height - height, width, depth])
    tensor = tf.concat([tensor, extra_rows], 1)
  if padded_width != width:
    # The height has already been padded at this point, so the new columns
    # must span the padded height.
    extra_cols = tf.zeros(
        [batch_size, padded_height, padded_width - width, depth])
    tensor = tf.concat([tensor, extra_cols], 2)

  return tensor
Example #4
0
def adjust_bboxes(bboxes, old_height, old_width, new_height, new_width):
    """Rescales bounding boxes to match an image that has been resized.

    Args:
        bboxes: Tensor with shape (num_bboxes, 5). Last element is the label.
        old_height: Float. Height of the original image.
        old_width: Float. Width of the original image.
        new_height: Float. Height of the image after resizing.
        new_width: Float. Width of the image after resizing.
    Returns:
        Tensor with shape (num_bboxes, 5), with the adjusted bboxes.
    """
    x_min, y_min, x_max, y_max, label = tf.unstack(
        tf.to_float(bboxes), axis=1)

    def _rescale(coord, old_size, new_size):
        # Normalize by the old size, scale to the new size, cast to int32.
        return tf.to_int32((coord / old_size) * new_size)

    columns = [
        _rescale(x_min, old_width, new_width),
        _rescale(y_min, old_height, new_height),
        _rescale(x_max, old_width, new_width),
        _rescale(y_max, old_height, new_height),
        tf.to_int32(label),  # The label only needs to be cast back to int.
    ]
    # Reassemble into a [num_bboxes, 5] tensor.
    return tf.stack(columns, axis=1)
    def decoder(self, logits_main, logits_sub, inputs_seq_len, beam_width=1):
        """Build the CTC decoding operations for the main and sub tasks.
        Args:
            logits_main: A tensor of size `[T, B, input_size]`
            logits_sub: A tensor of size `[T, B, input_size]`
            inputs_seq_len: A tensor of size `[B]`
            beam_width (int, optional): beam width for beam search.
                A value of 1 selects greedy decoding instead of beam search.
        Return:
            decode_op_main: operation for decoding of the main task
            decode_op_sub: operation for decoding of the sub task
        """
        assert isinstance(beam_width, int), "beam_width must be integer."
        assert beam_width >= 1, "beam_width must be >= 1"

        use_greedy = beam_width == 1

        def _decode(logits):
            # Both decoders return (decoded_paths, scores); only paths are used.
            if use_greedy:
                paths, _ = tf.nn.ctc_greedy_decoder(logits, inputs_seq_len)
            else:
                paths, _ = tf.nn.ctc_beam_search_decoder(
                    logits, inputs_seq_len, beam_width=beam_width)
            return paths

        paths_main = _decode(logits_main)
        paths_sub = _decode(logits_sub)

        # Keep only the first decoded path of each task, cast to int32.
        return tf.to_int32(paths_main[0]), tf.to_int32(paths_sub[0])
Example #6
0
def _smallest_size_at_least(height, width, smallest_side):
    """Computes new shape with the smallest side equal to `smallest_side`.

    Computes new shape with the smallest side equal to `smallest_side` while
    preserving the original aspect ratio.

    Args:
      height: an int32 scalar tensor indicating the current height.
      width: an int32 scalar tensor indicating the current width.
      smallest_side: A python integer or scalar `Tensor` indicating the size of
        the smallest side after resize.

    Returns:
      new_height: an int32 scalar tensor indicating the new height.
      new_width: and int32 scalar tensor indicating the new width.
    """
    smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

    height = tf.to_float(height)
    width = tf.to_float(width)
    smallest_side = tf.to_float(smallest_side)

    # Scale by whichever side is currently the smaller one.
    scale = tf.cond(tf.greater(height, width),
                    lambda: smallest_side / width,
                    lambda: smallest_side / height)
    # Round to the nearest integer before casting. A bare cast truncates, and
    # float error in `side * (smallest_side / side)` can land just below
    # `smallest_side`, which would make the result one pixel too small.
    new_height = tf.to_int32(tf.rint(height * scale))
    new_width = tf.to_int32(tf.rint(width * scale))
    return new_height, new_width
Example #7
0
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
    """Builds a dense vector holding one value at `indices` and another elsewhere.

    This helper is used in place of
      tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
    because it is unclear whether that call is safe for unordered indices.
    `size` may be dynamic (e.g. tf.shape(tensor)[0]).

    Args:
      indices: 1d Tensor with integer indices which are to be set to
          indices_values.
      size: scalar with size (integer) of output Tensor.
      indices_value: values of elements specified by indices in the output vector
      default_value: values of other elements in the output vector.
      dtype: data type.

    Returns:
      dense 1D Tensor of shape [size] with indices set to indices_values and the
          rest set to default_value.
    """
    size = tf.to_int32(size)
    background = tf.ones([size], dtype=dtype) * default_value
    foreground = tf.ones_like(indices, dtype=dtype) * indices_value

    # The foreground entries are listed last in the stitch, after the
    # full-range background entries.
    positions = [tf.range(size), tf.to_int32(indices)]
    return tf.dynamic_stitch(positions, [background, foreground])
def get_exemplar_images(images, exemplar_size, targets_pos=None):
  """Crops a fixed-size exemplar window out of every image in a batch.

  Args:
    images: batch of images; the first three static dimensions are read as
      (batch_size, height, width) and the result is declared to have 3
      channels.
    exemplar_size: (height, width) of the crop.
    targets_pos: optional per-image crop centers indexed as [:, 0] for the row
      and [:, 1] for the column. When None, the image center is used for
      every image.

  Returns:
    Tensor of static shape [batch_size, z_height, z_width, 3].
  """
  with tf.name_scope('get_exemplar_image'):
    batch_size, x_height, x_width = images.get_shape().as_list()[:3]
    z_height, z_width = exemplar_size

    if targets_pos is not None:
      centers = targets_pos
    else:
      image_center = [[get_center(x_height), get_center(x_width)]]
      centers = tf.tile(image_center, [batch_size, 1])

    # Turn center coordinates into top-left / bottom-right corners.
    top = tf.to_int32(tf.round(centers[:, 0] - get_center(z_height)))
    bottom = tf.to_int32(top + z_height)
    left = tf.to_int32(tf.round(centers[:, 1] - get_center(z_width)))
    right = tf.to_int32(left + z_width)

    def _crop(args):
      frame, row_start, col_start, row_end, col_end = args
      return frame[row_start:row_end, col_start:col_end]

    crops = tf.map_fn(
        _crop, (images, top, left, bottom, right), dtype=images.dtype)
    crops.set_shape([batch_size, z_height, z_width, 3])
    return crops
def test_accuracy(logits, labels):
    """Returns the fraction of the batch whose arg-max logit equals its label.

    The batch size is taken from `cfg.batch_size`, not from the tensors.
    """
    predicted = tf.reshape(
        tf.to_int32(tf.argmax(logits, axis=1)), shape=(cfg.batch_size,))
    hits = tf.cast(tf.equal(tf.to_int32(labels), predicted), tf.float32)
    return tf.reduce_sum(hits) / cfg.batch_size
Example #10
0
def crop_or_pad(waves, length, channels):
  """Forces a batch of waves to shape [N, length, channels].

  Short waves are zero-padded on both sides (the left side receives the extra
  sample when the padding is odd); long waves are cut after `length` samples.
  Channels are tiled as often as needed and then truncated to `channels`.

  Args:
    waves: A 3D `Tensor` of NLC format.
    length: A Python scalar. The output wave size.
    channels: Number of output waves channels.

  Returns:
    A 3D `Tensor` of NLC format with shape [N, length, channels].
  """
  waves = tf.convert_to_tensor(waves)
  static_batch = waves.shape[0].value
  dynamic_shape = tf.shape(waves)
  current_length = dynamic_shape[1]
  current_channels = dynamic_shape[2]

  # Force audio length.
  total_pad = tf.maximum(0, length - current_length)
  pad_after = tf.to_int32(tf.to_float(total_pad) / 2.0)
  pad_before = total_pad - pad_after
  paddings = [[0, 0], [pad_before, pad_after], [0, 0]]
  waves = tf.pad(waves, paddings)[:, :length, :]

  # Force number of channels.
  tile_count = tf.to_int32(
      tf.ceil(tf.to_float(channels) / tf.to_float(current_channels)))
  waves = tf.tile(waves, [1, 1, tile_count])[:, :, :channels]

  waves.set_shape([static_batch, length, channels])
  return waves
Example #11
0
def padded_sequence_accuracy(predictions,
                             labels,
                             weights_fn=common_layers.weights_nonzero):
  """Per-sequence accuracy: 1.0 only if every weighted position is correct.

  Returns a pair (correct_seq, weight) where correct_seq has one entry per
  batch element and the weight is the constant 1.0.
  """
  # A last dimension of size 1 means an L1/L2 loss is in use.
  last_dim = common_layers.shape_list(predictions)[-1]
  if last_dim == 1:
    return rounding_sequence_accuracy(
        predictions, labels, weights_fn=weights_fn)

  with tf.variable_scope(
      "padded_sequence_accuracy", values=[predictions, labels]):
    padded_predictions, padded_labels = common_layers.pad_with_zeros(
        predictions, labels)
    weights = weights_fn(padded_labels)

    # Flatten everything between the batch dim and (for predictions) the
    # class dim, since TPU argmax only supports a limited number of dims.
    pred_dims = common_layers.shape_list(padded_predictions)
    batch_size, num_classes = pred_dims[0], pred_dims[-1]
    labels_flat_size = common_layers.list_product(
        common_layers.shape_list(padded_labels)[1:])
    preds_flat_size = common_layers.list_product(pred_dims[1:-1])

    padded_predictions = tf.reshape(
        padded_predictions, [batch_size, preds_flat_size, num_classes])
    padded_labels = tf.reshape(padded_labels, [batch_size, labels_flat_size])
    weights = tf.reshape(weights, [batch_size, labels_flat_size])

    outputs = tf.to_int32(tf.argmax(padded_predictions, axis=-1))
    mismatches = tf.not_equal(outputs, tf.to_int32(padded_labels))
    weighted_errors = tf.to_float(mismatches) * weights
    non_batch_axes = list(range(1, len(outputs.get_shape())))
    errors_per_seq = tf.reduce_sum(weighted_errors, axis=non_batch_axes)
    # Any weighted error at all makes the whole sequence count as wrong.
    correct_seq = 1.0 - tf.minimum(1.0, errors_per_seq)
    return correct_seq, tf.constant(1.0)
Example #12
0
def smoothing_crossentropy_avgall(logits, targets, sequence_length):
    """ Label-smoothed cross entropy, averaged per sequence and summed over the batch.

    Each sequence's masked loss is divided by its own length; the
    per-sequence averages are then summed. The batch size is returned as the
    weight so the caller can complete the batch average.

    Args:
        logits: The logits Tensor with shape [timesteps, batch_size, vocab_size].
        targets: The gold labels Tensor with shape [timesteps, batch_size].
        sequence_length: The length of `targets`, [batch_size, ]

    Returns: Loss sum and weight sum.
    """
    vocab_size = logits.get_shape().as_list()[-1]
    soft_targets, normalizing = label_smoothing(targets, vocab_size)
    token_losses = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=soft_targets) - normalizing

    # sequence_mask is batch-major; transpose it to [timesteps, batch_size].
    batch_major_mask = tf.sequence_mask(
        lengths=tf.to_int32(sequence_length),
        maxlen=tf.to_int32(tf.shape(targets)[0]),
        dtype=tf.float32)
    time_major_mask = tf.transpose(batch_major_mask, [1, 0])
    token_losses = token_losses * time_major_mask

    # Average each sequence over its own length, then sum over the batch.
    lengths = tf.to_float(sequence_length)
    per_sequence_loss = tf.reduce_sum(token_losses, axis=0) / lengths
    batch_size = tf.to_float(tf.shape(sequence_length)[0])
    return tf.reduce_sum(per_sequence_loss), batch_size
Example #13
0
def padded_accuracy(logits, labels):
  """Per-position correctness of arg-max predictions, weighted on non-0 labels.

  Returns a pair (hits, weights): hits is 1.0 where the arg-max of `logits`
  equals the label, and weights is 1.0 where the label is non-zero.
  """
  with tf.variable_scope("padded_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    nonpad_weights = tf.to_float(tf.not_equal(labels, 0))
    predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
    is_hit = tf.equal(predicted_ids, tf.to_int32(labels))
    return tf.to_float(is_hit), nonpad_weights
  def subsample(self, indicator, batch_size, labels, scope=None):
    """Draws a minibatch balanced between positive and negative examples.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size. If None, keeps all positive samples and
        randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is
        True.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
          (=False) examples.
      scope: name scope.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    # Rank checks first (indicator, then labels) ...
    for arg_name, arg in (('indicator', indicator), ('labels', labels)):
      if len(arg.get_shape().as_list()) != 1:
        raise ValueError('%s must be 1 dimensional, got a tensor of '
                         'shape %s' % (arg_name, arg.get_shape()))
    # ... then dtype checks (labels, then indicator).
    for arg_name, arg in (('labels', labels), ('indicator', indicator)):
      if arg.dtype != tf.bool:
        raise ValueError('%s should be of type bool. Received: %s' %
                         (arg_name, arg.dtype))

    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
      if self._is_static:
        return self._static_subsample(indicator, batch_size, labels)

      # Restrict both candidate pools to the entries allowed by `indicator`.
      candidate_pos = tf.logical_and(labels, indicator)
      candidate_neg = tf.logical_and(tf.logical_not(labels), indicator)

      keep_all_positives = batch_size is None

      # Positives are sampled first; the negative quota depends on how many
      # positives were actually drawn.
      if keep_all_positives:
        pos_quota = tf.reduce_sum(tf.to_int32(candidate_pos))
      else:
        pos_quota = int(self._positive_fraction * batch_size)
      sampled_pos = self.subsample_indicator(candidate_pos, pos_quota)
      num_pos = tf.reduce_sum(tf.cast(sampled_pos, tf.int32))

      if keep_all_positives:
        neg_per_pos = (
            1 - self._positive_fraction) / self._positive_fraction
        neg_quota = tf.to_int32(neg_per_pos * tf.to_float(num_pos))
      else:
        neg_quota = batch_size - num_pos
      sampled_neg = self.subsample_indicator(candidate_neg, neg_quota)

      return tf.logical_or(sampled_pos, sampled_neg)
Example #15
0
  def _build_once(self, dataset, feature_transformer):
    """Builds the fit-and-evaluate graph for one train/test batch pair.

    Two batches are drawn from `dataset`, concatenated so that
    `feature_transformer` is applied to both in one call, and split back
    into train and test features. A model built by `build_fit` is fit on the
    train half and evaluated on the test half.

    Args:
      dataset: callable returning a batch with `label` and `label_onehot`
        attributes.
      feature_transformer: callable mapping the concatenated batch to features.

    Returns:
      test_loss: cross entropy against the predicted probabilities when
        `self.probs` is set, otherwise the test error rate.
      stats: dict with train accuracy, test accuracy and the test loss, keyed
        by `self.name`.
    """
    with tf.device(self._local_device):
      tr_batch = dataset()
      te_batch = dataset()
      num_classes = tr_batch.label_onehot.shape.as_list()[1]
      all_batch = utils.structure_map_multi(lambda x: tf.concat(x, 0),
                                            [tr_batch, te_batch])
      features = feature_transformer(all_batch)
      trX, teX = utils.structure_map_split(lambda x: tf.split(x, 2, axis=0),
                                           features)
      trY = tf.to_int64(tr_batch.label)
      # NOTE(review): trY_onehot is not used below; kept so the graph is
      # unchanged.
      trY_onehot = tf.to_int32(tr_batch.label_onehot)
      teY = tf.to_int64(te_batch.label)
      teY_shape = teY.shape.as_list()

      def blackbox(args):
        """Fits on the train split and returns the loss on the test split."""
        # Tuple parameters in the signature are a syntax error on Python 3
        # (PEP 3113), so the tuple is unpacked explicitly.
        trX, trY, teX, teY = args
        # Labels arrive as floats; round and cast them back to int32.
        trY = tf.to_int32(tf.rint(trY))
        teY = tf.to_int32(tf.rint(teY))
        tf_fn = build_fit(
            self._local_device,
            self._get_model,
            num_classes=num_classes,
            probs=self.probs)
        if self.probs:
          trP, teP, teP_probs = tf_fn(trX, trY, teX)
        else:
          trP, teP = tf_fn(trX, trY, teX)

        teY.set_shape(teY_shape)
        if self.probs:
          onehot = tf.one_hot(teY, num_classes)
          crossent = -tf.reduce_sum(onehot * teP_probs, [1])
          return tf.reduce_mean(crossent)
        else:
          # use error rate as the loss if no surrogate is available.
          return 1 - tf.reduce_mean(
              tf.to_float(tf.equal(teY, tf.to_int32(teP))))

      test_loss = blackbox((trX, tf.to_float(trY), teX, tf.to_float(teY)))

      stats = {}

      # A second fit, outside `blackbox`, supplies the predictions used for
      # the accuracy statistics.
      tf_fn = build_fit(
          self._local_device,
          self._get_model,
          num_classes=num_classes,
          probs=self.probs)
      if self.probs:
        trP, teP, teP_probs = tf_fn(trX, trY, teX)
      else:
        trP, teP = tf_fn(trX, trY, teX)
      stats["%s/accuracy_train" % self.name] = tf.reduce_mean(
          tf.to_float(tf.equal(tf.to_int32(trY), tf.to_int32(trP))))
      stats["%s/accuracy_test" % self.name] = tf.reduce_mean(
          tf.to_float(tf.equal(tf.to_int32(teY), tf.to_int32(teP))))
      stats["%s/test_loss" % self.name] = test_loss
      return test_loss, stats
	def predict_setup(self):
		"""Builds the multi-scale prediction graph and prepares output folders.

		The input image is fed through the network at scales 1.0, 0.75 and 0.5
		with shared weights. The three raw outputs are resized to a common size,
		combined with an element-wise maximum, upsampled to the input resolution
		and reduced to a class index per pixel, stored in `self.pred` as uint8.

		Also sets `self.coord` (queue coordinator) and `self.loader` (a Saver
		over all global variables). Exits the process if
		`self.conf.encoder_name` is neither 'res101' nor 'res50'.
		"""
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.test_data_list,
				None, # the images have different sizes
				False, # no data-aug
				False, # no data-aug
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			image, label = reader.image, reader.label # [h, w, 3 or 1]
		# Add one batch dimension [1, h, w, 3 or 1]
		image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
		h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2])
		# Down-scaled copies of the input for multi-scale inference.
		image_batch_075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))]))
		image_batch_05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))]))

		# Create network
		if self.conf.encoder_name not in ['res101', 'res50']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50")
			sys.exit(-1)
		else:
			# The first instantiation creates the variables; the other two
			# scales reuse them.
			with tf.variable_scope('', reuse=False):
				net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name)

		# predictions
		# Network raw output
		raw_output100 = net.outputs
		raw_output075 = net075.outputs
		raw_output05 = net05.outputs
		# Bring all scales to the size of the full-scale output and take the
		# element-wise maximum across scales.
		raw_output = tf.reduce_max(tf.stack([raw_output100,
									tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
									tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
		raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
		raw_output = tf.argmax(raw_output, axis=3)
		self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

		# Create directories. Each one is checked on its own so that an
		# already existing out_dir still gets its missing sub-directories.
		output_dirs = [self.conf.out_dir, self.conf.out_dir + '/prediction']
		if self.conf.visual:
			output_dirs.append(self.conf.out_dir + '/visual_prediction')
		for directory in output_dirs:
			if not os.path.exists(directory):
				os.makedirs(directory)

		# Loader for loading the checkpoint
		self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #17
0
def rounding_accuracy(predictions,
                      labels,
                      weights_fn=common_layers.weights_nonzero):
  """Accuracy for L1/L2 losses: predictions are cast to int32 and compared.

  Returns a pair (hits, weights), with the weights computed by `weights_fn`
  on the squeezed labels.
  """
  int_predictions = tf.squeeze(tf.to_int32(predictions))
  squeezed_labels = tf.squeeze(labels)
  weights = weights_fn(squeezed_labels)
  hits = tf.equal(int_predictions, tf.to_int32(squeezed_labels))
  return tf.to_float(hits), weights
Example #18
0
  def _anchor_component_tf(self):
    """Generates anchors with TF ops and stores them on the instance.

    The feature-map height and width are the image size from `self._im_info`
    divided by the first feature stride, rounded up. Sets `self._anchors` and
    `self._anchor_length`.
    """
    print('Use TF anchors')
    with tf.variable_scope('ANCHOR_' + self._tag):
      stride = np.float32(self._feat_stride[0])
      # just to get the shape right
      feat_height = tf.to_int32(tf.ceil(self._im_info[0, 0] / stride))
      feat_width = tf.to_int32(tf.ceil(self._im_info[0, 1] / stride))

      anchors, anchor_length = generate_anchors_pre_tf(
        feat_height, feat_width, self._feat_stride[0], self._anchor_scales,
        self._anchor_ratios)
      self._anchors = anchors
      self._anchor_length = anchor_length
Example #19
0
 def preprocess_example(self, example, mode, hparams):
   """Runs the parent preprocessing, then reshapes the audio input.

   The "audio/sample_count" and "audio/sample_width" entries are removed
   from `example` and used, with a single channel, as the new shape of
   example["inputs"].
   """
   example = super(AudioTimitProblem, self).preprocess_example(
       example, mode, hparams)
   # Reshape audio to proper shape
   num_samples = tf.to_int32(example.pop("audio/sample_count"))
   width = tf.to_int32(example.pop("audio/sample_width"))
   audio_shape = [num_samples, width, 1]
   example["inputs"] = tf.reshape(example["inputs"], audio_shape)
   return example
Example #20
0
def arg_max_2d(x_in):
    """Returns the 2-D position of the maximum over dimensions 1 and 2.

    Dimensions 1 and 2 of `x_in` are flattened, the arg-max is taken along the
    flattened axis, and the flat index is split into a quotient and remainder
    with respect to the size of dimension 2.

    Args:
      x_in: rank 4 tensor (presumably [batch, height, width, channels]).

    Returns:
      int32 tensor stacking [index along dim 1, index along dim 2].
    """
    in_shape = tf.shape(x_in)
    flat_shape = tf.concat([in_shape[0:1], [-1], in_shape[3:4]], 0)
    flat_argmax = tf.to_int32(tf.argmax(tf.reshape(x_in, flat_shape), 1))
    dim2_size = tf.slice(in_shape, [2], [1])
    row_index = tf.div(flat_argmax, tf.to_int32(dim2_size))
    col_index = tf.mod(flat_argmax, tf.to_int32(dim2_size))

    return tf.stack([row_index, col_index])
Example #21
0
  def __init__(self, requests, expert_capacity):
    """Create a TruncatingDispatcher.

    For every (batch element, expert) pair, the requests along the length
    axis are served in order until `expert_capacity` of them have been
    accepted; later requests for that expert are dropped (their gate is 0).

    Args:
      requests: a boolean `Tensor` of shape `[batch, length, num_experts]`.
        Alternatively, a float or int Tensor containing zeros and ones.
      expert_capacity: a Scalar - maximum number of examples per expert per
        batch element.

    Returns:
      a TruncatingDispatcher
    """
    self._requests = tf.to_float(requests)
    self._expert_capacity = expert_capacity
    expert_capacity_f = tf.to_float(expert_capacity)
    self._batch, self._length, self._num_experts = tf.unstack(
        tf.shape(self._requests), num=3)

    # [batch, length, num_experts]
    # Exclusive cumsum along the length axis: for each position, the number of
    # earlier positions in the same batch element that requested this expert.
    position_in_expert = tf.cumsum(self._requests, axis=1, exclusive=True)
    # [batch, length, num_experts]
    # A request is kept only if fewer than expert_capacity requests precede it.
    self._gates = self._requests * tf.to_float(
        tf.less(position_in_expert, expert_capacity_f))
    # Index tensors shaped to broadcast against [batch, length, num_experts].
    batch_index = tf.reshape(
        tf.to_float(tf.range(self._batch)), [self._batch, 1, 1])
    length_index = tf.reshape(
        tf.to_float(tf.range(self._length)), [1, self._length, 1])
    expert_index = tf.reshape(
        tf.to_float(tf.range(self._num_experts)), [1, 1, self._num_experts])
    # position in a Tensor with shape [batch * num_experts * expert_capacity]
    flat_position = (
        position_in_expert +
        batch_index * (tf.to_float(self._num_experts) * expert_capacity_f) +
        expert_index * expert_capacity_f)
    # Tensor of shape [batch * num_experts * expert_capacity].
    # The summed data is (length_index + 1) * gate, so a dropped request adds
    # 0 and a kept request adds its length position plus one; an entry of 0
    # therefore marks a slot that received no kept request.
    self._indices = tf.unsorted_segment_sum(
        data=tf.reshape((length_index + 1.0) * self._gates, [-1]),
        segment_ids=tf.to_int32(tf.reshape(flat_position, [-1])),
        num_segments=self._batch * self._num_experts * expert_capacity)
    self._indices = tf.reshape(
        self._indices,
        [self._batch, self._num_experts, expert_capacity])
    # Tensors of shape [batch, num_experts, expert_capacity].
    # each element is 0.0 or 1.0
    # NOTE(review): this relies on each slot receiving at most one kept
    # request, i.e. on `requests` holding only zeros and ones - confirm.
    self._nonpadding = tf.minimum(self._indices, 1.0)
    # each element is an integer in [0, length)
    # Undo the +1 offset; the relu keeps unfilled slots at 0 instead of -1.
    self._indices = tf.nn.relu(self._indices - 1.0)
    # self._flat_indices is [batch, num_experts, expert_capacity], with values
    # in [0, batch * length)
    self._flat_indices = tf.to_int32(
        self._indices +
        (tf.reshape(tf.to_float(tf.range(self._batch)), [-1, 1, 1])
         * tf.to_float(self._length)))
    self._indices = tf.to_int32(self._indices)
Example #22
0
def create_learning_rate_decay_fn(decay_type,
                                  decay_steps,
                                  decay_rate,
                                  start_decay_at=0,
                                  stop_decay_at=1e9,
                                  min_learning_rate=None,
                                  staircase=False):
  """Builds a learning-rate schedule function from a `tf.train` decay op.

  Args:
    decay_type: A decay function name defined in `tf.train`
    decay_steps: How often to apply decay.
    decay_rate: A Python number. The decay rate.
    start_decay_at: Don't decay before this step
    stop_decay_at: Don't decay after this step
    min_learning_rate: Don't decay below this number
    staircase: Whether to apply decay in a discrete staircase,
      as opposed to continuous, fashion.

  Returns:
    A function that takes (learning_rate, global_step) as inputs
    and returns the learning rate for the given step.
    Returns `None` if decay_type is empty or None.
  """
  if decay_type is None or decay_type == "":
    return None

  decay_start = tf.to_int32(start_decay_at)
  decay_stop = tf.to_int32(stop_decay_at)

  def decay_fn(learning_rate, global_step):
    """Returns the learning rate to use at `global_step`."""
    step = tf.to_int32(global_step)

    schedule = getattr(tf.train, decay_type)
    # Freeze the schedule after `stop_decay_at` and measure the step
    # relative to `start_decay_at`.
    steps_since_start = tf.minimum(step, decay_stop) - decay_start
    decayed = schedule(
        learning_rate=learning_rate,
        global_step=steps_since_start,
        decay_steps=decay_steps,
        decay_rate=decay_rate,
        staircase=staircase,
        name="decayed_learning_rate")

    # Use the undecayed rate up to the start boundary, the decayed one after.
    lr = tf.train.piecewise_constant(
        x=step,
        boundaries=[decay_start],
        values=[learning_rate, decayed])

    if min_learning_rate:
      lr = tf.maximum(lr, min_learning_rate)

    return lr

  return decay_fn
Example #23
0
def rounding_sequence_accuracy(predictions,
                               labels,
                               weights_fn=common_layers.weights_nonzero):
  """Sequence accuracy for L1/L2 losses: predictions are cast to int32.

  A sequence scores 1.0 only when no weighted position differs from its
  label. Returns the pair (correct_seq, constant weight 1.0).
  """
  int_predictions = tf.squeeze(tf.to_int32(predictions), axis=-1)
  weights = weights_fn(labels)
  mismatches = tf.not_equal(int_predictions, tf.to_int32(labels))
  weighted_errors = tf.to_float(mismatches) * weights
  non_batch_axes = list(range(1, len(int_predictions.get_shape())))
  errors_per_seq = tf.reduce_sum(weighted_errors, axis=non_batch_axes)
  correct_seq = 1.0 - tf.minimum(1.0, errors_per_seq)
  return correct_seq, tf.constant(1.0)
Example #24
0
    def testListOfScalarTensors(self):
        """tf.split accepts a list of scalar tensors as the split sizes."""
        first_size, second_size = tf.to_int32(5), tf.to_int32(6)

        value = np.random.rand(11, 11)

        with self.test_session(use_gpu=False) as sess:
            pieces = sess.run(tf.split(value, [first_size, second_size]))

        # The first piece holds rows 0-4, the second the remaining six rows.
        self.assertAllEqual(pieces[0], value[0:5, :])
        self.assertAllEqual(pieces[1], value[5:, :])
Example #25
0
def padded_sequence_accuracy(logits, labels):
  """Per-sequence accuracy: 1.0 only if every non-0 label position is correct.

  Returns the pair (correct_seq, constant weight 1.0).
  """
  with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    nonpad_weights = tf.to_float(tf.not_equal(labels, 0))
    predicted_ids = tf.to_int32(tf.argmax(logits, axis=-1))
    mismatches = tf.not_equal(predicted_ids, tf.to_int32(labels))
    weighted_errors = tf.to_float(mismatches) * nonpad_weights
    non_batch_axes = list(range(1, len(predicted_ids.get_shape())))
    errors_per_seq = tf.reduce_sum(weighted_errors, axis=non_batch_axes)
    # A single weighted error is enough to mark the sequence as wrong.
    correct_seq = 1.0 - tf.minimum(1.0, errors_per_seq)
    return correct_seq, tf.constant(1.0)
Example #26
0
def padded_accuracy(predictions,
                    labels,
                    weights_fn=common_layers.weights_nonzero):
  """Per-position correctness of arg-max predictions, with `weights_fn` weights.

  Returns a pair (hits, weights): hits is 1.0 where the arg-max over the last
  axis of the padded predictions equals the padded label.
  """
  with tf.variable_scope("padded_accuracy", values=[predictions, labels]):
    padded_predictions, padded_labels = common_layers.pad_with_zeros(
        predictions, labels)
    weights = weights_fn(padded_labels)
    predicted_ids = tf.to_int32(tf.argmax(padded_predictions, axis=-1))
    is_hit = tf.equal(predicted_ids, tf.to_int32(padded_labels))
    return tf.to_float(is_hit), weights
Example #27
0
def argmax2d(Xin):
    """Returns the 2-D position of the maximum over dimensions 1 and 2.

    Dimensions 1 and 2 of `Xin` are flattened, the arg-max is taken along the
    flattened axis, and the flat index is split into a quotient and remainder
    with respect to the size of dimension 2.

    Uses the TensorFlow >= 1.0 API: `tf.concat(values, axis)` and `tf.stack`
    (the replacement for the removed `tf.pack`).

    Args:
      Xin: rank 4 tensor (presumably [batch, height, width, channels]).

    Returns:
      int32 tensor stacking [index along dim 1, index along dim 2].
    """
    origShape = tf.shape(Xin)
    # Target shape [dim0, -1, dim3]: merge dimensions 1 and 2.
    reshape_t = tf.concat([origShape[0:1], [-1], origShape[3:4]], 0)
    zz = tf.reshape(Xin, reshape_t)
    pp = tf.to_int32(tf.argmax(zz, 1))
    # Size of dimension 2, used to unflatten the arg-max index.
    sz1 = tf.to_int32(tf.slice(origShape, [2], [1]))
    cc1 = tf.div(pp, sz1)
    cc2 = tf.mod(pp, sz1)

    return tf.stack([cc1, cc2])
Example #28
0
def sparse_sequence_length(sparse_tensor):
  """Returns, per row, one plus the largest column index present in that row.

  Every entry is compared against every row id; entries outside a row
  contribute 0 to that row's maximum.
  """
  with tf.name_scope("sparse_sequence_length"):
    indices = tf.to_int32(sparse_tensor.indices)
    entry_rows = tf.expand_dims(indices[:, 0], 1)
    entry_cols_plus_one = tf.expand_dims(indices[:, 1], 1) + 1
    num_rows = tf.to_int32(sparse_tensor.dense_shape[0])
    all_rows = tf.expand_dims(tf.range(num_rows), 0)
    # [num_entries, num_rows]: 1 where the entry belongs to the row.
    membership = tf.to_int32(tf.equal(entry_rows, all_rows))
    row_lengths = tf.reduce_max(membership * entry_cols_plus_one, [0])
  return row_lengths
Example #29
0
def cycle_gan_internal(inputs, targets, _, hparams):
  """Cycle GAN, main step used for training.

  Builds two mappings, `inp2tgt` and `tgt2inp` (both calls to
  `transformer_vae.residual_conv`), sends one part of the batch through each
  full cycle, and collects reconstruction and discriminator losses.

  Args:
    inputs: Input ids; cast to int32 and embedded. Dim 3 is squeezed away
      before the reconstruction loss, so a rank of at least 4 is expected.
    targets: Target ids, handled the same way as `inputs`.
    _: Unused.
    hparams: Hyperparameters; `vocab_size`, `hidden_size` and
      `cycle_loss_multiplier` are read here, and the object is forwarded to
      the helper functions.

  Returns:
    A `(tgt, losses)` tuple. `tgt` is the gradient-stopped output of the
    "softmax" dense layer applied to `inp2tgt(inputs)`, with an extra axis
    inserted at position 2. `losses` is a dict with the keys "input_input",
    "target_target", "input_disc" and "target_disc".
  """
  with tf.variable_scope("cycle_gan"):
    # Embed inputs and targets.
    # Both calls use the name "embed"; the second passes reuse=True, so the
    # embedding variables are shared between inputs and targets.
    inputs_orig, targets_orig = tf.to_int32(inputs), tf.to_int32(targets)
    inputs = common_layers.embedding(
        inputs_orig, hparams.vocab_size, hparams.hidden_size, "embed")
    targets = common_layers.embedding(
        targets_orig, hparams.vocab_size, hparams.hidden_size,
        "embed", reuse=True)

    # Split the batch into input-input and target-target parts.
    # The first output of split_on_batch is used for inputs and the second
    # for targets.
    inputs1, _ = split_on_batch(inputs)
    _, targets2 = split_on_batch(targets)

    # Define F and G, called inp2tgt and tgt2inp here.
    # The first call of each creates its variables; later calls must pass
    # reuse=True, so the order of the calls below matters.
    def inp2tgt(x, reuse=False):
      return transformer_vae.residual_conv(x, 1, hparams, "inp2tgt", reuse)
    def tgt2inp(x, reuse=False):
      return transformer_vae.residual_conv(x, 1, hparams, "tgt2inp", reuse)

    # Input-input part.
    inp1_tgt = inp2tgt(inputs1)
    inp1_back = tgt2inp(inp1_tgt)

    # Target-target part.
    tgt2_inp = tgt2inp(targets2, reuse=True)
    tgt2_back = inp2tgt(tgt2_inp, reuse=True)

    # Reconstruction losses.
    # The cycled embeddings are compared against the original integer ids of
    # the same batch part.
    inp1_orig, _ = split_on_batch(inputs_orig)
    _, tgt2_orig = split_on_batch(targets_orig)
    inp1_loss = reconstruct_loss(
        inp1_back, tf.squeeze(inp1_orig, axis=3), hparams)
    tgt2_loss = reconstruct_loss(
        tgt2_back, tf.squeeze(tgt2_orig, axis=3), hparams, reuse=True)

    # Discriminator losses.
    dloss1 = discriminate_loss(inputs1, tgt2_inp, True, hparams, "inp_disc")
    dloss2 = discriminate_loss(targets2, inp1_tgt, True, hparams, "tgt_disc")

    # Reconstruct targets from inputs.
    # reuse=True requires the "softmax" variables to exist already; presumably
    # they are created inside reconstruct_loss -- confirm.
    tgt = inp2tgt(inputs, reuse=True)
    tgt = tf.layers.dense(tgt, hparams.vocab_size, name="softmax", reuse=True)

    # We use the reconstruction only for tracking progress, no gradients here!
    tgt = tf.stop_gradient(tf.expand_dims(tgt, axis=2))

    # Only the two cycle-reconstruction losses are scaled by the multiplier.
    losses = {"input_input": hparams.cycle_loss_multiplier * inp1_loss,
              "target_target": hparams.cycle_loss_multiplier * tgt2_loss,
              "input_disc": dloss1,
              "target_disc": dloss2}
    return tgt, losses
Example #30
0
 def _anchor_component(self):
   """Creates the anchor-generation op and stores its outputs on `self`.

   The feature-map height and width are obtained by dividing the first two
   entries of row 0 of `self._im_info` by `self._feat_stride[0]` and rounding
   up. `generate_anchors_pre` is then wrapped in a `tf.py_func`.

   Sets:
     self._anchors: float32 tensor with static shape [None, 4].
     self._anchor_length: scalar int32 tensor.
   """
   with tf.variable_scope('ANCHOR_' + self._tag):
     stride = np.float32(self._feat_stride[0])
     # Only needed to hand the right shape to the anchor generator.
     feat_height = tf.to_int32(tf.ceil(self._im_info[0, 0] / stride))
     feat_width = tf.to_int32(tf.ceil(self._im_info[0, 1] / stride))
     generator_inputs = [
         feat_height,
         feat_width,
         self._feat_stride,
         self._anchor_scales,
         self._anchor_ratios,
     ]
     boxes, box_count = tf.py_func(
         generate_anchors_pre,
         generator_inputs,
         [tf.float32, tf.int32],
         name="generate_anchors")
     # py_func outputs carry no static shape, so restore it by hand.
     boxes.set_shape([None, 4])
     box_count.set_shape([])
     self._anchors = boxes
     self._anchor_length = box_count
Example #31
0
    def _build_graph(self, dims):
        """
        Constructs the TensorFlow subgraph for this model.

        Three latent variables (z_t, z_c, z_y) each get a `*_mu` and a
        `*_sigma` network in two flavours: the `*_en_*` networks see only
        `self.x`, while the `*_de_*` networks additionally see `self.t`
        and/or `self.y`. Samples obtained through `self._get_sample_from_dist`
        feed the treatment and outcome heads.

        Sets the following member variables (to TF nodes):

        self.t_classif_loss  Treatment loss (sigmoid cross-entropy), both heads
        self.y_predict_loss  Outcome loss of both heads (log or squared error)
        self.tot_loss        The total objective to minimize
        self.pred_loss       The outcome loss of the `_en` head alone
        self.imb_dist        First return value of `wasserstein(...)`
        self.output          The output prediction "y" (from the `_en` head)

        Args:
            dims: Not referenced in this method.
        """
        # Note: "sigma" here is log_squared_sigma, i.e. log(sigma^2).
        # Networks that only see x (suffix `_en`).
        z_t_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                       "z_t_en_mu")
        z_t_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                          "z_t_en_sigma")
        z_c_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                       "z_c_en_mu")
        z_c_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                          "z_c_en_sigma")
        z_y_en_mu = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                       "z_y_en_mu")
        z_y_en_sigma = self._build_fully_connected_layers(self.x, FLAGS.n_in, FLAGS.dim_in, self.dropout_in,
                                                          "z_y_en_sigma")

        # Networks that also see t and/or y (suffix `_de`):
        # z_t from [x, t], z_c from [x, t, y], z_y from [x, y].
        z_t_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.t], -1), FLAGS.n_in,
                                                       FLAGS.dim_in, self.dropout_in, "z_t_de_mu")
        z_t_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.t], -1), FLAGS.n_in,
                                                          FLAGS.dim_in, self.dropout_in, "z_t_de_sigma")
        z_c_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.t, self.y], -1), FLAGS.n_in,
                                                       FLAGS.dim_in, self.dropout_in, "z_c_de_mu")
        z_c_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.t, self.y], -1), FLAGS.n_in,
                                                          FLAGS.dim_in, self.dropout_in, "z_c_de_sigma")
        z_y_de_mu = self._build_fully_connected_layers(tf.concat([self.x, self.y], -1), FLAGS.n_in,
                                                       FLAGS.dim_in, self.dropout_in, "z_y_de_mu")
        z_y_de_sigma = self._build_fully_connected_layers(tf.concat([self.x, self.y], -1), FLAGS.n_in,
                                                          FLAGS.dim_in, self.dropout_in, "z_y_de_sigma")

        # One sample per latent variable and per flavour.
        z_t_sample_en = self._get_sample_from_dist(z_t_en_mu, z_t_en_sigma)
        z_c_sample_en = self._get_sample_from_dist(z_c_en_mu, z_c_en_sigma)
        z_y_sample_en = self._get_sample_from_dist(z_y_en_mu, z_y_en_sigma)
        z_t_sample_de = self._get_sample_from_dist(z_t_de_mu, z_t_de_sigma)
        z_c_sample_de = self._get_sample_from_dist(z_c_de_mu, z_c_de_sigma)
        z_y_sample_de = self._get_sample_from_dist(z_y_de_mu, z_y_de_sigma)

        # Treatment heads: the `_de` head reads [z_t, z_c], the `_en` head
        # reads z_t only. Both end in a single-unit dense layer (a logit).
        zt_zc_concat_de = tf.concat([z_t_sample_de, z_c_sample_de], -1)
        # zt_zc_concat_en = tf.concat([z_t_sample_en, z_c_sample_en], -1)
        pred_t_de = tf.layers.dense(
            self._build_fully_connected_layers(zt_zc_concat_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               't_out_net'), 1, name='pred_t_logit')
        pred_t_en = tf.layers.dense(
            self._build_fully_connected_layers(z_t_sample_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               't_out_net_test'), 1, name='pred_t_logit_test')

        # Row indices of the samples with t < 1 (i0) and t > 0 (i1).
        i0 = tf.to_int32(tf.where(self.t < 1)[:, 0])
        i1 = tf.to_int32(tf.where(self.t > 0)[:, 0])

        # Route each group through its own outcome network.
        zc_zy_concat_de = tf.concat([z_c_sample_de, z_y_sample_de], -1)
        zc_zy_concat0_de = tf.gather(zc_zy_concat_de, i0)
        zc_zy_concat1_de = tf.gather(zc_zy_concat_de, i1)

        z_y_sample0_en = tf.gather(z_y_sample_en, i0)
        z_y_sample1_en = tf.gather(z_y_sample_en, i1)

        pred_y0_de = tf.layers.dense(
            self._build_fully_connected_layers(zc_zy_concat0_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               'y0_out_net'), 1, name='pred_y0_logit')
        pred_y1_de = tf.layers.dense(
            self._build_fully_connected_layers(zc_zy_concat1_de, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               'y1_out_net'), 1, name='pred_y1_logit')

        pred_y0_en = tf.layers.dense(
            self._build_fully_connected_layers(z_y_sample0_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               'y0_out_net_test'), 1, name='pred_y0_logit_test')
        pred_y1_en = tf.layers.dense(
            self._build_fully_connected_layers(z_y_sample1_en, FLAGS.n_out, FLAGS.dim_out, self.dropout_out,
                                               'y1_out_net_test'), 1, name='pred_y1_logit_test')

        # Put the per-group predictions back at their original row positions.
        pred_y_de = tf.dynamic_stitch([i0, i1], [pred_y0_de, pred_y1_de])
        pred_y_en = tf.dynamic_stitch([i0, i1], [pred_y0_en, pred_y1_en])

        # Treatment loss: `_de` head plus the `_en` head weighted by coef_t_pred.
        self.t_classif_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_t_de, labels=self.t)) + \
                              FLAGS.coef_t_pred*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_t_en, labels=self.t))


        # Outcome loss: `_de` head plus the `_en` head weighted by coef_y_pred.
        if FLAGS.loss == "log":
            self.y_predict_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_de, labels=self.y)) + \
                                  FLAGS.coef_y_pred*tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_en, labels=self.y))
        else:
            # self.y_predict_loss = tf.reduce_mean(self.sample_weight * tf.square(self.y - pred_y_de))
            self.y_predict_loss = tf.reduce_mean(tf.square(self.y - pred_y_de)) + \
                                  FLAGS.coef_y_pred*tf.reduce_mean(tf.square(self.y - pred_y_en))
            # self.y_predict_loss = tf.reduce_mean(tf.abs(self.y - pred_y_de))

        # Distance between the `_de` and `_en` distribution of each latent.
        KL_zt = self._KL_distance(z_t_de_mu, z_t_de_sigma, z_t_en_mu, z_t_en_sigma)
        KL_zc = self._KL_distance(z_c_de_mu, z_c_de_sigma, z_c_en_mu, z_c_en_sigma)
        KL_zy = self._KL_distance(z_y_de_mu, z_y_de_sigma, z_y_en_mu, z_y_en_sigma)

        # Pairwise terms from self._cal_orth on the `_en` samples.
        orth_loss_t_y = self._cal_orth(z_t_sample_en, z_y_sample_en)
        orth_loss_t_c = self._cal_orth(z_t_sample_en, z_c_sample_en)
        orth_loss_y_c = self._cal_orth(z_y_sample_en, z_c_sample_en)

        orth_loss = orth_loss_t_y + orth_loss_t_c + orth_loss_y_c

        # Total objective: prediction losses + KL terms + weighted orth term.
        self.tot_loss = self.t_classif_loss + self.y_predict_loss
        self.tot_loss = self.tot_loss + KL_zt + KL_zc + KL_zy
        self.tot_loss = self.tot_loss + FLAGS.coef_orth_loss * orth_loss

        # Outcome loss of the `_en` head alone (root of the mean squared error
        # in the non-"log" case).
        if FLAGS.loss == "log":
            self.pred_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=pred_y_en, labels=self.y))
        else:
            self.pred_loss = tf.sqrt(tf.reduce_mean(tf.square(self.y - pred_y_en)))

        # imb_dist is stored but not added to tot_loss in this method; imb_mat
        # is not used afterwards.
        self.imb_dist, imb_mat = wasserstein(z_t_sample_en, self.t, 0.5, lam=FLAGS.wass_lambda,
                                             its=FLAGS.wass_iterations,
                                             sq=False, backpropT=FLAGS.wass_bpt)
        # Expose probabilities for the "log" loss and raw predictions otherwise.
        if FLAGS.loss == "log":
            self.output = tf.nn.sigmoid(pred_y_en)
        else:
            self.output = pred_y_en
Example #32
0
def cast(p):
    """Round `p` with `tf.round` and convert the result to int32."""
    rounded = tf.round(p)
    return tf.to_int32(rounded)
Example #33
0
# Second convolutional layer: fs2 x fs2 kernels mapping nf1 to nf2 channels.
W_conv2 = weight_variable([fs2, fs2, nf1, nf2])
b_conv2 = bias_variable([nf2])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)


# Reverse the process.

## Unpool, deconv 1

# Unpooling: move dim 3 to position 1, flatten the two trailing dims, then
# duplicate values with tf.tile (x2 along the last axis) in two passes.
# NOTE(review): `sy/4`, `sx/4` etc. use `/`, which yields floats under
# Python 3 -- confirm the interpreter version or that sy/sx divide evenly.
a1 = tf.transpose(h_pool2, perm = [0,3,1,2])
b1 = tf.reshape(a1,[-1,nf2,sy/4*sx/4,1])
c1 = tf.tile(b1,tf.to_int32(tf.constant(np.array([1,1,1,2]))))
d1 = tf.reshape(c1,[-1,nf2,sy/4,sx/2])
e1 = tf.tile(d1,tf.to_int32(tf.constant(np.array([1,1,1,2]))))
h_unpool1 = tf.reshape(e1, [-1,sy/2,sx/2,nf2])

# Deconvolution: reuse W_conv2 with its last two dims (in/out) swapped.
# NOTE(review): this reads h_conv2, not h_unpool1 computed above -- confirm
# that this is intended.
W_conv2_tr = tf.transpose(W_conv2, perm = [0,1,3,2])
h_deconv1 = tf.nn.relu(conv2d(h_conv2 - b_conv2, W_conv2_tr))

## Unpool, deconv 2

# Unpooling, same steps as above with nf1 channels.
a2 = tf.transpose(h_deconv1, perm = [0,3,1,2])
b2 = tf.reshape(a2,[-1,nf1,sy/2*sx/2,1])
c2 = tf.tile(b2,tf.to_int32(tf.constant(np.array([1,1,1,2]))))
d2 = tf.reshape(c2,[-1,nf1,sy/2,sx])
def down_shift(x):
    """Append `hp.filter_h - 1` zero slices to the end of axis 2 of `x`.

    Args:
        x: Tensor indexed with four dims here; its static size along dim 3 is
            reused for the zero block.

    Returns:
        `x` concatenated along axis 2 with a zero tensor of shape
        `[batch, hp.max_len, hp.filter_h - 1, x_shape[3]]`.
    """
    static_shape = x.get_shape().as_list()
    # The batch size for the zero padding is read at run time: dim 0 of a
    # reduction over `x` compared against the id `hp.vocab_size + 1`.
    differs_from_id = tf.to_int32(tf.not_equal(x, hp.vocab_size + 1))
    runtime_batch = tf.shape(tf.reduce_sum(differs_from_id, 1))[0]
    whole_input = x[:, :, :, :]
    zero_block = tf.zeros(
        [runtime_batch, hp.max_len, hp.filter_h - 1, static_shape[3]])
    return tf.concat((whole_input, zero_block), 2)
Example #35
0
 def decode_label(label):
     """Decode a raw byte string holding a single uint8 into an int32 scalar.

     Args:
         label: String tensor passed to `tf.decode_raw` as uint8 data.

     Returns:
         The decoded value reshaped to a scalar and converted to int32.
     """
     raw_bytes = tf.decode_raw(label, tf.uint8)  # tf.string -> [tf.uint8]
     scalar_byte = tf.reshape(raw_bytes, [])  # exactly one element expected
     return tf.to_int32(scalar_byte)
def mul_adaptive_embedding_lookup(x, n_token, d_embed, d_proj, cutoffs, initializer,
                                  proj_initializer, div_val=1, perms=None,
                                  proj_same_dim=True,
                                  scope='adaptive_embed'):
  """Adaptive embedding lookup with per-bin tables and projections.

  With `div_val == 1` a single `[n_token, d_embed]` table is used and
  projected to `d_proj` only when the two sizes differ. Otherwise the
  vocabulary is split at `cutoffs` into bins; bin `i` gets a table of width
  `d_embed // div_val ** i` and, where needed, a projection to `d_proj`.

  Args:
    x: Token ids. The einsum specs used here ('ibe,...', 'ib,...') imply a
      rank-2 tensor.
    n_token: Vocabulary size.
    d_embed: Embedding width of the first bin.
    d_proj: Output width; the result is also scaled by `d_proj ** 0.5`.
    cutoffs: Sequence of bin boundaries (exclusive of 0 and `n_token`).
    initializer: Initializer for the lookup tables.
    proj_initializer: Initializer for the projection matrices.
    div_val: Divisor applied to the embedding width from one bin to the next.
    perms: If None, first compute W = W1 x W2 (projection for each bin),
      and then compute X x W (embedding lookup). If not None,
      use bin-based embedding lookup with max_bin_size defined by
      the shape of perms.
    proj_same_dim: When False, a bin whose width already equals `d_proj`
      gets no projection matrix.
    scope: Name of the enclosing variable scope.

  Returns:
    A `(y, ret_params)` tuple. `y` is the scaled embedding of `x`.
    `ret_params` is `[lookup_table, proj_W]` for `div_val == 1` and
    `[tables, projs]` (one entry per bin, `None` for a missing projection)
    otherwise.
  """
  emb_scale = d_proj ** 0.5
  with tf.variable_scope(scope):
    if div_val == 1:
      lookup_table = tf.get_variable('lookup_table', [n_token, d_embed],
                                     initializer=initializer)
      y = embedding_lookup(lookup_table, x)
      if d_proj != d_embed:
        proj_W = tf.get_variable('proj_W', [d_embed, d_proj],
                                 initializer=proj_initializer)
        y = tf.einsum('ibe,ed->ibd', y, proj_W)
      else:
        proj_W = None
      ret_params = [lookup_table, proj_W]
    else:
      tables, projs = [], []
      # list() lets callers pass a tuple as well as a list.
      cutoff_ends = [0] + list(cutoffs) + [n_token]
      x_size = tf.shape(x)
      if perms is None:
        # Collects one projected table per bin; concatenated after the loop.
        cat_lookup = []
      else:
        # Accumulates the embeddings contributed by every bin.
        cat_lookup = tf.zeros([x_size[0], x_size[1], d_proj])
      bin_bounds = zip(cutoff_ends[:-1], cutoff_ends[1:])
      for i, (l_idx, r_idx) in enumerate(bin_bounds):
        with tf.variable_scope('cutoff_{}'.format(i)):
          cur_d_embed = d_embed // (div_val ** i)
          lookup_table = tf.get_variable('lookup_table',
                                         [r_idx - l_idx, cur_d_embed],
                                         initializer=initializer)
          if cur_d_embed == d_proj and not proj_same_dim:
            proj_W = None
          else:
            proj_W = tf.get_variable('proj_W', [cur_d_embed, d_proj],
                                     initializer=proj_initializer)
          if perms is None:
            if proj_W is None:
              # No projection for this bin: its table already has width
              # d_proj, so it can be concatenated as is. Passing None to
              # tf.einsum here would fail.
              cat_lookup.append(lookup_table)
            else:
              cat_lookup.append(tf.einsum('ie,ed->id', lookup_table, proj_W))
          else:
            # speed up the computation of the first bin
            # also save some memory
            if i == 0:
              # Ids beyond the first bin are clamped to its last row; their
              # contribution is then scaled by perms[0].
              cur_y = embedding_lookup(lookup_table, tf.minimum(x, r_idx - 1))
              if proj_W is not None:
                cur_y = tf.einsum('ibe,ed->ibd', cur_y, proj_W)
              cur_y *= perms[i][:, :, None]
              cat_lookup += cur_y
            else:
              # Contract the bin-relative ids with perms[i] into a vector of
              # length k, look those up, then scatter the rows back with the
              # same perms[i].
              cur_x = tf.einsum('ib,ibk->k', tf.to_float(x - l_idx), perms[i])
              cur_x = tf.to_int32(cur_x)
              cur_y = embedding_lookup(lookup_table, cur_x)
              if proj_W is not None:
                cur_y = tf.einsum('ke,ed->kd', cur_y, proj_W)
              cat_lookup += tf.einsum('kd,ibk->ibd', cur_y, perms[i])
          tables.append(lookup_table)
          projs.append(proj_W)
      if perms is None:
        cat_lookup = tf.concat(cat_lookup, 0)
        y = embedding_lookup(cat_lookup, x)
      else:
        y = cat_lookup
      ret_params = [tables, projs]

  y *= emb_scale
  return y, ret_params
Example #37
0
def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
    """
    Aligned variant of tf.image.crop_and_resize that follows this project's
    definition of floating point boxes.

    Args:
        image: NCHW
        boxes: nx4, x1y1x2y2
        box_ind: (n,)
        crop_size (int): side length of the square crops.
        pad_border (bool): symmetrically pad one pixel on each spatial border
            (and shift the boxes by one) before cropping.
    Returns:
        n,C,size,size
    """
    assert isinstance(crop_size, int), crop_size
    boxes = tf.stop_gradient(boxes)

    if pad_border:
        # TF's crop_and_resize produces zeros on border, hence the padding.
        # this can be quite slow
        border = [[0, 0], [0, 0], [1, 1], [1, 1]]
        image = tf.pad(image, border, mode='SYMMETRIC')
        boxes = boxes + 1

    @under_name_scope()
    def transform_fpcoor_for_tf(boxes, image_shape, crop_shape):
        """
        Convert fpcoor boxes into the normalized boxes expected by
        tf.image.crop_and_resize.

        tf.image.crop_and_resize samples (with a normalized box) on this grid:
        Initial point (the value of output[0]): x0_box * (W_img - 1)
        Spacing: w_box * (W_img - 1) / (W_crop - 1)

        The grid wanted here (with an fpcoor box) is instead:
        Spacing: w_box / W_crop
        Initial point: x0_box + spacing/2 - 0.5
        (-0.5 because bilinear sampling, as defined here, treats floating
         point coordinate (0.0, 0.0) as the same as pixel value (0, 0))

        Returns:
            y1x1y2x2
        """
        x0, y0, x1, y1 = tf.split(boxes, 4, axis=1)

        crop_h, crop_w = crop_shape[0], crop_shape[1]
        img_h_minus_1 = tf.to_float(image_shape[0] - 1)
        img_w_minus_1 = tf.to_float(image_shape[1] - 1)

        step_x = (x1 - x0) / tf.to_float(crop_w)
        step_y = (y1 - y0) / tf.to_float(crop_h)

        start_x = (x0 + step_x / 2 - 0.5) / img_w_minus_1
        start_y = (y0 + step_y / 2 - 0.5) / img_h_minus_1

        extent_x = step_x * tf.to_float(crop_w - 1) / img_w_minus_1
        extent_y = step_y * tf.to_float(crop_h - 1) / img_h_minus_1

        corners = [start_y, start_x, start_y + extent_y, start_x + extent_x]
        return tf.concat(corners, axis=1)

    # Expand bbox to a minimum size of 1
    # boxes_x1y1, boxes_x2y2 = tf.split(boxes, 2, axis=1)
    # boxes_wh = boxes_x2y2 - boxes_x1y1
    # boxes_center = tf.reshape((boxes_x2y2 + boxes_x1y1) * 0.5, [-1, 2])
    # boxes_newwh = tf.maximum(boxes_wh, 1.)
    # boxes_x1y1new = boxes_center - boxes_newwh * 0.5
    # boxes_x2y2new = boxes_center + boxes_newwh * 0.5
    # boxes = tf.concat([boxes_x1y1new, boxes_x2y2new], axis=1)

    spatial_shape = tf.shape(image)[2:]
    normalized_boxes = transform_fpcoor_for_tf(
        boxes, spatial_shape, [crop_size, crop_size])
    image_nhwc = tf.transpose(image, [0, 2, 3, 1])  # nhwc
    crops_nhwc = tf.image.crop_and_resize(
        image_nhwc,
        normalized_boxes,
        tf.to_int32(box_ind),
        crop_size=[crop_size, crop_size])
    return tf.transpose(crops_nhwc, [0, 3, 1, 2])  # ncss
Example #38
0
def get_losses(d_out_real,
               d_out_fake,
               x_real_onehot,
               x_fake_onehot_appr,
               gen_o,
               discriminator,
               config,
               rewards=None,
               initial_samples_for_rewards=None):
    """Build generator, discriminator and policy-gradient losses.

    Args:
        d_out_real: Discriminator outputs for real samples.
        d_out_fake: Discriminator outputs for generated samples.
        x_real_onehot: Real samples, forwarded to `gradient_penalty`
            ('wgan-gp' only).
        x_fake_onehot_appr: Generated samples; forwarded to
            `gradient_penalty` and reshaped to `[-1, vocab_size]` for the
            policy-gradient term.
        gen_o: Generator output whose mean log (plus `EPS`) is reported.
        discriminator: Forwarded to `gradient_penalty` ('wgan-gp' only).
        config: Dict providing 'batch_size', 'gan_type', 'seq_len',
            'vocab_size', 'rl_alpha', 'g_architecture' and, when both
            architectures contain '_pg', 'd_architecture' and 'rl_only'.
        rewards: Rewards for the policy-gradient term.
        initial_samples_for_rewards: Sampled token ids matching `rewards`.

    Returns:
        `(log_pg, g_loss, d_loss, reinforce_loss)`.

    Raises:
        NotImplementedError: If `config['gan_type']` is not one of the
            supported names.
    """
    # All five keys are read up front; batch_size and seq_len are not used
    # further down.
    batch_size = config['batch_size']
    gan_type = config['gan_type']
    seq_len = config['seq_len']
    vocab_size = config['vocab_size']
    RL_alpha = config['rl_alpha']

    def mean_sigmoid_ce(logits, labels):
        # Batch mean of the element-wise sigmoid cross-entropy.
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=labels))

    if gan_type in ('standard', 'JS', 'KL'):
        # These three share the discriminator loss and differ only in the
        # generator loss.
        d_loss_real = mean_sigmoid_ce(d_out_real, tf.ones_like(d_out_real))
        d_loss_fake = mean_sigmoid_ce(d_out_fake, tf.zeros_like(d_out_fake))
        d_loss = d_loss_real + d_loss_fake

        if gan_type == 'standard':  # the non-saturating GAN loss
            g_loss = mean_sigmoid_ce(d_out_fake, tf.ones_like(d_out_fake))
        elif gan_type == 'JS':  # the vanilla GAN loss
            g_loss = -d_loss_fake
        else:  # 'KL': the GAN loss implicitly minimizing KL-divergence
            g_loss = tf.reduce_mean(-d_out_fake)

    elif gan_type == 'hinge':  # the hinge loss
        d_loss_real = tf.reduce_mean(tf.nn.relu(1.0 - d_out_real))
        d_loss_fake = tf.reduce_mean(tf.nn.relu(1.0 + d_out_fake))
        d_loss = d_loss_real + d_loss_fake
        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'tv':  # the total variation distance
        d_loss = tf.reduce_mean(tf.tanh(d_out_fake) - tf.tanh(d_out_real))
        g_loss = tf.reduce_mean(-tf.tanh(d_out_fake))

    elif gan_type == 'wgan-gp':  # WGAN-GP
        d_loss = tf.reduce_mean(d_out_fake) - tf.reduce_mean(d_out_real)
        GP = gradient_penalty(discriminator, x_real_onehot, x_fake_onehot_appr,
                              config)
        d_loss += GP
        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'LS':  # LS-GAN
        d_loss_real = tf.reduce_mean(tf.squared_difference(d_out_real, 1.0))
        d_loss_fake = tf.reduce_mean(tf.square(d_out_fake))
        d_loss = d_loss_real + d_loss_fake
        g_loss = tf.reduce_mean(tf.squared_difference(d_out_fake, 1.0))

    elif gan_type == 'RSGAN':  # relativistic standard GAN
        d_loss = mean_sigmoid_ce(d_out_real - d_out_fake,
                                 tf.ones_like(d_out_real))
        g_loss = mean_sigmoid_ce(d_out_fake - d_out_real,
                                 tf.ones_like(d_out_fake))

    else:
        raise NotImplementedError("Divergence '%s' is not implemented" %
                                  gan_type)

    #TODO
    reinforce_loss = tf.constant(0.0)
    uses_policy_gradient = ('_pg' in config['g_architecture']
                            and '_pg' in config['d_architecture'])
    if uses_policy_gradient:
        fake_probs = tf.reshape(x_fake_onehot_appr, [-1, vocab_size])
        sampled_ids = tf.to_int32(
            tf.reshape(initial_samples_for_rewards, [-1]))
        sampled_onehot = tf.one_hot(sampled_ids, vocab_size, 1.0, 0.0)
        # Clip before the log so that zero probabilities stay finite.
        log_probs = tf.log(tf.clip_by_value(fake_probs, 1e-20, 1.0))
        # Log-probability of each sampled token.
        sampled_log_prob = tf.reduce_sum(sampled_onehot * log_probs, 1)
        reinforce_loss = tf.reduce_mean(
            sampled_log_prob * tf.reshape(rewards, [-1]))
        reinforce_loss = -RL_alpha * reinforce_loss
        if config['rl_only'] == True:
            print("No gan objective in G, only policy gradients")
            g_loss = reinforce_loss
            # Placeholder so callers still receive a discriminator loss.
            d_loss = tf.get_variable("dummy_d_loss",
                                     initializer=0.0,
                                     trainable=False)
        else:
            g_loss += reinforce_loss

    # [1], measures the log p_g(x)
    log_pg = tf.reduce_mean(tf.log(gen_o + EPS))

    return log_pg, g_loss, d_loss, reinforce_loss
Example #39
0
    def _build_model(self):
        """Build the network, the weighted eight-point solver and the loss.

        For each of `self.config.num_phase` phases the graph from
        `archs.cvpr2020.build_graph` produces a logit per correspondence,
        the logits are turned into weights, and a weighted 9x9 system is
        solved with `tf.self_adjoint_eig` to obtain `e_hat`.

        Sets:
            self.fetch_vis: Dict of tensors kept for visualization.
            self.bool_use_weight_for_score: Copied from the config.
            self.logit_softmax: Only for weight_opt == "sigmoid_softmax".
            self.out_e_hat: `e_hat` before the optional fundamental-matrix
                conversion and before normalization.
            self.loss: `loss_decay * l2_loss` plus the mean of the phase losses.
            self.last_e_hat, self.last_logit, self.last_x_in,
            self.last_weights: Values from the final phase.
            self.e_hat, self.logits: Set to None.

        Raises:
            ValueError: If `self.config.weight_opt` is not supported.
        """

        with tf.variable_scope("Matchnet", reuse=tf.AUTO_REUSE):  # create a variable scope so parameters can be shared
            # For intermediate visualization
            self.fetch_vis = {}
            # -------------------- Network architecture --------------------
            # Import correct build_graph function
            from archs.cvpr2020 import build_graph
            # Build graph
            print("Building Graph")
            # Preprocessing input, currently doing nothing
            x_in = pre_x_in(self.x_in, self.config.pre_x_in)
            y_in = self.y_in
            self.fetch_vis["x_in"] = self.x_in
            self.fetch_vis["y_in"] = self.y_in

            # Per-phase results; only `losses` is consumed after the loop.
            logits = []
            indexs = []
            e_hats = []
            losses = []
            # Framework for iterative top-k strategy.
            # We currently disable iterative top-k by setting num_phase=1.
            for i in range(self.config.num_phase):
                # weight_local is the weight matrix for incorporating locality into the network,
                # but it is currently disabled by setting it to None.
                weight_local = None
                x_shp = tf.shape(x_in)
                logit, vis_dict = build_graph(x_in, self.is_training, self.config, weight_local)
                tf.summary.histogram("logit", logit)
                self.fetch_vis = {**self.fetch_vis, **vis_dict} # For visualizing intermediate layers
                self.fetch_vis["logits"] = logit[:, None, :, None]

                self.bool_use_weight_for_score = self.config.bool_use_weight_for_score

                # Support different output weight for 8-point algorithm
                if self.config.weight_opt == "relu_tanh":
                    weights = tf.nn.relu(tf.tanh(logit))
                elif self.config.weight_opt == "sigmoid_softmax":
                    logit_softmax = vis_dict["logit_softmax"]
                    self.logit_softmax = logit_softmax
                    mask = tf.nn.sigmoid(logit)
                    # Hard attention replaces the sigmoid by a 0/1 mask.
                    if self.config.bool_hard_attention:
                        mask = tf.to_float(logit > 0)
                    # Masked exp of logit_softmax, renormalized over the last axis.
                    weights = tf.exp(logit_softmax) * mask
                    weights = weights / tf.reduce_sum(weights, -1, keep_dims=True)
                else:
                    raise ValueError("Don't support it")


                # Make input data (num_img_pair x num_corr x 4)
                xx = tf.transpose(tf.reshape(
                    x_in, (x_shp[0], x_shp[2], 4)), (0, 2, 1))

                # Create the matrix to be used for the eight-point algorithm
                X = tf.transpose(tf.stack([
                    xx[:, 2] * xx[:, 0], xx[:, 2] * xx[:, 1], xx[:, 2],
                    xx[:, 3] * xx[:, 0], xx[:, 3] * xx[:, 1], xx[:, 3],
                    xx[:, 0], xx[:, 1], tf.ones_like(xx[:, 0])
                ], axis=1), (0, 2, 1))
                self.fetch_vis["X"] = X[:, None]
                print("X shape = {}".format(X.shape))
                # Weight every row of X, then form X^T (w X).
                wX = tf.reshape(weights, (x_shp[0], x_shp[2], 1)) * X
                print("wX shape = {}".format(wX.shape))
                XwX = tf.matmul(tf.transpose(X, (0, 2, 1)), wX)
                print("XwX shape = {}".format(XwX.shape))

                # Recover essential matrix from self-adjoint eigendecomposition.
                # Column 0 of v is taken; this is the eigenvector of the smallest
                # eigenvalue if eigenvalues come back in ascending order --
                # confirm for the TF version in use.
                e, v = tf.self_adjoint_eig(XwX)
                e_hat = tf.reshape(v[:, :, 0], (x_shp[0], 9))
                # in case you want to directly output F
                self.out_e_hat = e_hat

                if self.config.use_fundamental > 0:
                    # Go back Essential Matrix with input norm and calibration matrix
                    e_hat = tf.reshape(e_hat, (x_shp[0], 3, 3))
                    e_hat = tf.matmul(
                        tf.matmul(tf.transpose(self.T2_in, (0, 2, 1)), e_hat), self.T1_in)
                    e_hat = tf.matmul(
                        tf.matmul(tf.transpose(self.K2_in, (0, 2, 1)), e_hat), self.K1_in)
                    e_hat = tf.reshape(e_hat, (x_shp[0], 9))

                # Normalize each 9-vector to unit length.
                e_hat /= tf.norm(e_hat, axis=1, keep_dims=True)
                last_e_hat = e_hat
                last_logit = logit
                last_x_in = x_in
                last_weights = weights

                e_hats += [e_hat]
                losses += [self._build_loss(e_hat, logit, x_in, y_in, weights, name=str(i))]
                logits += [logit]
                num_top_k = tf.to_int32(x_shp[2] * 5 / 10) # top 50% points
                # update x_in and y_in according to the logit
                x_in, index = topk(x_in, logit[:, None], num_top_k)
                y_in = tf.squeeze(tf.gather_nd(y_in[:, None], index), 1)
                indexs += [index]
            # L2 loss
            # Every trainable variable whose name contains "weights" contributes.
            for var in tf.trainable_variables():
                if "weights" in var.name:
                    print(var.name)
                    tf.add_to_collection("l2_losses", tf.reduce_sum(var**2))
            l2_loss = tf.add_n(tf.get_collection("l2_losses"))
            tf.summary.scalar("l2_loss", l2_loss)
            # Check global_step and add essential loss
            loss = self.config.loss_decay * l2_loss
            self.loss = loss + tf.reduce_mean(tf.stack(losses))
            # replace self.logit and self.e_hat with self.last_e_hat,
            # self.last_logit, self.last_x_in
            self.e_hat = None
            self.logits = None
            self.last_e_hat = last_e_hat
            self.last_logit = last_logit
            self.last_x_in = last_x_in
            self.last_weights = last_weights
Example #40
0
 def _get_staffline_window_size(self, staffline_distance):
     """Scale the staffline distance and round it to an int32 window size.

     Args:
         staffline_distance: Value multiplied by
             `self.staffline_distance_multiple` after both are cast to float.

     Returns:
         The rounded product as an int32 tensor.
     """
     distance = tf.to_float(staffline_distance)
     multiple = tf.to_float(self.staffline_distance_multiple)
     return tf.to_int32(tf.round(distance * multiple))
Example #41
0
    def biuld_net(self):
        """Build three parallel capsule branches, merge them and add a decoder.

        Each branch applies `CNNs` to `self.x` with a different kernel height
        (9, 7, 5), forms primary capsules with `layers_vector`, and runs
        `routing` towards `self.num_label` output capsules with 32, 16 and 8
        dimensions respectively. The branch outputs are weighted and
        concatenated along axis 3.

        Sets:
            self.primary_cap, self.primary_cap_1, self.primary_cap_2: Primary
                capsules of the three branches.
            self.caps: Concatenated capsules with axis 1 squeezed away.
            self.argmax_idx: int32 index of the longest capsule per sample.
            self.masked_v: Capsules multiplied by `self.y`.
            self.v_length: Capsule lengths (square root of summed squares).
            self.decode: Output of the three-layer fully connected decoder.
        """
        # gragh = tf.Graph()
        # with gragh.as_default():
        ###########
        ### Branch 0: top conv with kernel [9, 1].
        top_con = CNNs(self.x, 128, [9, 1], 2, "SAME", self.is_train)
        self.primary_cap = layers_vector(
            top_con,
            32,
            4, [9, 1],
            1,
            self.is_train,
            shapes=[-1, self.next_length * 8, 16, 1])
        # [-1,88*16,8,1]  (shape note from the original author)
        #with tf.variable_scope("capsules_layers"):
        # Insert a singleton axis at position 2 so routing can address outputs.
        fc_function = tf.reshape(self.primary_cap,
                                 shape=(-1, self.primary_cap.shape[1].value, 1,
                                        self.primary_cap.shape[-2].value, 1))
        #with tf.variable_scope("routing"):
        #[-1,88*16,1,8,1]  (shape note from the original author)
        # Zero-initialized tensor handed to routing as its second argument.
        blu = tf.constant(np.zeros([
            self.batch_size, self.primary_cap.shape[1].value, self.num_label,
            1, 1
        ]),
                          dtype=tf.float32)
        # Unlike the two branches below, this call is not wrapped in its own
        # variable scope.
        caps = routing(fc_function,
                       blu,
                       num_outputs=self.num_label,
                       num_dims=32)
        #### [120,37,8,1]  (shape note from the original author)
        ### Branch 1: kernel [7, 1], 16-dimensional output capsules.
        top_conv_1 = CNNs(self.x, 128, [7, 1], 2, "SAME", self.is_train)
        self.primary_cap_1 = layers_vector(
            top_conv_1,
            32,
            4, [7, 1],
            1,
            self.is_train,
            shapes=[-1, self.next_length * 16, 8, 1])
        fc_function_1 = tf.reshape(
            self.primary_cap_1,
            shape=(-1, self.primary_cap_1.shape[1].value, 1,
                   self.primary_cap_1.shape[-2].value, 1))
        blu_1 = tf.constant(np.zeros([
            self.batch_size, self.primary_cap_1.shape[1].value, self.num_label,
            1, 1
        ]),
                            dtype=tf.float32)
        with tf.variable_scope("routint_1"):
            caps_1 = routing(fc_function_1, blu_1, self.num_label, 16)
        ### Branch 2: kernel [5, 1], 8-dimensional output capsules.
        top_con_2 = CNNs(self.x, 128, [5, 1], 2, 'SAME', self.is_train)
        self.primary_cap_2 = layers_vector(
            top_con_2,
            32,
            4, [5, 1],
            1,
            self.is_train,
            shapes=[-1, self.next_length * 32, 4, 1])
        fc_function_2 = tf.reshape(
            self.primary_cap_2,
            shape=(-1, self.primary_cap_2.shape[1].value, 1,
                   self.primary_cap_2.shape[-2].value, 1))
        blu_2 = tf.constant(np.zeros([
            self.batch_size, self.primary_cap_2.shape[1].value, self.num_label,
            1, 1
        ]),
                            dtype=tf.float32)
        with tf.variable_scope("routing_2"):
            caps_2 = routing(fc_function_2, blu_2, self.num_label, 8)

        # Scalar weights applied to the three branches before concatenation.
        a = 3.0
        b = 1.0
        c = 1.0
        #  a = 3.0
        #  b = 1.0
        caps = tf.concat([a * caps, b * caps_1, c * caps_2], axis=3)
        # This is the best performance in our experiments.

        self.caps = tf.squeeze(caps, axis=1)
        # Capsule length; `eposilion` keeps the square root away from zero.
        v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps), axis=2, keepdims=True) +
            eposilion)
        softmax_v = softmax(v_length, axis=1)
        #########[batch_size,num_label,1,1]
        argmax_idx = tf.to_int32(tf.argmax(softmax_v, axis=1))
        self.argmax_idx = tf.reshape(argmax_idx, shape=(self.batch_size, ))
        ###
        # Mask the capsules with self.y reshaped to (-1, num_label, 1).
        self.masked_v = tf.multiply(
            tf.squeeze(self.caps), tf.reshape(self.y, (-1, self.num_label, 1)))
        # Same expression as the local v_length above, stored on self.
        self.v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps), axis=2, keepdims=True) +
            eposilion)
        ########
        # decoder: 256 -> 512 -> self.length units, sigmoid on the last layer.
        vector_j = tf.reshape(self.masked_v, shape=(self.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=256)
        fc1 = tf.contrib.layers.fully_connected(fc1, num_outputs=512)
        self.decode = tf.contrib.layers.fully_connected(
            fc1, num_outputs=self.length, activation_fn=tf.sigmoid)
Example #42
0
    def build_graph(self, task_index: int = 0, time_major: bool = True) -> None:
        """Build the TensorFlow graph used for training and validation.

        Creates a fresh ``tf.Graph`` and, inside it, the input placeholders,
        the training and validation logits (both through ``self.__call__``),
        the two losses (through ``self.compute_ce_loss``), a greedy CTC
        decoding op, the optimizer with global-norm gradient clipping,
        optional summaries and a saver.

        Args:
            task_index: Index of this worker. Only consulted when
                ``self.server`` is not None; task 0 is treated as chief.
            time_major: If True the ``inputX`` placeholder is laid out as
                [max_input_length, batch_size, input_dim], otherwise as
                [batch_size, max_input_length, input_dim]. The flag is also
                forwarded to ``self.__call__`` and ``self.compute_ce_loss``.

        Raises:
            Exception: If ``self.conf['optimizer']`` is neither ``'adam'``
                nor ``'nm'``.

        Attributes set on ``self``: ``is_chief``, ``graph``, ``inputX``,
        ``targetY``, ``input_seq_length``, ``target_seq_length``,
        ``trainlogits``, ``logit_seq_length``, ``vallogits``,
        ``val_logit_seq_length``, ``val_loss``, ``predictions``,
        ``global_step``, ``loss``, ``update_op``, ``saver`` and, when
        ``conf['write_summary'] == 'yes'``, ``train_summary_writer`` and
        ``summary_op``.
        """
        # self.max_input_length = dispenser.max_input_length

        if self.server is not None:
            # Distributed setup: place ops according to the cluster spec.
            cluster = tf.train.ClusterSpec(self.server.server_def.cluster)
            self.is_chief = task_index == 0
            # NOTE(review): num_replicas is assigned but never read in this
            # method.
            num_replicas = len(cluster.as_dict()['worker'])
            device = tf.train.replica_device_setter(
                cluster=cluster,
                worker_device='/job:worker/task:%d' % task_index)
        else:
            # Single-process setup: no explicit device placement.
            self.is_chief = True
            num_replicas = 1
            device = None

        # create the graph
        self.graph = tf.Graph()

        # define the placeholders in the graph

        with self.graph.as_default():
            batch_size = self.conf['batch_size']
            with tf.device(device):
                # create the inputs placeholder, time_major, [time,batch_size,input_dim]
                # max_input_length and batch_size should be None for efficiency compute?
                if time_major:
                    self.inputX = tf.placeholder(dtype=tf.float32,
                                                 shape=[
                                                     self.max_input_length,
                                                     batch_size, self.input_dim
                                                 ],
                                                 name='inputX')
                else:
                    self.inputX = tf.placeholder(dtype=tf.float32,
                                                 shape=[
                                                     batch_size,
                                                     self.max_input_length,
                                                     self.input_dim
                                                 ],
                                                 name='inputX')

                # reference labels, always [batch_size, max_target_length]
                self.targetY = tf.placeholder(
                    dtype=tf.int32,
                    shape=[batch_size, self.max_target_length],
                    name='targetY')

                # the length of all the input sequences
                self.input_seq_length = tf.placeholder(dtype=tf.int32,
                                                       shape=[batch_size],
                                                       name='input_seq_length')

                # the length of all the output sequences
                self.target_seq_length = tf.placeholder(
                    dtype=tf.int32,
                    shape=[batch_size],
                    name='target_seq_length')

                # compute the training outputs of the classifier
                self.trainlogits, self.logit_seq_length = self.__call__(
                    inputs=self.inputX,
                    input_seq_length=self.input_seq_length,
                    targets=self.targetY,
                    target_seq_length=self.target_seq_length,
                    is_training=True,
                    time_major=time_major)

                # Second forward pass with is_training=False, built under the
                # 'validation' variable scope.
                # NOTE(review): whether this shares weights with the training
                # pass depends on how __call__ creates its variables - confirm.
                with tf.variable_scope('validation'):
                    self.vallogits, self.val_logit_seq_length = self.__call__(
                        inputs=self.inputX,
                        input_seq_length=self.input_seq_length,
                        targets=self.targetY,
                        target_seq_length=self.target_seq_length,
                        is_training=False,
                        time_major=time_major)
                    self.val_loss = self.compute_ce_loss(
                        self.targetY,
                        self.vallogits,
                        self.val_logit_seq_length,
                        self.target_seq_length,
                        time_major=time_major)

                    # Greedy CTC decoding of the validation logits; [0][0]
                    # picks the first decoded tensor out of the returned
                    # (decoded, ...) pair.
                    # NOTE(review): tf.nn.ctc_greedy_decoder is documented as
                    # taking time-major inputs - confirm the layout of
                    # vallogits when time_major is False.
                    self.predictions = tf.to_int32(
                        tf.nn.ctc_greedy_decoder(self.vallogits,
                                                 self.val_logit_seq_length,
                                                 merge_repeated=False)[0][0])

                # a variable to hold the amount of steps already taken
                self.global_step = tf.get_variable(
                    name='global_step',
                    shape=[],
                    dtype=tf.int32,
                    initializer=tf.constant_initializer(0),
                    trainable=False)

                with tf.variable_scope('train'):
                    # create the optimizer selected by conf['optimizer']
                    if self.conf['optimizer'] == 'adam':
                        optimizer = tf.train.AdamOptimizer(
                            self.conf['learning_rate'])
                    elif self.conf['optimizer'] == 'nm':  # Nesterov momentum
                        optimizer = tf.train.MomentumOptimizer(
                            self.conf['learning_rate'],
                            0.99,
                            use_nesterov=True)
                    else:
                        # NOTE(review): the message is concatenated without a
                        # separator, so it reads e.g. "...funcsgd".
                        raise Exception('unsupported optimizer func' +
                                        self.conf['optimizer'])

                    # compute the loss
                    self.loss = self.compute_ce_loss(self.targetY,
                                                     self.trainlogits,
                                                     self.logit_seq_length,
                                                     self.target_seq_length,
                                                     time_major=time_major)

                    # compute the gradients and split the (grad, var) pairs
                    # into two parallel tuples
                    gradients, variables = zip(
                        *optimizer.compute_gradients(self.loss))

                    with tf.variable_scope('clip'):
                        # clip the gradients by their global norm; the
                        # unclipped norm is logged first when summaries are on
                        if self.conf['write_summary'] == 'yes':
                            tf.summary.scalar('global_gradients_norm',
                                              tf.global_norm(gradients))
                        # A falsy conf['grad_clip'] (e.g. None or 0) falls
                        # back to a clip norm of 5.
                        gradients, _ = tf.clip_by_global_norm(
                            gradients, self.conf['grad_clip'] or 5)

                    # operation to apply the gradients
                    apply_gradients_op = optimizer.apply_gradients(
                        grads_and_vars=zip(gradients, variables),
                        global_step=self.global_step,
                        name='apply_gradients')

                    # all remaining operations with the UPDATE_OPS GraphKeys
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

                    # create an operation to update the gradients, the batch_loss
                    # and do all other update ops
                    self.update_op = tf.group(*([apply_gradients_op] +
                                                update_ops),
                                              name='update')

                if self.conf['write_summary'] == 'yes':
                    # create the summaries for visualisation
                    tf.summary.scalar('validation loss', self.val_loss)
                    tf.summary.scalar('train loss', self.loss)
                    # NOTE(review): self.learning_rate is not assigned in this
                    # method (the optimizer reads self.conf['learning_rate']);
                    # it must be defined elsewhere on the instance.
                    tf.summary.scalar('learning rate', self.learning_rate)

                    # create a histogram for all trainable parameters
                    for param in tf.trainable_variables():
                        tf.summary.histogram(param.name, param)

                    self.train_summary_writer = tf.summary.FileWriter(
                        os.path.join(self.conf['savepath'], 'log/train',
                                     self.conf['model']))
                    self.summary_op = tf.summary.merge_all()

                # create the saver over all global variables
                self.saver = tf.train.Saver(tf.global_variables(),
                                            max_to_keep=5,
                                            keep_checkpoint_every_n_hours=1)
        def body(*args) -> BeamSearchLoopState:
            """Perform a single beam search step.

            Advances the parent decoder by one step, scores every
            (hypothesis, token) continuation, keeps the ``beam_size`` best
            ones and reorders the decoder state so that it follows them.

            Arguments:
                *args: The fields of a ``BeamSearchLoopState`` in positional
                    order; they are packed back into that tuple on entry.

            Returns:
                The ``BeamSearchLoopState`` for the next loop iteration.
            """
            state = BeamSearchLoopState(*args)
            search = state.bs_state
            decoder_state = state.decoder_loop_state

            # Refuse to run on a parent decoder whose step scope is not in
            # reuse mode.
            assert self.parent_decoder.step_scope.reuse

            # Run one step of the wrapped decoder.
            # TODO figure out why mypy throws too-many-arguments on this
            stepped = decoder_body(*decoder_state)  # type: ignore

            # shape(token_logprobs) = beam x vocabulary
            token_logprobs = tf.nn.log_softmax(stepped.prev_logits)

            # 1.0 for finished hypotheses, 0.0 otherwise; shape = beam x 1
            done = tf.expand_dims(tf.to_float(search.finished), 1)
            live_part = (1. - done) * token_logprobs

            # Row used for finished hypotheses: log-prob 0 for PAD and the
            # lowest float32 for every other token.
            vocab_size = len(self.parent_decoder.vocabulary)
            pad_only_row = tf.one_hot(
                PAD_TOKEN_INDEX,
                vocab_size,
                dtype=tf.float32,
                on_value=0.,
                off_value=tf.float32.min)

            done_part = done * pad_only_row
            token_logprobs = live_part + done_part

            # Accumulated log-probabilities of all continuations,
            # shape = beam x vocabulary
            candidate_logprobs = (
                tf.expand_dims(search.logprob_sum, 1) + token_logprobs)

            # Only hypotheses that were not finished grow by one token.
            candidate_lengths = (
                search.lengths + 1 - tf.to_int32(search.finished))

            # Length-penalised scores, shape = beam x vocabulary
            penalty = tf.expand_dims(
                self._length_penalty(candidate_lengths), 1)
            candidate_scores = candidate_logprobs / penalty

            # Rank over the flattened beam x vocabulary table;
            # shape(both results) = beam
            best_scores, best_flat_ids = tf.nn.top_k(
                tf.reshape(candidate_scores, [-1]), self._beam_size)

            best_scores.set_shape([self._beam_size])
            best_flat_ids.set_shape([self._beam_size])

            # Log-probability sums of the winners (without length penalty).
            best_logprob_sum = tf.gather(
                tf.reshape(candidate_logprobs, [-1]), best_flat_ids)
            # pylint: disable=no-member
            best_logprob_sum.set_shape([self._beam_size])
            # pylint: enable=no-member

            # Split each flat index into its token (remainder) and the
            # hypothesis it extends (quotient).
            chosen_tokens = tf.mod(best_flat_ids, vocab_size)
            source_beams = tf.div(best_flat_ids, vocab_size)

            # Reorder the decoder outputs to follow the surviving hypotheses.
            reordered_rnn_state = tf.gather(
                stepped.prev_rnn_state, source_beams)
            reordered_rnn_output = tf.gather(
                stepped.prev_rnn_output, source_beams)
            reordered_attns = [
                tf.gather(attn, source_beams)
                for attn in stepped.prev_contexts]
            reordered_lengths = tf.gather(candidate_lengths, source_beams)

            # A hypothesis is finished if its parent already was or if it
            # has just produced the end token.
            emitted_end = tf.equal(chosen_tokens, END_TOKEN_INDEX)
            now_finished = tf.logical_or(
                tf.gather(search.finished, source_beams),
                emitted_end)

            # Record this step in the output tensor arrays.
            history = state.bs_output
            step = decoder_state.step
            new_output = SearchStepOutputTA(
                scores=history.scores.write(step, best_scores),
                parent_ids=history.parent_ids.write(step, source_beams),
                token_ids=history.token_ids.write(step, chosen_tokens))

            new_search = SearchState(
                logprob_sum=best_logprob_sum,
                lengths=reordered_lengths,
                finished=now_finished,
                last_word_ids=chosen_tokens,
                last_state=reordered_rnn_state,
                last_attns=reordered_attns)

            # Only the fields replaced below are updated in the decoder loop
            # state; everything else is carried over from the decoder step.
            # The history of the search itself is kept in the search state
            # and the step outputs built above.
            reordered_logits = tf.gather(stepped.prev_logits, source_beams)
            reordered_contexts = [
                tf.gather(ctx, source_beams)
                for ctx in stepped.prev_contexts]

            new_decoder_state = stepped._replace(
                input_symbol=chosen_tokens,
                prev_rnn_state=reordered_rnn_state,
                prev_rnn_output=reordered_rnn_output,
                prev_logits=reordered_logits,
                prev_contexts=reordered_contexts,
                finished=now_finished)

            return BeamSearchLoopState(
                bs_state=new_search,
                bs_output=new_output,
                decoder_loop_state=new_decoder_state)
Example #44
0
def _coarseness_scale_responses(gray, window):
    """Return the (horizontal, vertical) difference maps for one window size.

    ``gray`` is filtered with a ``window x window`` kernel of ones (a windowed
    sum, not a normalised mean) and the result is convolved with 1-D kernels
    of length ``2 * window`` that hold -1 at the first tap and +1 at the last.
    The unit channel axis is squeezed out of both results.
    """
    unit_strides = [1, 1, 1, 1]
    box = tf.ones([window, window, 1, 1])
    summed = tf.nn.conv2d(gray, box, strides=unit_strides, padding='SAME')

    span = 2 * window

    kernel_h = np.zeros([1, span, 1, 1])
    kernel_h[0, 0, 0, 0] = -1
    kernel_h[0, span - 1, 0, 0] = 1
    horizontal = tf.nn.conv2d(summed,
                              kernel_h,
                              strides=unit_strides,
                              padding='SAME')
    horizontal = tf.squeeze(horizontal, [3])

    kernel_v = np.zeros([span, 1, 1, 1])
    kernel_v[0, 0, 0, 0] = -1
    kernel_v[span - 1, 0, 0, 0] = 1
    vertical = tf.nn.conv2d(summed,
                            kernel_v,
                            strides=unit_strides,
                            padding='SAME')
    vertical = tf.squeeze(vertical, [3])

    return horizontal, vertical


def coarseness(image, kmax=5):
    """Compute a scalar coarseness measure of a batch of RGB images.

    For every window size ``2**k`` with ``k = 1 .. kmax`` the grayscale image
    is box-filtered and differenced horizontally and vertically.  For each
    pixel the scale with the largest response is selected in each direction,
    the direction with the larger maximum wins, and ``2 ** index`` of the
    winning scale is averaged over all pixels.

    Args:
        image: RGB image tensor.  It is passed to
            ``tf.image.rgb_to_grayscale`` and then to ``tf.nn.conv2d``, so it
            is expected to be [batch, height, width, 3].
        kmax: Number of scales to evaluate (Python int, at least 1).  The
            default of 5 reproduces the previously hard-coded five scales.

    Returns:
        A scalar float tensor: the mean of ``2 ** Sbest``.

    Raises:
        ValueError: If ``kmax`` is smaller than 1.
    """
    if kmax < 1:
        raise ValueError('kmax must be at least 1')

    gray = tf.image.rgb_to_grayscale(image)

    horizons = []
    verticals = []
    for k in range(1, kmax + 1):
        horizon_k, vertical_k = _coarseness_scale_responses(gray, 2 ** k)
        horizons.append(horizon_k)
        verticals.append(vertical_k)

    # Stack the scales on a NEW leading axis so the argmax below runs over
    # scales only.  Concatenating on axis 0 merged the scale axis with the
    # batch axis, which is only equivalent when the batch size is 1.
    horizon = tf.stack(horizons, 0)
    vertical = tf.stack(verticals, 0)

    h_max_index = tf.to_int32(tf.argmax(horizon, 0))
    v_max_index = tf.to_int32(tf.argmax(vertical, 0))
    h_max = tf.reduce_max(horizon, 0)
    v_max = tf.reduce_max(vertical, 0)

    # Per pixel, keep the scale index of whichever direction responds more.
    comp = tf.greater(h_max, v_max)
    Sbest = tf.where(comp, h_max_index, v_max_index)

    # NOTE(review): the exponent is the zero-based scale index, so the values
    # are 1 .. 2**(kmax - 1) while the windows are 2 .. 2**kmax; kept as in
    # the original implementation - confirm this offset is intended.
    Sbest = tf.to_float(tf.pow(2, Sbest))
    frcs = tf.reduce_mean(Sbest)
    return frcs
def mul_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs,
                            params, tie_projs,
                            initializer=None, proj_initializer=None,
                            div_val=1, perms=None, proj_same_dim=True,
                            scope='adaptive_softmax',
                            **kwargs):
  """Adaptive-softmax negative log-likelihood using permutation tensors.

  Without cutoffs a single softmax over ``n_token`` classes is used and the
  mean NLL is returned.  With cutoffs the vocabulary is split into clusters
  delimited by ``[0] + cutoffs + [n_token]``; cluster 0 (the head) is extended
  with one extra logit per cutoff, and the loss is the ``perms``-weighted sum
  of head and tail NLLs divided by the summed weights.

  Args:
    hidden: Hidden states.  The clustered path contracts them with
      'ibd,ibk->kd', i.e. treats them as rank 3.
    target: Integer class ids.
    n_token: Vocabulary size.
    d_embed: Embedding size of the first cluster.
    d_proj: Projection size.
    cutoffs: List of cluster boundaries; empty disables clustering.
    params: Indexable whose item 0 holds the output weights and item 1 the
      projections (indexed per cluster when ``div_val != 1``).
    tie_projs: Per-cluster flags; a true entry reuses the projection from
      ``params`` instead of creating or omitting one.
    initializer: Not used by this function.
    proj_initializer: Initializer for newly created 'proj' variables.
    div_val: Divisor applied to the embedding size from cluster to cluster.
    perms: Per-cluster tensors; required when ``cutoffs`` is non-empty.
      ``perms[0]`` weights the head NLL elementwise, ``perms[i]`` for i > 0
      is contracted with 'ibd,ibk->kd' / 'ib,ibk->k'.
    proj_same_dim: Together with ``div_val`` decides whether an untied
      projection may be skipped when ``d_proj`` equals the cluster size.
    scope: Variable scope name.
    **kwargs: ``head_target`` supplies the labels for the head softmax.

  Returns:
    Scalar NLL tensor.
  """
  def _logit(x, W, b, proj):
    # Rank-3 inputs use the 'ib*' equations, anything else the 'i*' ones.
    is_rank3 = x.shape.ndims == 3
    projected = x
    if proj is not None:
      proj_eq = 'ibd,ed->ibe' if is_rank3 else 'id,ed->ie'
      projected = tf.einsum(proj_eq, projected, proj)
    out_eq = 'ibd,nd->ibn' if is_rank3 else 'id,nd->in'
    return tf.einsum(out_eq, projected, W) + b

  params_W = params[0]
  params_projs = params[1]

  with tf.variable_scope(scope):
    if len(cutoffs) == 0:
      # Plain softmax over the whole vocabulary.
      softmax_b = tf.get_variable('bias', [n_token],
                                  initializer=tf.zeros_initializer())
      output = _logit(hidden, params_W, softmax_b, params_projs)
      per_token_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=target, logits=output)
      return tf.reduce_mean(per_token_nll)

    total_loss = 0
    total_cnt = 0
    cutoff_ends = [0] + cutoffs + [n_token]
    bounds = zip(cutoff_ends, cutoff_ends[1:])
    for i, (l_idx, r_idx) in enumerate(bounds):
      with tf.variable_scope('cutoff_{}'.format(i)):
        cur_d_embed = d_embed // (div_val ** i)

        # Weights: a row slice of the shared matrix, or a per-cluster entry.
        shard_W = params_W[l_idx: r_idx] if div_val == 1 else params_W[i]
        shard_b = tf.get_variable('b', [r_idx - l_idx],
                                  initializer=tf.zeros_initializer())

        # Projection: tied, omitted, or freshly created.
        if tie_projs[i]:
          shard_proj = params_projs if div_val == 1 else params_projs[i]
        elif (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed:
          shard_proj = None
        else:
          shard_proj = tf.get_variable('proj', [cur_d_embed, d_proj],
                                       initializer=proj_initializer)

        if i == 0:
          # Head cluster: append one logit per tail cluster.
          cluster_W = tf.get_variable('cluster_W', [len(cutoffs), d_embed],
                                      initializer=tf.zeros_initializer())
          cluster_b = tf.get_variable('cluster_b', [len(cutoffs)],
                                      initializer=tf.zeros_initializer())
          shard_W = tf.concat([shard_W, cluster_W], 0)
          shard_b = tf.concat([shard_b, cluster_b], 0)

          head_logit = _logit(hidden, shard_W, shard_b, shard_proj)

          head_target = kwargs.get("head_target")
          head_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
              labels=head_target,
              logits=head_logit)

          masked_loss = head_nll * perms[i]
          total_loss += tf.reduce_sum(masked_loss)
          total_cnt += tf.reduce_sum(perms[i])
        else:
          # Tail cluster: contract head NLL, hidden states and shifted
          # targets with this cluster's permutation tensor.
          cur_head_nll = tf.einsum('ib,ibk->k', head_nll, perms[i])

          cur_hidden = tf.einsum('ibd,ibk->kd', hidden, perms[i])
          tail_logit = _logit(cur_hidden, shard_W, shard_b, shard_proj)

          tail_target = tf.einsum('ib,ibk->k', tf.to_float(target - l_idx),
                                  perms[i])
          tail_nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
              labels=tf.to_int32(tail_target),
              logits=tail_logit)

          sum_nll = cur_head_nll + tail_nll
          mask = tf.reduce_sum(perms[i], [0, 1])

          masked_loss = sum_nll * mask
          total_loss += tf.reduce_sum(masked_loss)
          total_cnt += tf.reduce_sum(mask)

    nll = total_loss / total_cnt

  return nll
Example #46
0
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  gt_is_matting_map=False,
                                                  scope=None,
                                                  label_weights=None):
    """Adds softmax cross entropy loss for logits of each scale.

    For every entry of `scales_to_logits` one scalar loss is computed and
    registered through `tf.losses.add_loss`. Nothing is returned.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore.
      loss_weight: Kept for interface compatibility. It is only checked
        against 1.0 when `gt_is_matting_map` is true; the per-pixel weight
        mask is built from `label_weights`, not from this argument.
      upsample_logits: Boolean, upsample logits or not.
      hard_example_mining_step: An integer, the training step in which the
        hard exampling mining kicks off. Note that we gradually reduce the
        mining percent to the top_k_percent_pixels. For example, if
        hard_example_mining_step = 100K and top_k_percent_pixels = 0.25,
        then mining percent will gradually reduce from 100% to 25% until
        100K steps after which we only mine top 25% pixels.
      top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its
        value < 1.0, only compute the loss for the top k percent pixels
        (e.g., the top 20% pixels). This is useful for hard pixel mining.
      gt_is_matting_map: If true, the groundtruth is a matting map of
        confidence score. If false, the groundtruth is an integer valued
        class mask.
      scope: String, the scope for the loss.
      label_weights: Optional per-class weights forwarded as `label_weights`
        to `utils.get_label_weight_mask`. When None, the UNIMIB class
        weights that were previously hard-coded in this function are used,
        so existing callers see no change. Presumably one weight per class
        is expected -- confirm against `utils.get_label_weight_mask`.

    Raises:
      ValueError: If `labels` is None, if the groundtruth is a matting map
        while the labels are not floating point, or if the groundtruth is a
        matting map and `loss_weight` is not 1.0.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    # If input groundtruth is a matting map of confidence, check if the input
    # labels are floating point values.
    if gt_is_matting_map and not labels.dtype.is_floating:
        raise ValueError(
            'Labels must be floats if groundtruth is a matting map.')

    if label_weights is None:
        # Historical default: class weights tuned for the UNIMIB dataset.
        # A fresh list is built on every call so callers can never mutate a
        # shared default.
        label_weights = [0.96, 55, 128, 139, 123, 168, 279, 350]

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = None
        if scope:
            loss_scope = '%s_%s' % (scope, scale)

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(logits,
                                              preprocess_utils.resolve_shape(
                                                  labels, 4)[1:3],
                                              align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            # When gt_is_matting_map = true, label downsampling with nearest neighbor
            # method may introduce artifacts. However, to avoid ignore_label from
            # being interpolated with other labels, we still perform nearest neighbor
            # interpolation.
            # TODO(huizhongc): Change to bilinear interpolation by processing padded
            # and non-padded label separately.
            if gt_is_matting_map:
                tf.logging.warning(
                    'Label downsampling with nearest neighbor may introduce artifacts.'
                )

            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        weights = utils.get_label_weight_mask(scaled_labels,
                                              ignore_label,
                                              num_classes,
                                              label_weights=label_weights)
        # Dimension of keep_mask is equal to the total number of pixels.
        keep_mask = tf.cast(tf.not_equal(scaled_labels, ignore_label),
                            dtype=tf.float32)

        train_labels = None
        logits = tf.reshape(logits, shape=[-1, num_classes])

        if gt_is_matting_map:
            # When the groundtruth is integer label mask, we can assign class
            # dependent label weights to the loss. When the groundtruth is image
            # matting confidence, we do not apply class-dependent label weight (i.e.,
            # label_weight = 1.0).
            if loss_weight != 1.0:
                raise ValueError(
                    'loss_weight must equal to 1 if groundtruth is matting map.'
                )

            # Assign label value 0 to ignore pixels. The exact label value of ignore
            # pixel does not matter, because those ignore_value pixel losses will be
            # multiplied to 0 weight.
            train_labels = scaled_labels * keep_mask

            # Turn the confidence p into a two-class distribution [1 - p, p].
            train_labels = tf.expand_dims(train_labels, 1)
            train_labels = tf.concat([1 - train_labels, train_labels], axis=1)
        else:
            train_labels = tf.one_hot(scaled_labels,
                                      num_classes,
                                      on_value=1.0,
                                      off_value=0.0)

        default_loss_scope = ('softmax_all_pixel_loss' if top_k_percent_pixels
                              == 1.0 else 'softmax_hard_example_mining')
        with tf.name_scope(loss_scope, default_loss_scope,
                           [logits, train_labels, weights]):
            # Compute the loss for all pixels.
            pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(train_labels,
                                        name='train_labels_stop_gradient'),
                logits=logits,
                name='pixel_losses')
            weighted_pixel_losses = tf.multiply(pixel_losses, weights)

            if top_k_percent_pixels == 1.0:
                # Average the weighted loss over the non-ignored pixels.
                total_loss = tf.reduce_sum(weighted_pixel_losses)
                num_present = tf.reduce_sum(keep_mask)
                loss = _div_maybe_zero(total_loss, num_present)
                tf.losses.add_loss(loss)
            else:
                num_pixels = tf.to_float(tf.shape(logits)[0])
                # Compute the top_k_percent pixels based on current training step.
                if hard_example_mining_step == 0:
                    # Directly focus on the top_k pixels.
                    top_k_pixels = tf.to_int32(top_k_percent_pixels *
                                               num_pixels)
                else:
                    # Gradually reduce the mining percent to top_k_percent_pixels.
                    global_step = tf.to_float(
                        tf.train.get_or_create_global_step())
                    ratio = tf.minimum(1.0,
                                       global_step / hard_example_mining_step)
                    top_k_pixels = tf.to_int32((ratio * top_k_percent_pixels +
                                                (1.0 - ratio)) * num_pixels)
                top_k_losses, _ = tf.nn.top_k(weighted_pixel_losses,
                                              k=top_k_pixels,
                                              sorted=True,
                                              name='top_k_percent_pixels')
                total_loss = tf.reduce_sum(top_k_losses)
                # Only pixels with a non-zero loss count towards the average.
                num_present = tf.reduce_sum(
                    tf.to_float(tf.not_equal(top_k_losses, 0.0)))
                loss = _div_maybe_zero(total_loss, num_present)
                tf.losses.add_loss(loss)
Example #47
0
    def __init__(self, I_size, O_size, n_control):
        """Build the dueling Q-network graph together with its training op.

        Args:
            I_size: Width of the flattened state vector fed to
                `self.scalarInput`.
            O_size: Width of the last fully connected layer. It is split
                into two equal halves (advantage / value streams), so it
                has to be even.
            n_control: Number of controls; every control selects one of 5
                discrete actions.
        """
        # The network receives the state as a flat float vector and passes it
        # through four fully connected ReLU layers.
        self.scalarInput = tf.placeholder(shape=[None, I_size],
                                          dtype=tf.float32)

        self.f_connect1 = tf.contrib.layers.fully_connected(
            inputs=self.scalarInput,
            num_outputs=64,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.random_normal_initializer(),
            biases_initializer=tf.random_normal_initializer())
        self.f_connect2 = tf.contrib.layers.fully_connected(
            inputs=self.f_connect1,
            num_outputs=64,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.random_normal_initializer(),
            biases_initializer=tf.random_normal_initializer())
        self.f_connect3 = tf.contrib.layers.fully_connected(
            inputs=self.f_connect2,
            num_outputs=64,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.random_normal_initializer(),
            biases_initializer=tf.random_normal_initializer())
        self.f_connect4 = tf.contrib.layers.fully_connected(
            inputs=self.f_connect3,
            num_outputs=O_size,
            activation_fn=tf.nn.relu,
            weights_initializer=tf.random_normal_initializer(),
            biases_initializer=tf.random_normal_initializer())

        # Split the output of the last fully connected layer into separate
        # advantage and value streams.
        self.streamAC, self.streamVC = tf.split(self.f_connect4,
                                                num_or_size_splits=2,
                                                axis=1)

        self.streamA = self.streamAC
        self.streamV = self.streamVC

        xavier_init = tf.contrib.layers.xavier_initializer()
        self.AW = tf.Variable(xavier_init([O_size // 2, 5 * n_control]))
        self.VW = tf.Variable(xavier_init([O_size // 2, 1]))
        self.Advantage = tf.matmul(self.streamA, self.AW)
        self.Value = tf.matmul(self.streamV, self.VW)

        # Then combine them together to get our final Q-values.
        # The advantage baseline must be computed per sample (axis=1).
        # Without an axis the mean also runs over the batch dimension, which
        # makes the Q-values of one state depend on the other states that
        # happen to be in the same batch.
        # NOTE(review): this averages over all 5 * n_control outputs of a
        # sample; a per-control mean may be preferable -- confirm.
        self.Qout = self.Value + tf.subtract(
            self.Advantage,
            tf.reduce_mean(self.Advantage, axis=1, keep_dims=True))

        sizeQ = tf.shape(self.Qout)

        # One row per (sample, control) pair, one column per action.
        self.Qout_reshape = tf.reshape(self.Qout, [
            tf.to_int32(sizeQ[0] * n_control),
            tf.to_int32(sizeQ[1] / n_control)
        ])

        self.predict = tf.argmax(self.Qout_reshape, 1)

        # The network produces a Q-value for every action of every control.
        # For training, only the Q-values of the actions that were actually
        # taken (sampled from memory) are compared against the targets.

        # Below we obtain the loss by taking the mean of squared differences
        # between the target and prediction Q values.
        self.targetQ = tf.placeholder(shape=[None, n_control],
                                      dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None, n_control], dtype=tf.int32)

        self.actions_onehot = tf.one_hot(self.actions, 5, dtype=tf.float32)

        hotsize = tf.shape(self.actions_onehot)

        # Same (sample, control) row layout as Qout_reshape.
        self.reshape_hot = tf.reshape(self.actions_onehot,
                                      [hotsize[0] * n_control, 5])

        # Select the Q-value of the chosen action in every row.
        self.sum = tf.reduce_sum(tf.multiply(self.Qout_reshape,
                                             self.reshape_hot),
                                 axis=1)
        self.Q = tf.reshape(self.sum, [hotsize[0], n_control])
        self.td_error = tf.square(self.targetQ - self.Q)
        self.loss = tf.reduce_mean(self.td_error)
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.updateModel = self.trainer.minimize(self.loss)
Example #48
0
    def discrete_bottleneck(self, x, scope="bottleneck"):
        """Discretization bottleneck for latent variables.

        The sliced input is quantized `hparams.num_residuals` times against
        the codebooks in `hparams.means`; the selected means are summed and
        passed through two dense layers.

        Args:
            x: Input to the discretization bottleneck.
            scope: Variable scope under which the ops are created.

        Returns:
            A dict with the keys
              "dense": output of the final dense layer (the embedding handed
                on to the decoder),
              "discrete": the discrete latent codes,
              "loss": the accumulated quantization loss,
              "embed": `self.embed` bound to `scope`.
        """
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
            x_reshaped = self.slice_hidden(x)
            x_res = x_reshaped
            x_means_hot = []
            x_means = 0
            loss = 0
            for i in range(self.hparams.num_residuals):
                x_means_hot_res, x_means_res, q_loss_res, e_loss_res = \
                    self.embedding_lookup(x_reshaped, self.hparams.means[i])

                # Update the ema variables
                if self.hparams.ema:
                    tf.logging.info("Using EMA with beta = {}".format(
                        self.hparams.beta))
                    # Moving average of how often each code was selected.
                    updated_ema_count_res = \
                        moving_averages.assign_moving_average(
                            self.hparams.ema_count[i],
                            tf.reduce_sum(
                                tf.reshape(
                                    x_means_hot_res,
                                    shape=[-1, self.hparams.num_blocks,
                                           self.hparams.block_v_size]),
                                axis=0),
                            self.hparams.decay,
                            zero_debias=False)

                    # Per-code sum of the residual vectors assigned to it.
                    dw = tf.matmul(
                        tf.transpose(x_means_hot_res, perm=[1, 2, 0]),
                        tf.transpose(x_res, perm=[1, 0, 2]))

                    updated_ema_means_res = \
                        moving_averages.assign_moving_average(
                            self.hparams.ema_means[i], dw, self.hparams.decay,
                            zero_debias=False)
                    n = tf.reduce_sum(updated_ema_count_res,
                                      axis=-1,
                                      keep_dims=True)
                    # Smooth the counts with epsilon so that the division
                    # below never hits an exact zero.
                    updated_ema_count_res = (
                        (updated_ema_count_res + self.hparams.epsilon) /
                        (n + 2**self.hparams.z_size * self.hparams.epsilon) *
                        n)
                    updated_ema_means_res = updated_ema_means_res / tf.expand_dims(
                        updated_ema_count_res, axis=-1)
                    with tf.control_dependencies([e_loss_res]):
                        # Diagnostic only; goes through the logger instead of
                        # writing to stdout while the graph is being built.
                        tf.logging.debug("self.hparams.means[i] %s",
                                         self.hparams.means[i])
                        update_means_res = tf.assign(self.hparams.means[i],
                                                     updated_ema_means_res)
                        # Tie the loss to the codebook update so the assign
                        # runs whenever the loss is evaluated.
                        with tf.control_dependencies([update_means_res]):
                            loss += self.hparams.beta * e_loss_res
                else:
                    loss += q_loss_res + self.hparams.beta * e_loss_res

                # Update the residuals
                x_res -= x_means_res
                x_means += x_means_res
                x_means_hot.append(x_means_hot_res)

            # Get the discrete latent representation
            x_means_hot = tf.stack(x_means_hot, axis=1)
            x_means_idx = tf.argmax(x_means_hot, axis=-1)

            # Get the binary representation
            num_bits = int(
                self.hparams.z_size //
                (self.hparams.num_blocks * self.hparams.num_residuals))
            x_means_bits = self.int_to_bit(x_means_idx,
                                           num_bits=num_bits,
                                           base=2)
            # Merge the last three axes into a single axis of z_size bits.
            shape = common_layers.shape_list(x_means_bits)
            new_shape = shape[:-2]
            new_shape[0] = -1
            new_shape[-1] = self.hparams.z_size
            x_means_bits = tf.reshape(x_means_bits, new_shape)
            x_discrete = self.bit_to_int(tf.to_int32(x_means_bits),
                                         num_bits=self.hparams.z_size,
                                         base=2)

            # Reshape x_discrete
            shape_x = common_layers.shape_list(x)
            shape_discrete = shape_x[:-1]
            x_discrete = tf.reshape(x_discrete, shape_discrete)
            x_means = tf.reshape(x_means, shape=shape_x)
            # Straight-through estimator: the forward pass uses x_means, the
            # gradient flows to x.
            h1 = x + tf.stop_gradient(x_means - x)

            h2 = tf.layers.dense(tf.nn.relu(h1),
                                 self.hparams.filter_size,
                                 name="vch2")
            res = tf.layers.dense(tf.nn.relu(h2),
                                  self.hparams.hidden_size,
                                  name="vcfin")
            embed_fn = partial(self.embed, scope=scope)
            return {
                "dense": res,
                "discrete": x_discrete,
                "loss": loss,
                "embed": embed_fn
            }
Example #49
0
    def create_network(self, inputs, input_seq_len, dropout_rate,
                       reuse_variables):
        """Build the CNN/LSTM recognition graph described by `self.network_proto`.

        The non-LSTM layers (convolution / max pooling) of the proto are
        applied first, followed by a stacked bidirectional LSTM, a dense
        projection onto `network_proto.classes` outputs, a softmax and a
        CTC decoder.

        Args:
            inputs: Input tensor; presumably [batch, time, features] --
                TODO confirm against the caller. It is reshaped to
                [batch, -1, network_proto.features, 1] when the first layer
                is not an LSTM.
            input_seq_len: Sequence lengths of `inputs`.
            dropout_rate: `1 - dropout_rate` is handed to `tf.nn.dropout`;
                only used when `network_proto.dropout > 0`.
            reuse_variables: Forwarded as `reuse` to the variable scope, the
                conv layers and the LSTM cells.

        Returns:
            The tuple `(lstm_seq_len, time_major_logits, time_major_softmax,
            logits, softmax, decoded, sparse_decoded, factor)`, where
            `factor` is the product of the x-strides of all max pooling
            layers.

        Raises:
            Exception: On an unknown layer type, on LSTM layers with
                differing numbers of hidden nodes, on a non-cudnn backend,
                or on an unknown CTC mode.
        """
        network_proto = self.network_proto
        seq_len = input_seq_len
        batch_size = tf.shape(inputs)[0]
        gpu_enabled = self.gpu_available

        with tf.variable_scope("cnn_lstm", reuse=reuse_variables) as scope:
            # The CNN part is only built when the first layer is not an LSTM.
            no_layers = len(network_proto.layers) == 0
            if not no_layers:
                has_conv_or_pool = network_proto.layers[
                    0].type != LayerParams.LSTM
            else:
                has_conv_or_pool = False

            # Accumulated x-stride of the pooling layers.
            factor = 1
            if has_conv_or_pool:
                cnn_inputs = tf.reshape(
                    inputs, [batch_size, -1, network_proto.features, 1])
                # (sequence length, number of features) after each layer.
                shape = seq_len, network_proto.features

                layers = [cnn_inputs]
                last_num_filters = 1

                cnn_layer_index = 0
                for layer in [
                        l for l in network_proto.layers
                        if l.type != LayerParams.LSTM
                ]:
                    if layer.type == LayerParams.CONVOLUTIONAL:
                        layers.append(
                            tf.layers.conv2d(
                                name="conv2d" if cnn_layer_index == 0 else
                                "conv2d_{}".format(cnn_layer_index),
                                inputs=layers[-1],
                                filters=layer.filters,
                                kernel_size=(layer.kernel_size.x,
                                             layer.kernel_size.y),
                                padding="same",
                                activation=tf.nn.relu,
                                reuse=reuse_variables,
                            ))
                        cnn_layer_index += 1
                        last_num_filters = layer.filters
                    elif layer.type == LayerParams.MAX_POOLING:
                        layers.append(
                            tf.layers.max_pooling2d(
                                inputs=layers[-1],
                                pool_size=(layer.kernel_size.x,
                                           layer.kernel_size.y),
                                strides=(layer.stride.x, layer.stride.y),
                                padding="same",
                            ))

                        # Pooling shrinks both the sequence length and the
                        # feature count by the respective stride.
                        shape = (tf.to_int32(shape[0] // layer.stride.x),
                                 shape[1] // layer.stride.y)
                        factor *= layer.stride.x
                    else:
                        raise Exception("Unknown layer of type %s" %
                                        layer.type)

                # Fold the filter axis into the feature axis for the LSTM.
                lstm_seq_len, lstm_num_features = shape
                rnn_inputs = tf.reshape(layers[-1], [
                    batch_size,
                    tf.shape(layers[-1])[1],
                    last_num_filters * lstm_num_features
                ])

                lstm_num_features = last_num_filters * lstm_num_features
            else:
                rnn_inputs = inputs
                lstm_seq_len = seq_len
                lstm_num_features = network_proto.features

            lstm_layers = [
                l for l in network_proto.layers if l.type == LayerParams.LSTM
            ]

            # Time major inputs required for lstm
            time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])

            if len(lstm_layers) > 0:
                # Both backends below use a single hidden size for all layers.
                for i, lstm in enumerate(lstm_layers):
                    if lstm.hidden_nodes != lstm_layers[0].hidden_nodes:
                        raise Exception(
                            "Currently all lstm layers must have an equal number of hidden nodes. "
                            "Got {} != {}".format(lstm.hidden_nodes,
                                                  lstm_layers[0].hidden_nodes))

                def cpu_cudnn_compatible_lstm_backend(time_major_inputs,
                                                      hidden_nodes):
                    # Stacked bidirectional RNN built from cudnn-compatible
                    # cells, one forward/backward pair per LSTM layer.
                    def get_lstm_cell(num_hidden):
                        return cudnn_rnn.CudnnCompatibleLSTMCell(
                            num_hidden, reuse=reuse_variables)

                    fw, bw = zip(*[(get_lstm_cell(hidden_nodes),
                                    get_lstm_cell(hidden_nodes))
                                   for _ in lstm_layers])

                    time_major_outputs, output_fw, output_bw \
                        = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(list(fw), list(bw), time_major_inputs,
                                                                         sequence_length=lstm_seq_len,
                                                                         dtype=tf.float32,
                                                                         scope="cudnn_lstm/stack_bidirectional_rnn",
                                                                         time_major=True,
                                                                         )

                    return time_major_outputs

                def gpu_cudnn_lstm_backend(time_major_inputs, hidden_nodes):
                    # Create the Cudnn LSTM factory
                    rnn_lstm = cudnn_rnn.CudnnLSTM(
                        len(lstm_layers),
                        hidden_nodes,
                        direction='bidirectional',
                        kernel_initializer=tf.initializers.random_uniform(
                            -0.1, 0.1))

                    # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
                    rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable

                    # Apply the lstm to the inputs
                    time_major_outputs, (
                        output_h, output_c) = rnn_lstm(time_major_inputs)
                    return time_major_outputs

                if network_proto.backend.cudnn:
                    if gpu_enabled:
                        print("Using CUDNN LSTM backend on GPU")
                        time_major_outputs = gpu_cudnn_lstm_backend(
                            time_major_inputs, lstm_layers[0].hidden_nodes)
                    else:
                        print("Using CUDNN compatible LSTM backend on CPU")
                        time_major_outputs = cpu_cudnn_compatible_lstm_backend(
                            time_major_inputs, lstm_layers[0].hidden_nodes)
                else:
                    raise Exception("Only cudnn based backend supported yet.")

                # Set the output size
                # (forward and backward outputs are concatenated, hence * 2)
                output_size = lstm_layers[-1].hidden_nodes * 2
            else:
                output_size = lstm_num_features
                time_major_outputs = time_major_inputs

            # flatten to (T * N, F) for matrix multiplication. This will be reversed later
            time_major_outputs = tf.reshape(
                time_major_outputs,
                [-1, time_major_outputs.shape.as_list()[2]])

            if network_proto.dropout > 0:
                time_major_outputs = tf.nn.dropout(time_major_outputs,
                                                   1 - dropout_rate,
                                                   name="dropout")

            # we need to turn off validate_shape so we can resize the variable on a codec resize
            w = tf.get_variable('W',
                                validate_shape=False,
                                initializer=tf.random_uniform(
                                    [output_size, network_proto.classes], -0.1,
                                    0.1))
            b = tf.get_variable('B',
                                validate_shape=False,
                                initializer=tf.constant(
                                    0., shape=[network_proto.classes]))

            # the output layer
            time_major_logits = tf.matmul(time_major_outputs, w) + b

            # reshape back
            # (the class count is read from w at run time because w may have
            # been resized, see validate_shape=False above)
            time_major_logits = tf.reshape(
                time_major_logits,
                [-1, batch_size, tf.shape(w)[-1]],
                name="time_major_logits")

            time_major_softmax = tf.nn.softmax(time_major_logits, -1,
                                               "time_major_softmax")

            # Batch major views of the same tensors.
            logits = tf.transpose(time_major_logits, [1, 0, 2], name="logits")
            softmax = tf.transpose(time_major_softmax, [1, 0, 2],
                                   name="softmax")

            # Give the sequence length a fixed name in the graph.
            lstm_seq_len = tf.identity(lstm_seq_len, "seq_len_out")

            # DECODER
            # ================================================================
            if network_proto.ctc == NetworkParams.CTC_DEFAULT:
                decoded, log_prob = ctc_ops.ctc_greedy_decoder(
                    time_major_logits,
                    lstm_seq_len,
                    merge_repeated=network_proto.ctc_merge_repeated)
            elif network_proto.ctc == NetworkParams.CTC_FUZZY:
                decoded, log_prob = self.fuzzy_module['decoder_op'](
                    softmax, lstm_seq_len)
            else:
                raise Exception(
                    "Unknown ctc model: '%s'. Supported are Default and Fuzzy"
                    % network_proto.ctc)

            # Only the first decoded result is used; its three components
            # are exposed under fixed names.
            decoded = decoded[0]
            sparse_decoded = (
                tf.identity(decoded.indices, name="decoded_indices"),
                tf.identity(decoded.values, name="decoded_values"),
                tf.identity(decoded.dense_shape, name="decoded_shape"),
            )

            return lstm_seq_len, time_major_logits, time_major_softmax, logits, softmax, decoded, sparse_decoded, factor
def read_label(tf_bytestring):
    """Decode a raw byte string into a scalar int32 label tensor.

    The bytes are interpreted as uint8 values and reshaped to a scalar, so
    the string has to hold exactly one byte.
    """
    scalar_label = tf.reshape(tf.decode_raw(tf_bytestring, tf.uint8), [])
    return tf.to_int32(scalar_label)
def right_shift(x):
    """Prepend `hp.filter_h - 1` zero slices to `x` along axis 1.

    Args:
        x: A rank-4 tensor whose last dimension is statically known.
            Presumably [batch, time, hp.word_embed_size + hp.filter_h - 1,
            channels] so that it lines up with the padding -- TODO confirm.

    Returns:
        `x` with a block of zeros concatenated in front of it on axis 1.
    """
    x_shape = x.get_shape().as_list()
    # The dynamic batch size is simply the first entry of the run-time shape;
    # no comparison/reduction ops are needed to obtain it.
    batch_size = tf.shape(x)[0]
    # for zero-padding
    padding = tf.zeros([batch_size, hp.filter_h - 1,
                        hp.word_embed_size + hp.filter_h - 1, x_shape[3]])
    return tf.concat((padding, x), 1)
    def _init_env(self):
        """Build the prediction graph, open a session and restore the model.

        Creates the hparams and the estimator, wires input placeholders into
        a feature dict, calls the estimator's model function in PREDICT
        mode, then starts a TF session and restores the latest checkpoint
        found in `self._model_dir`.

        Raises:
            ValueError: If no checkpoint state can be found in
                `self._model_dir`.
        """
        FLAGS.use_tpu = False
        tf.logging.set_verbosity(tf.logging.DEBUG)
        tf.logging.info("Import usr dir from %s", self._usr_dir)
        if self._usr_dir is not None:
            # NOTE(review): the log line reports self._usr_dir while the
            # import uses FLAGS.t2t_usr_dir -- confirm both always agree.
            usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
        tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                        self._hparams_set)

        self._hparams = create_hparams()
        self._hparams_decode = create_decode_hparams(
            extra_length=self._extra_length,
            batch_size=self._batch_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            return_beams=self._return_beams,
            write_beam_scores=self._write_beam_scores)

        self.estimator = trainer_lib.create_estimator(
            FLAGS.model,
            self._hparams,
            t2t_trainer.create_run_config(self._hparams),
            decode_hparams=self._hparams_decode,
            use_tpu=False)

        tf.logging.info("Finish intialize environment")

        # Problem type: whether the output is a class label, a sequence or a
        # language model.
        self.problem_type = self._hparams.problems[0].target_modality[
            0]  # class? symbol
        self._whether_has_inputs = self._hparams.problem_instances[
            0].has_inputs
        # Class-label problems are decoded with a beam of size 1.
        self._beam_size = 1 if self.problem_type == 'class_label' else self._beam_size

        # Make the input placeholder.
        # NOTE(review): this first placeholder is overwritten a few lines
        # below and never fed. It is kept so the auto-generated names of the
        # following placeholders stay unchanged.
        self._inputs_ph = tf.placeholder(
            dtype=tf.int32)  # shape not specified, any shape

        x = tf.placeholder(dtype=tf.int32)
        x.set_shape([None, None])  # ? -> (?,?)
        x = tf.expand_dims(x, axis=[2])  # -> (?,?,1)
        x = tf.to_int32(x)
        self._inputs_ph = x

        batch_inputs = x

        self._features = {
            "inputs": batch_inputs,
            "problem_choice": 0,  # We run on the first problem here.
            "input_space_id": self._hparams.problems[0].input_space_id,
            "target_space_id": self._hparams.problems[0].target_space_id
        }
        # Add the decode length, which is variable and therefore fed at run
        # time.
        self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
        self._features['decode_length'] = self.input_extra_length_ph

        # Scoring models additionally need a targets feature.
        if self._model_name.lower().find('score') != -1:
            self._targets_ph = tf.placeholder(tf.int32,
                                              shape=(1, None, 1, 1),
                                              name='targets')
            self._features['targets'] = self._targets_ph  # batch targets
            self._target_pretend = np.zeros((1, 1, 1, 1))

        mode = tf.estimator.ModeKeys.PREDICT
        predictions_dict = self.estimator._call_model_fn(
            self._features, None, mode,
            t2t_trainer.create_run_config(self._hparams))
        self._predictions_dict = predictions_dict.predictions
        tf.logging.info("Start to init tf session")
        if self._isGpu:
            print('Using GPU in Decoder')
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self._fraction)
            self._sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options))
        else:
            print('Using CPU in Decoder')
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
            config = tf.ConfigProto(gpu_options=gpu_options)
            config.allow_soft_placement = True
            config.log_device_placement = False
            self._sess = tf.Session(config=config)
        with self._sess.as_default():
            ckpt = saver_mod.get_checkpoint_state(self._model_dir)
            if ckpt is None:
                # Without this guard the failure would surface as an
                # AttributeError on `None.model_checkpoint_path`.
                raise ValueError(
                    "No checkpoint found in %s" % self._model_dir)
            saver = tf.train.Saver()
            tf.logging.info("Start to restore the parameters from %s",
                            ckpt.model_checkpoint_path)
            saver.restore(self._sess, ckpt.model_checkpoint_path)
        tf.logging.info("Finish intialize environment")
Example #53
0
def _target_len_mask(targets, sequence_length):
    """Return a float32 sequence mask sized to dimension 1 of `targets`.

    Entries at positions smaller than the corresponding `sequence_length`
    are 1.0 and the remaining ones are 0.0, so multiplying per-position
    losses by the mask drops everything beyond each example's length.
    """
    mask_width = tf.shape(targets)[1]
    lengths = tf.to_int32(sequence_length)
    return tf.sequence_mask(lengths, maxlen=mask_width, dtype=tf.float32)
Example #54
0
def ae_transformer_internal(inputs,
                            targets,
                            target_space,
                            hparams,
                            cache=None,
                            predict_mask=1.0):
    """AE Transformer, main step used for training.

    Optionally encodes `inputs`, optionally runs the autoencoding path
    (compress -> bottleneck -> decompress -> mask) on `targets`, and finally
    decodes with `decode_transformer`.

    Side effect: when `hparams.do_refine` is set, the module-level
    `_DO_SUMMARIES` flag is switched off.

    Args:
      inputs: Encoder input tensor, or None to run without an encoder.
      targets: Target tensor; reshaped here to
        [batch, -1, 1, hparams.hidden_size].
      target_space: Passed through to `encode`.
      hparams: Hyperparameter object. Fields read here include `do_ae`,
        `do_mask`, `do_refine`, `task`, `mode`, `bottleneck`,
        `bottleneck_kind`, `num_compress_steps` and the various scale /
        schedule settings used below.
      cache: Optional discrete latents. In PREDICT mode with a non
        dense/vae bottleneck, latents are sampled with `ae_latent_sample`
        when this is None.
      predict_mask: Value that replaces the masking schedule when
        `hparams.use_predict_mask` is set or when running in PREDICT mode.

    Returns:
      A tuple `(res, losses, cache)`: the decoder output cut back to the
      pre-padding target length along dimension 1, a dict with the keys
      "extra", "latent_pred" and "neg_q_entropy", and the (possibly newly
      sampled) latent cache.
    """
    # Summaries break with the do_refine cond, turn them off in that case.
    global _DO_SUMMARIES
    if hparams.do_refine:
        _DO_SUMMARIES = False

    # Prepare.
    if inputs is not None:
        batch_size = common_layers.shape_list(inputs)[0]
    else:
        batch_size = common_layers.shape_list(targets)[0]
    targets = tf.reshape(targets, [batch_size, -1, 1, hparams.hidden_size])

    # Encoder.
    if inputs is not None:
        inputs = common_layers.flatten4d3d(inputs)
        inputs, ed = encode(inputs, target_space, hparams, "input_enc")
        inputs_ex, ed_ex = inputs, ed
    else:
        ed, inputs_ex, ed_ex = None, None, None

    # Autoencoding.
    losses = {
        "extra": tf.constant(0.0),
        "latent_pred": tf.constant(0.0),
        "neg_q_entropy": tf.constant(0.0)
    }
    # Recorded before any padding so the final output can be cut back to the
    # original target length. This must happen outside the `do_ae` branch:
    # the slice at the end of the function runs unconditionally and would
    # otherwise fail with an unbound local when `do_ae` is False.
    original_targets_shape = tf.shape(targets)
    if hparams.do_ae:
        if hparams.task == "image":
            # NOTE(review): the return value is discarded, so this call does
            # not change `targets` here - confirm whether that is intended.
            cia.maybe_reshape_4d_to_3d(targets)
        if hparams.task == "translate":
            if inputs is not None:
                # Pad targets up to twice the input length.
                max_targets_len_from_inputs = tf.concat([inputs, inputs],
                                                        axis=1)
            else:
                max_targets_len_from_inputs = targets
        else:
            assert hparams.task == "image"
            max_targets_len_from_inputs = targets
        if hparams.word_shuffle:
            tf.logging.info("Using word shuffle with rate = {}".format(
                hparams.word_shuffle))
            # Perturb each position index with uniform noise and re-sort, so
            # positions are shuffled locally.
            targets_idx = tf.range(start=0,
                                   limit=common_layers.shape_list(targets)[1],
                                   delta=1)
            targets_idx = tf.to_float(targets_idx)
            noise = tf.random_uniform(
                shape=common_layers.shape_list(targets_idx),
                minval=0,
                maxval=1 + hparams.word_shuffle)
            targets_idx += noise
            permutation = tf.contrib.framework.argsort(targets_idx)
            targets_permuted = tf.gather(targets, indices=permutation, axis=1)
            targets = targets_permuted
        targets, _ = common_layers.pad_to_same_length(
            targets,
            max_targets_len_from_inputs,
            final_length_divisible_by=2**hparams.num_compress_steps)
        if hparams.word_dropout:
            # Zero out elements whose random draw falls at or below the
            # dropout rate.
            mask = tf.random_uniform(shape=common_layers.shape_list(targets),
                                     minval=0.0,
                                     maxval=1.0)
            targets_noisy = tf.where(mask > hparams.word_dropout, targets,
                                     tf.zeros_like(targets))
        else:
            targets_noisy = targets
        targets_c = compress(targets_noisy, inputs, False, hparams, "compress")
        if hparams.mode != tf.estimator.ModeKeys.PREDICT:
            # Compress and bottleneck.
            latents_dense, latents_discrete, extra_loss, embed, neg_q_entropy = (
                hparams.bottleneck(inputs=targets_c,
                                   filter_size=hparams.compress_filter_size,
                                   mode=hparams.mode,
                                   name="vc"))
            if _DO_SUMMARIES:
                tf.summary.histogram(
                    "b0", tf.reshape(latents_discrete[:, 0, :], [-1]))
            # Per-example choice between bottleneck output and the raw
            # compressed targets; always the bottleneck outside of training.
            pc = common_layers.inverse_exp_decay(hparams.startup_steps)
            pc = pc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0
            cond = tf.less(tf.random_uniform([batch_size]), pc)
            latents_dense = tf.where(cond, latents_dense, targets_c)
            # TODO(lukaszkaiser): return extra losses batchwise, multiply before mean.
            losses["extra"] = extra_loss * tf.reduce_mean(tf.to_float(cond))
            # Extra loss predicting latent code from input. Discrete only.
            if hparams.bottleneck_kind not in ["dense", "vae"]:
                latents_pred = decode_transformer(inputs_ex,
                                                  ed_ex,
                                                  embed(latents_discrete),
                                                  hparams,
                                                  "extra",
                                                  task="translate")
                _, latent_pred_loss = ae_latent_softmax(
                    latents_pred, tf.stop_gradient(latents_discrete), hparams)

                # Scale by latent dimension for summary so we can compare across
                # batches.
                if _DO_SUMMARIES:
                    tf.summary.scalar("latent_pred_loss_mean",
                                      tf.reduce_mean(latent_pred_loss))
                if hparams.sum_over_latents:
                    latent_pred_loss = tf.reduce_sum(latent_pred_loss, [1, 2])

                losses["latent_pred"] = tf.reduce_mean(
                    latent_pred_loss * tf.to_float(cond)) * hparams.prior_scale
                losses["neg_q_entropy"] = neg_q_entropy * hparams.entropy_scale
            else:
                inputs_c = decode_transformer(inputs, ed, targets_c, hparams,
                                              "dec_c")
                losses["latent_pred"] = tf.reduce_mean(
                    (inputs_c - targets_c)**2) * 20

                def bn_inputs():
                    # Re-apply the same ("vc") bottleneck with reused
                    # variables to the latents predicted from the inputs.
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=True):
                        bn, _, _, _, _ = hparams.bottleneck(
                            inputs=inputs_c,
                            filter_size=hparams.compress_filter_size,
                            mode=hparams.mode,
                            name="vc")
                    return bn

                inputs_c = bn_inputs()
                ptc = 1.0 - common_layers.inverse_lin_decay(200000) * 0.5
                ptc = ptc if hparams.mode == tf.estimator.ModeKeys.TRAIN else 1.0
                latents_dense = tf.where(
                    tf.less(tf.random_uniform([batch_size]), ptc),
                    latents_dense, inputs_c)
        else:
            if hparams.bottleneck_kind in ["dense", "vae"]:
                inputs_c = decode_transformer(inputs, ed, targets_c, hparams,
                                              "dec_c")
                latents_dense, _, _, _, _ = hparams.bottleneck(
                    inputs=inputs_c,
                    filter_size=hparams.compress_filter_size,
                    mode=hparams.mode,
                    name="vc")
            else:
                latent_len = common_layers.shape_list(targets_c)[1]
                # Only the embedding function of the bottleneck is needed.
                _, _, _, embed, _ = hparams.bottleneck(
                    inputs=targets_c,
                    filter_size=hparams.compress_filter_size,
                    name="vc")
                latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :])
                if cache is None:
                    cache = ae_latent_sample(latents_dense, inputs_ex, ed_ex,
                                             embed, 16, hparams)
                latents_dense = embed(cache)
        # Postprocess.
        d = latents_dense
        latent_len = common_layers.shape_list(latents_dense)[1]
        if isinstance(latent_len, tf.Tensor):
            # TODO(trandustin): Fix this in a better manner.
            latent_len = max(1000, hparams.max_length)
        pos = tf.get_variable("pos",
                              [1, latent_len + 1, 1, hparams.hidden_size])
        pos = pos[:, :common_layers.shape_list(latents_dense)[1] + 1, :, :]
        # NOTE(review): `d` was bound before this reassignment, so the
        # decompression below does not see the padded/position-augmented
        # latents - confirm this is intended.
        latents_dense = tf.pad(latents_dense,
                               [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos

        # decompressing the dense latents
        for i in range(hparams.num_compress_steps):
            j = hparams.num_compress_steps - i - 1
            d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j)
            if inputs is not None and hparams.do_attend_decompress:
                d = attend(d, inputs, hparams, "decompress_attend_%d" % j)
            d = decompress_step(d, hparams, i > 0, False, "decompress_%d" % j)

        # Masking.
        if hparams.do_mask:
            masking = common_layers.inverse_lin_decay(
                hparams.mask_startup_steps)
            masking *= common_layers.inverse_exp_decay(
                hparams.mask_startup_steps // 4)  # Not much at start.
            if not hparams.do_refine:
                masking -= tf.random_uniform([]) * hparams.unmasked_percentage
            masking = tf.minimum(tf.maximum(masking, 0.0), 1.0)
            if hparams.use_predict_mask:
                masking = predict_mask
            if hparams.mode == tf.estimator.ModeKeys.PREDICT:
                masking = predict_mask
            mask = tf.less(
                masking,
                tf.random_uniform(common_layers.shape_list(targets)[:-1]))
            mask = tf.expand_dims(tf.to_float(mask), 3)

            # targets is always [batch, length, 1, depth]
            # Where mask is 1 the real targets are kept, elsewhere the
            # decompressed latents are substituted.
            targets = mask * targets + (1.0 - mask) * d
            # reshape back to 4d here
            if hparams.task == "image":
                targets = tf.reshape(targets, original_targets_shape)

    res = decode_transformer(inputs,
                             ed,
                             targets,
                             hparams,
                             "decoder",
                             causal=hparams.causal)
    if hparams.do_ae:
        if hparams.do_mask and hparams.do_refine:

            def refine_res():
                # return residual_conv(res, 1, (5, 1), hparams, "refine")
                r, _ = encode(tf.squeeze(res, axis=[2]), target_space, hparams,
                              "refine_enc")
                return tf.expand_dims(r, axis=2)

            # Refine only the batch entries whose mask sums to (almost) zero.
            masked_batches = tf.reduce_sum(mask, axis=[1, 2, 3])
            all_masked = tf.less(masked_batches, 0.1)
            res = tf.where(all_masked, refine_res(), res)
        # We'll start training the extra model of latents after mask_startup_steps.
        nonlatent_steps = hparams.mask_startup_steps
        latent_time = tf.less(nonlatent_steps,
                              tf.to_int32(tf.train.get_global_step()))
        losses["latent_pred"] *= tf.to_float(latent_time)

    # res was generated from padded targets, which means it has some extra
    # elements. These can cause shape problems when computing loss with respect to
    # the original (unpadded) targets. So we remove their extra elements here.
    res = res[:, :original_targets_shape[1], :, :]
    return res, losses, cache
Example #55
0
def _get_dictionary_tensor(dictionary_path, charset):
    """Load the dictionary for `charset` and return it as a dense int32 tensor.

    The value produced by `dictionary_from_file` is cast to int32 and then
    densified (presumably it is a sparse tensor - verify against the helper).
    """
    loaded_dictionary = dictionary_from_file(dictionary_path, charset)
    int_dictionary = tf.to_int32(loaded_dictionary)
    return tf.sparse_tensor_to_dense(int_dictionary)
Example #56
0
    def _preprocess(self, features, labels):
        """Convert token features and labels into vocabulary ids.

        Builds lookup tables for the source, source-candidate and target
        vocabularies and registers them in the "vocab_tables" collection,
        truncates the token sequences to the configured maximum lengths,
        adds the `*_ids` entries and keeps a running count of processed
        tokens.

        `features` is updated in place; `labels` is shallow-copied first.

        Returns:
          `(features, labels)`, where `labels` is None if none were given.
        """
        params = self.params
        source_prefixes = ("source", "source_candidate")

        # One lookup table per vocabulary file.
        src_to_id, src_to_vocab, src_counts, _ = (
            vocab.create_vocabulary_lookup_table(self.source_vocab_info.path))
        cand_to_id, cand_to_vocab, cand_counts, _ = (
            vocab.create_vocabulary_lookup_table(
                self.source_candidate_vocab_info.path))
        tgt_to_id, tgt_to_vocab, tgt_counts, _ = (
            vocab.create_vocabulary_lookup_table(self.target_vocab_info.path))

        # Publish the tables in a graph collection so other code can reach
        # them.
        vocab_tables = {
            "source_vocab_to_id": src_to_id,
            "source_id_to_vocab": src_to_vocab,
            "source_word_to_count": src_counts,
            "source_candidate_vocab_to_id": cand_to_id,
            "source_candidate_id_to_vocab": cand_to_vocab,
            "source_candidate_word_to_count": cand_counts,
            "target_vocab_to_id": tgt_to_id,
            "target_id_to_vocab": tgt_to_vocab,
            "target_word_to_count": tgt_counts,
        }
        graph_utils.add_dict_to_collection(vocab_tables, "vocab_tables")

        # Truncate source and source-candidate sequences to their max length.
        for prefix in source_prefixes:
            max_len = params[prefix + ".max_seq_len"]
            if max_len is None:
                continue
            tokens_key = prefix + "_tokens"
            len_key = prefix + "_len"
            features[tokens_key] = features[tokens_key][:, :max_len]
            features[len_key] = tf.minimum(features[len_key], max_len)

        # Map tokens to ids.
        features["source_ids"] = src_to_id.lookup(features["source_tokens"])
        features["source_candidate_ids"] = cand_to_id.lookup(
            features["source_candidate_tokens"])

        # Optionally reverse both id sequences along the time axis.
        if params["source.reverse"] is True:
            for prefix in source_prefixes:
                ids_key = prefix + "_ids"
                features[ids_key] = tf.reverse_sequence(
                    input=features[ids_key],
                    seq_lengths=features[prefix + "_len"],
                    seq_dim=1,
                    batch_dim=0,
                    name=None)

        # Lengths are stored as int32 and summarised as histograms.
        for prefix in source_prefixes:
            len_key = prefix + "_len"
            features[len_key] = tf.to_int32(features[len_key])
            tf.summary.histogram(len_key, tf.to_float(features[len_key]))

        if labels is None:
            return features, None

        labels = labels.copy()

        # Truncate targets to their max length.
        target_max_len = params["target.max_seq_len"]
        if target_max_len is not None:
            labels["target_tokens"] = (
                labels["target_tokens"][:, :target_max_len])
            labels["target_len"] = tf.minimum(labels["target_len"],
                                              target_max_len)

        # Map target tokens to ids.
        labels["target_ids"] = tgt_to_id.lookup(labels["target_tokens"])

        labels["target_len"] = tf.to_int32(labels["target_len"])
        tf.summary.histogram("target_len", tf.to_float(labels["target_len"]))

        # Running total of processed tokens (target + source + candidate).
        batch_tokens = (tf.reduce_sum(labels["target_len"]) +
                        tf.reduce_sum(features["source_len"]) +
                        tf.reduce_sum(features["source_candidate_len"]))
        # NOTE(review): the second positional argument of tf.Variable is
        # `trainable`, so this string is probably not used as the variable
        # name - confirm before changing, as a rename affects checkpoints.
        token_counter_var = tf.Variable(0, "tokens_counter")
        total_tokens = tf.assign_add(token_counter_var, batch_tokens)
        tf.summary.scalar("num_tokens", total_tokens)

        # Tie the counter update to the token tensors so that it runs
        # whenever they are evaluated.
        with tf.control_dependencies([total_tokens]):
            for prefix in source_prefixes:
                tokens_key = prefix + "_tokens"
                features[tokens_key] = tf.identity(features[tokens_key])

        # Store the processed dictionaries in graph collections.
        graph_utils.add_dict_to_collection(features, "features")
        if labels:
            graph_utils.add_dict_to_collection(labels, "labels")

        print("attention_biseqseq features:{} labels:{}".format(
            features, labels))
        return features, labels
Example #57
0
def _build_metrics(labels, predictions, weights, batch_losses):
    """Creates the TensorFlow ops that accumulate evaluation metrics.

  When `predictions` has a single output column the problem is treated as
  binary classification: predictions above 0.5 count as the positive class,
  and AUC plus confusion-matrix counters are added.

  Args:
    labels: Tensor with shape [batch_size].
    predictions: Tensor with shape [batch_size, output_dim].
    weights: Tensor with shape [batch_size].
    batch_losses: Tensor with shape [batch_size].

  Returns:
    A dictionary {metric_name: (metric_value, update_op)}.
  """
    assert len(predictions.shape) == 2
    is_binary = (predictions.shape[1] == 1)

    # Derive the predicted class for every example.
    if is_binary:
        predictions = tf.squeeze(predictions, axis=[1])
        predicted_labels = tf.to_int32(tf.greater(predictions, 0.5),
                                       name="predicted_labels")
    else:
        predicted_labels = tf.argmax(predictions,
                                     1,
                                     name="predicted_labels",
                                     output_type=tf.int32)

    metrics = {}
    with tf.variable_scope("metrics"):
        # Weighted number of examples seen so far.
        example_total = _metric_variable("num_examples", [], tf.float32)
        add_examples = tf.assign_add(example_total, tf.reduce_sum(weights))
        metrics["num_examples"] = (example_total.read_value(), add_examples)

        # Weighted number of correct predictions and the resulting accuracy.
        correct_total = _metric_variable("num_correct", [], tf.float32)
        weighted_hits = weights * tf.to_float(
            tf.equal(labels, predicted_labels))
        add_correct = tf.assign_add(correct_total,
                                    tf.reduce_sum(weighted_hits))
        metrics["accuracy/num_correct"] = (correct_total.read_value(),
                                           add_correct)
        accuracy = tf.div(correct_total, example_total, name="accuracy")
        # Accuracy is derived from the two counters, so it needs no update.
        metrics["accuracy/accuracy"] = (accuracy, tf.no_op())

        # Weighted mean of the per-example losses.
        metrics["losses/weighted_cross_entropy"] = tf.metrics.mean(
            batch_losses, weights=weights, name="cross_entropy_loss")

        # Extra metrics that only make sense for binary classification.
        if is_binary:
            bool_labels = tf.cast(labels, dtype=tf.bool)
            bool_predicted = tf.cast(predicted_labels, dtype=tf.bool)

            metrics["auc"] = tf.metrics.auc(bool_labels,
                                            predictions,
                                            weights=weights,
                                            num_thresholds=1000)

            def _count_condition(name, labels_value, predicted_value):
                """Counts examples with the given (label, prediction) pair."""
                counter = _metric_variable(name, [], tf.float32)
                matches = tf.to_float(
                    tf.logical_and(tf.equal(bool_labels, labels_value),
                                   tf.equal(bool_predicted, predicted_value)))
                add_matches = tf.assign_add(counter,
                                            tf.reduce_sum(weights * matches))
                return counter.read_value(), add_matches

            # (metric suffix, label value, predicted value)
            confusion_cells = (
                ("true_positives", True, True),
                ("false_positives", False, True),
                ("true_negatives", False, False),
                ("false_negatives", True, False),
            )
            for cell_name, label_value, prediction_value in confusion_cells:
                metrics["confusion_matrix/" + cell_name] = _count_condition(
                    cell_name,
                    labels_value=label_value,
                    predicted_value=prediction_value)

    return metrics
Example #58
0
    def call(self, inputs, training=None, **kwargs):
        """Apply the deformable convolution to `inputs`.

        Sampling offsets are predicted by a regular convolution over the
        inputs. The (padded) input is then sampled at the shifted positions
        with bilinear interpolation, and the sampled values are combined
        with a depthwise convolution whose results are summed over the input
        channels. `training` and `kwargs` are accepted but not used here.

        The batch and spatial dimensions are read as Python ints, so they
        must be statically known.
        """
        # Predict the sampling offsets.
        offset = tf.nn.conv2d(
            inputs,
            filter=self.offset_layer_kernel,
            strides=[1, *self.strides, 1],
            padding=self.padding.upper(),
            dilations=[1, *self.dilation_rate, 1])
        offset = offset + self.offset_layer_bias

        # Pad the input if required.
        inputs = self._pad_input(inputs)

        # Static sizes.
        padded_shape = inputs.get_shape()
        batch_size = int(padded_shape[0])
        channel_in = int(padded_shape[-1])
        in_h, in_w = (int(dim) for dim in padded_shape[1:3])  # input map size
        out_h, out_w = (int(dim)
                        for dim in offset.get_shape()[1:3])  # output map size
        filter_h, filter_w = self.kernel_size

        # The last axis separates the y and x components of each offset.
        offset = tf.reshape(offset, [batch_size, out_h, out_w, -1, 2])
        y_off = offset[:, :, :, :, 0]
        x_off = offset[:, :, :, :, 1]

        # Coordinates of the regular sampling grid on the input feature map.
        grid_y, grid_x = self._get_conv_indices([in_h, in_w])
        grid_y = tf.expand_dims(grid_y, axis=-1)
        grid_x = tf.expand_dims(grid_x, axis=-1)
        tile_multiples = [batch_size, 1, 1, 1, self.num_deformable_group]
        grid_y = tf.tile(grid_y, tile_multiples)
        grid_x = tf.tile(grid_x, tile_multiples)
        grid_y = tf.reshape(grid_y, [*grid_y.shape[0:3], -1])
        grid_x = tf.reshape(grid_x, [*grid_x.shape[0:3], -1])
        grid_y = tf.to_float(grid_y)
        grid_x = tf.to_float(grid_x)

        # Shift the grid by the predicted offsets and stay inside the map.
        y = grid_y + y_off
        x = grid_x + x_off
        y = tf.clip_by_value(y, 0, in_h - 1)
        x = tf.clip_by_value(x, 0, in_w - 1)

        # Integer coordinates of the four neighbours of every (y, x).
        y0 = tf.to_int32(tf.floor(y))
        x0 = tf.to_int32(tf.floor(x))
        y1 = y0 + 1
        x1 = x0 + 1
        y0 = tf.clip_by_value(y0, 0, in_h - 1)
        y1 = tf.clip_by_value(y1, 0, in_h - 1)
        x0 = tf.clip_by_value(x0, 0, in_w - 1)
        x1 = tf.clip_by_value(x1, 0, in_w - 1)

        # Pixel values at the four neighbours.
        fetch = DeformableConvLayer._get_pixel_values_at_point
        p0 = fetch(inputs, [y0, x0])
        p1 = fetch(inputs, [y0, x1])
        p2 = fetch(inputs, [y1, x0])
        p3 = fetch(inputs, [y1, x1])

        # Bilinear interpolation weights, with a trailing axis so they
        # broadcast against the pixel values.
        x0 = tf.to_float(x0)
        x1 = tf.to_float(x1)
        y0 = tf.to_float(y0)
        y1 = tf.to_float(y1)
        w0 = (y1 - y) * (x1 - x)
        w1 = (y1 - y) * (x - x0)
        w2 = (y - y0) * (x1 - x)
        w3 = (y - y0) * (x - x0)
        w0 = tf.expand_dims(w0, axis=-1)
        w1 = tf.expand_dims(w1, axis=-1)
        w2 = tf.expand_dims(w2, axis=-1)
        w3 = tf.expand_dims(w3, axis=-1)
        pixels = tf.add_n([w0 * p0, w1 * p1, w2 * p2, w3 * p3])

        # Lay the sampled values out as one "big" feature map in which every
        # output position owns a filter_h x filter_w patch.
        big_h = out_h * filter_h
        big_w = out_w * filter_w
        pixels = tf.reshape(pixels, [
            batch_size, out_h, out_w, filter_h, filter_w,
            self.num_deformable_group, channel_in
        ])
        pixels = tf.transpose(pixels, [0, 1, 3, 2, 4, 5, 6])
        pixels = tf.reshape(pixels, [
            batch_size, big_h, big_w, self.num_deformable_group, channel_in
        ])

        # Repeat the channels for every filter of a deformable group.
        feat_in_group = self.filters // self.num_deformable_group
        pixels = tf.tile(pixels, [1, 1, 1, 1, feat_in_group])
        pixels = tf.reshape(pixels, [batch_size, big_h, big_w, -1])

        # Depthwise convolution with a stride equal to the filter size, so
        # each patch is reduced exactly once.
        out = tf.nn.depthwise_conv2d(pixels, self.kernel,
                                     [1, filter_h, filter_w, 1], 'VALID')
        # Sum the contributions of the input channels for every filter.
        out = tf.reshape(out,
                         [batch_size, out_h, out_w, self.filters, channel_in])
        out = tf.reduce_sum(out, axis=-1)
        if self.use_bias:
            out = out + self.bias
        return self.activation(out)
    def _init_env(self):
        """Build the decoding graph, open a session and restore the checkpoint.

        The steps are: import the optional user directory, create the model
        and decode hparams and the estimator, build the feature dictionary
        from placeholders, call the estimator's model function to obtain the
        prediction tensors, create a `tf.Session` (GPU or CPU configuration
        depending on `self._isGpu`) and restore the latest checkpoint found
        in `self._model_dir`.

        Results are stored on the instance, among them `_hparams`,
        `estimator`, `_inputs_ph`, `input_extra_length_ph`, `_features`,
        `_predictions_dict` and `_sess`.
        """
        FLAGS.use_tpu = False
        tf.logging.info("Import usr dir from %s", self._usr_dir)
        if self._usr_dir is not None:
            usr_dir.import_usr_dir(self._usr_dir)
        tf.logging.info("Start to create hparams,for %s of %s", self._problem,
                        self._hparams_set)

        self._hparams = create_hparams()

        self._hparams_decode = create_decode_hparams(
            extra_length=self._extra_length,
            batch_size=self._batch_size,
            beam_size=self._beam_size,
            alpha=self._alpha,
            return_beams=self._return_beams,
            write_beam_scores=self._write_beam_scores,
            force_decode_length=self._force_decode_length)

        self.estimator = trainer_lib.create_estimator(
            FLAGS.model,
            self._hparams,
            t2t_trainer.create_run_config(self._hparams),
            decode_hparams=self._hparams_decode,
            use_tpu=False)

        tf.logging.info("Finish intialize environment")

        # Problem type: whether the output is a class, a sequence or a
        # language model.
        self.problem_type = self._hparams.problem_hparams.target_modality[0]
        self._whether_has_inputs = self._hparams.problem.has_inputs
        # Classification always decodes with a beam of one.
        self._beam_size = 1 if self._customer_problem_type == 'classification' else self._beam_size

        # Input placeholder: declared as rank 2, then expanded to rank 3 by
        # adding a trailing axis.
        x = tf.placeholder(dtype=tf.int32)
        x.set_shape([None, None])  # ? -> (?,?)
        x = tf.expand_dims(x, axis=[2])  # -> (?,?,1)
        x = tf.to_int32(x)
        self._inputs_ph = x

        batch_inputs = x

        self._features = {
            "inputs": batch_inputs,
            "problem_choice": 0,  # We run on the first problem here.
            "input_space_id": self._hparams.problem_hparams.input_space_id,
            "target_space_id": self._hparams.problem_hparams.target_space_id
        }
        # Decode length is fed per call so that it can vary:
        # total_decode = input_len + extra_len (extra of chunkProblem = 0).
        self.input_extra_length_ph = tf.placeholder(dtype=tf.int32, shape=[])
        self._features['decode_length'] = self.input_extra_length_ph

        # Some configurations must not receive the problem/space-id
        # features. The conditions can overlap, so they are evaluated
        # together and the keys are removed at most once; deleting them
        # separately per condition raised KeyError when two conditions held.
        drop_problem_features = (
            self._hparams_set == "transformer_relative"
            or self._customer_problem_type == 'languageModel_pp'
            or self._model_name in ['slice_net', 'transformer_encoder']
            or (self._model_name == 'transformer'
                and self._customer_problem_type == 'classification'))
        if drop_problem_features:
            for key in ('problem_choice', 'input_space_id',
                        'target_space_id'):
                self._features.pop(key, None)

        # Classification and language-model scoring also feed the targets.
        if self._customer_problem_type in ('classification',
                                           'languageModel_pp'):
            self._targets_ph = tf.placeholder(tf.int32,
                                              shape=(None, None, None, None),
                                              name='targets')
            self._features['targets'] = self._targets_ph  # batch targets

        # Mode: PREDICT by default; EVAL for language-model scoring and for
        # classification with a model whose name does not contain "score".
        mode = tf.estimator.ModeKeys.PREDICT
        if self._customer_problem_type == 'languageModel_pp':
            mode = tf.estimator.ModeKeys.EVAL
        elif self._customer_problem_type == 'classification' and 'score' not in self._model_name:
            mode = tf.estimator.ModeKeys.EVAL
        predictions_dict = self.estimator._call_model_fn(
            self._features, None, mode,
            t2t_trainer.create_run_config(self._hparams))
        self._predictions_dict = predictions_dict.predictions

        # For classification, expose class probabilities computed from the
        # logits, which live under 'scores' for "score" models and under
        # 'predictions' otherwise.
        if self._customer_problem_type == 'classification':
            score_key = 'scores' if 'score' in self._model_name else 'predictions'
            self._score = predictions_dict.predictions.get(score_key)
            if self._score is not None:  # [batch,beam] [batch,]
                self._predictions_dict['scores_class'] = tf.exp(
                    common_layers.log_prob_from_logits(self._score))

        tf.logging.info("Start to init tf session")
        if self._isGpu:
            print('Using GPU in Decoder')
            gpu_options = tf.GPUOptions(
                per_process_gpu_memory_fraction=self._fraction)
            self._sess = tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False,
                                      gpu_options=gpu_options))
        else:
            print('Using CPU in Decoder')
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
            config = tf.ConfigProto(gpu_options=gpu_options)
            config.allow_soft_placement = True
            config.log_device_placement = False
            self._sess = tf.Session(config=config)
        with self._sess.as_default():
            ckpt = saver_mod.get_checkpoint_state(self._model_dir)
            saver = tf.train.Saver(allow_empty=True)
            tf.logging.info("Start to restore the parameters from %s",
                            ckpt.model_checkpoint_path)
            saver.restore(self._sess, ckpt.model_checkpoint_path)
        tf.logging.info("Finish intialize environment")
Example #60
0
 def Single_acc(self):
     correct_prediction = tf.equal(tf.to_int32(tf.argmax(self.y, axis=1)),
                                   self.argmax_idx)
     self.acc_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))