コード例 #1
0
  def _preprocess(self, features, labels):
    """Runs the parent preprocessing, then splits off the domain token.

    Position 0 of `labels["target_tokens"]` is SEQUENCE_START and position 1
    is a domain marker string. The marker is mapped to an int32 class id
    stored under `labels["domain"]` and is then removed from the target
    tensors.

    Args:
      features: Passed through unchanged to the parent `_preprocess`.
      labels: Passed to the parent `_preprocess`; the result must provide the
        keys "target_tokens", "target_ids" and "target_len".

    Returns:
      A `(features, labels)` tuple. `labels` gains a "domain" entry
      (0 for "domain=aspec", 1 for "domain=subtitles", -1 otherwise), while
      "target_tokens" / "target_ids" lose column 1 and "target_len" is
      decremented by one.
    """
    features, labels = super(DiscriminatorSeq2Seq, self)._preprocess(
        features, labels)

    def _domain_to_id(token):
      # Use a list of (predicate, fn) pairs instead of a dict keyed by
      # Tensors: Tensor-keyed dicts are the legacy form and break wherever
      # Tensors are unhashable. With exclusive=True the result is the same.
      return tf.case(
          [(tf.equal(token, "domain=aspec"), lambda: tf.constant(0)),
           (tf.equal(token, "domain=subtitles"), lambda: tf.constant(1))],
          default=lambda: tf.constant(-1),
          exclusive=True)

    # Convert the domain string of every batch row to a class id.
    labels["domain"] = tf.map_fn(
        _domain_to_id,
        elems=labels["target_tokens"][:, 1],
        dtype=tf.int32)

    # Remove the domain token from the data.
    # Token #0 is SEQUENCE_START. Token #1 is the domain token.
    for key in ("target_tokens", "target_ids"):
      sequence = labels[key]
      labels[key] = tf.concat([sequence[:, 0:1], sequence[:, 2:]], 1)
    # One token was dropped from every sequence.
    labels["target_len"] = labels["target_len"] - 1

    return features, labels
コード例 #2
0
def focal_loss(logits, targets, alpha, gamma, normalizer):
  """Compute the focal loss between `logits` and the golden `target` values.

  Focal loss = -(1 - pt)^gamma * log(pt)
  where pt is the probability of being classified to the true class. The
  per-element loss is additionally weighted by `alpha` for positive examples
  and by `(1 - alpha)` for all other examples.

  Args:
    logits: A float32 tensor of size
      [batch, height_in, width_in, num_predictions].
    targets: A float32 tensor of size
      [batch, height_in, width_in, num_predictions]. Elements equal to 1.0 are
      treated as positives; everything else is treated as a negative.
    alpha: A float32 scalar multiplying alpha to the loss from positive examples
      and (1-alpha) to the loss from negative examples.
    gamma: A float32 scalar modulating loss from hard and easy examples.
    normalizer: A float32 scalar normalizes the total loss from all examples.
  Returns:
    loss: A float32 scalar representing normalized total loss.
  """
  with tf.name_scope('focal_loss'):
    positive_label_mask = tf.equal(targets, 1.0)
    cross_entropy = (
        tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=logits))

    # The modulator is (1 - pt)^gamma. Computing it as
    # tf.pow(1 - pt, gamma) gives an infinite derivative at 1 - pt == 0 when
    # gamma < 1, which turns into NaN during back prop. Work in log space:
    #   positives: 1 - pt = 1 - sigmoid(x) = sigmoid(-x)
    #              log(1 - pt) = -softplus(x)
    #   negatives: 1 - pt = sigmoid(x)
    #              log(1 - pt) = -softplus(-x)
    # so (1 - pt)^gamma = exp(-gamma * softplus(s)), with s = x for positives
    # and s = -x for negatives. softplus has a bounded gradient everywhere.
    signed_logits = tf.where(positive_label_mask, logits, -logits)
    modulator = tf.exp(-gamma * tf.nn.softplus(signed_logits))

    loss = modulator * cross_entropy
    weighted_loss = tf.where(positive_label_mask, alpha * loss,
                             (1.0 - alpha) * loss)
    total_loss = tf.reduce_sum(weighted_loss)
    total_loss /= normalizer
  return total_loss
コード例 #3
0
    def crop_gt_masks(self, gt_mask_size):
        """Crops the ground truth binary masks and resizes them to a fixed size.

        Each mask in `self._masks` is cropped with the box of the same index
        in `self._boxes`, bilinearly resized to
        `[gt_mask_size, gt_mask_size]`, and then zero-padded by 2 pixels on
        every side.

        Args:
            gt_mask_size: Side length of the resized crop, before the 2-pixel
                border is added.

        Returns:
            A tensor of shape
            `[num_masks, gt_mask_size + 4, gt_mask_size + 4]`.

        Raises:
            A failed `tf.Assert` at run time when the number of boxes differs
            from the number of masks.
        """
        num_boxes = tf.shape(self._boxes)[0]
        num_masks = tf.shape(self._masks)[0]
        # Report both counts so a mismatch can be diagnosed from the error.
        assert_length = tf.Assert(
            tf.equal(num_boxes, num_masks),
            ['crop_gt_masks: num_boxes != num_masks', num_boxes, num_masks])

        def reshape_masks_fn():
            # Add a trailing channel axis, as crop_and_resize expects 4-D input.
            return tf.reshape(self._masks,
                              [-1, self._ori_height, self._ori_width, 1])

        def empty_masks_fn():
            return tf.zeros([0, self._ori_height, self._ori_width, 1])

        # Check if there is any instance in this image or not.
        scaled_masks = tf.cond(num_masks > 0, reshape_masks_fn,
                               empty_masks_fn)
        with tf.control_dependencies([assert_length]):
            # Mask i is cropped with box i.
            # NOTE(review): crop_and_resize takes normalized box coordinates;
            # confirm self._boxes is normalized at this point.
            cropped_gt_masks = tf.image.crop_and_resize(
                image=scaled_masks,
                boxes=self._boxes,
                box_ind=tf.range(num_masks, dtype=tf.int32),
                crop_size=[gt_mask_size, gt_mask_size],
                method='bilinear')[:, :, :, 0]
        # Zero border of 2 pixels around each mask; the instance axis is
        # left untouched.
        cropped_gt_masks = tf.pad(cropped_gt_masks,
                                  paddings=tf.constant(
                                      [[0, 0], [2, 2], [2, 2]]),
                                  mode='CONSTANT',
                                  constant_values=0.)
        return cropped_gt_masks
コード例 #4
0
ファイル: model_utils.py プロジェクト: samiraabnar/language
def hamming_loss(preds, targets, sign=False):
  """Implements hamming loss as a streaming metric.

  The metric aggregates the mismatch indicator directly, so both the value
  tensor and the update op report the proportion of incorrect predictions.
  (Returning `1 - mean(correct)` next to the update op of `mean(correct)`
  would make the update op evaluate to the proportion of *correct*
  predictions instead.)

  Args:
    preds: Tensor of predicted values.
    targets: Tensor of target values; cast to the dtype of `preds` before
      comparison.
    sign (bool): Set to True if targets={-1, 1} to take the sign of preds
      before calculating loss.

  Returns:
    A tf.metrics tuple containing the proportion of incorrect predictions and an
    update op for the metric. Before any update has been run the value is 0.
  """
  if sign:
    preds = tf.sign(preds)
  mismatch = tf.not_equal(preds, tf.cast(targets, preds.dtype))
  return tf.metrics.mean(tf.cast(mismatch, tf.float32))
コード例 #5
0
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

            Args:
              value: Input handed to `example_decoder.decode`; the decoded
                result is indexed as a dictionary holding the image and the
                groundtruth annotations.

            Returns:
              In PREDICT mode only the `features` dictionary is returned. In
              every other mode a `(features, labels)` tuple is returned.

              features: A dictionary that contains the image and auxiliary
                information. The following describes {key: value} pairs in
                the dictionary.
                images: The image tensor after normalization and
                  resize-and-crop by the input processor; cast to bfloat16
                  when params['use_bfloat16'] is set.
                image_info: Whatever `input_processor.get_image_info()`
                  returns. Presumably the original height and width, the
                  scale of the processed image to the original image, and
                  the scaled height and width -- confirm against the input
                  processor.
                source_ids: Source image id converted to a number. An empty
                  source id in the groundtruth annotation becomes -1.
              labels: A dictionary that contains groundtruth labels. The
                following describes {key: value} pairs in the dictionary.
                score_targets_<level>: One entry per level in
                  [min_level, ..., max_level], taken from the first output of
                  `label_anchors`. Presumably of shape
                  [height_l, width_l, num_anchors] -- confirm against the
                  anchor labeler.
                box_targets_<level>: One entry per level in
                  [min_level, ..., max_level], taken from the second output
                  of `label_anchors`. Presumably of shape
                  [height_l, width_l, num_anchors * 4] -- confirm against the
                  anchor labeler.
                gt_boxes: Groundtruth bounding box annotations, multiplied by
                  image_info[2] and padded with -1 to the fixed dimension
                  [self._max_num_instances, 4]. Presumably in
                  [y1, x1, y2, x2] format -- confirm.
                gt_classes: Groundtruth classes annotations as float32. The
                  tensor is padded with -1 to the fixed dimension
                  [self._max_num_instances, 1].
                cropped_gt_masks: Output of `crop_gt_masks`, padded with -1
                  to the fixed dimension [self._max_num_instances,
                  params['gt_mask_size'] + 4, params['gt_mask_size'] + 4].
            """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)

                image = data['image']
                source_id = data['source_id']
                # Replace an empty source id by '-1' so that the string can be
                # converted to a number below.
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Prediction uses no groundtruth: process the image only and
                # return the features dictionary without labels.
                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size, params['short_side_image_size'],
                        params['long_side_max_image_size'])
                    input_processor.normalize_image()
                    input_processor.set_scale_factors_to_mlperf_reference_size(
                    )
                    image = input_processor.resize_and_crop_image()
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    image_info = input_processor.get_image_info()
                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id
                    }

                # The following part runs for every mode other than PREDICT.
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                # Classes become a float32 column vector of shape [N, 1].
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                # Without categories, collapse class ids to a binary label:
                # 1.0 where the class id is greater than 0, else 0.0.
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                # Optionally drop every annotation flagged as crowd, but only
                # while training.
                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, params['short_side_image_size'],
                    params['long_side_max_image_size'], boxes, classes,
                    instance_masks)
                input_processor.normalize_image()
                # NOTE(review): the flip is gated only by the parameter, not
                # by the mode, so it also applies outside training -- confirm
                # this is intended.
                if params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()

                input_processor.set_scale_factors_to_mlperf_reference_size()
                image = input_processor.resize_and_crop_image()

                boxes, classes = input_processor.resize_and_crop_boxes()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    params['gt_mask_size'])

                image_info = input_processor.get_image_info()
                # Assign anchors.
                # The anchor labeler is chosen by comparing image_info[3] with
                # image_info[4] (presumably scaled height vs. scaled width --
                # confirm against get_image_info()).
                is_height_short_side = tf.less(image_info[3], image_info[4])
                score_targets, box_targets = tf.cond(
                    is_height_short_side,
                    lambda: anchor_labeler.label_anchors(boxes, classes),
                    lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long

                # Pad groundtruth data.
                # Boxes are multiplied by image_info[2] first (presumably the
                # scale back to the original image -- confirm).
                boxes *= image_info[2]
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                # Each mask is flattened so the instance axis can be padded
                # as a 2-D tensor, then the 2-D mask shape is restored. The
                # "+ 4" presumably accounts for a 2-pixel border added on
                # each side by crop_gt_masks -- confirm.
                cropped_gt_masks = tf.reshape(
                    cropped_gt_masks, [-1, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)

                features = {}
                features['images'] = image
                features['image_info'] = image_info
                features['source_ids'] = source_id

                labels = {}
                # Flatten the per-level target containers into individually
                # named entries, one pair per level.
                for level in range(params['min_level'],
                                   params['max_level'] + 1):
                    labels['score_targets_%d' % level] = score_targets[level]
                    labels['box_targets_%d' % level] = box_targets[level]
                labels['gt_boxes'] = boxes
                labels['gt_classes'] = classes
                labels['cropped_gt_masks'] = cropped_gt_masks
                return features, labels
コード例 #6
0
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

            Args:
              value: Input handed to `example_decoder.decode`; the decoded
                result is indexed as a dictionary holding the image and the
                groundtruth annotations.

            Returns:
              A 10-tuple, in this order:

              image: Image tensor after normalization and resize-and-crop by
                the input processor; cast to bfloat16 when
                params['use_bfloat16'] is set.
              cls_targets: First output of `anchor_labeler.label_anchors`.
                Presumably an ordered dictionary with keys
                [min_level, min_level+1, ..., max_level] whose values have
                shape [height_l, width_l, num_anchors] -- confirm against the
                anchor labeler.
              box_targets: Second output of `anchor_labeler.label_anchors`.
                Presumably an ordered dictionary with keys
                [min_level, min_level+1, ..., max_level] whose values have
                shape [height_l, width_l, num_anchors * 4] -- confirm against
                the anchor labeler.
              num_positives: Third output of `anchor_labeler.label_anchors`;
                presumably the number of positive anchors in the image.
              source_id: Source image id converted to a number. An empty
                source id in the groundtruth annotation becomes -1.
              image_scale: `input_processor.image_scale_to_original`; the
                groundtruth boxes are multiplied by it before padding.
              boxes: Groundtruth bounding box annotations, padded with -1 to
                the fixed dimension [self._max_num_instances, 4]. Presumably
                in [y1, x1, y2, x2] format -- confirm.
              is_crowds: Groundtruth crowd flags cast to float32. The tensor
                is padded with 0 to the fixed dimension
                [self._max_num_instances, 1].
              areas: Groundtruth areas annotations. The tensor is padded with
                -1 to the fixed dimension [self._max_num_instances, 1].
              classes: Groundtruth classes annotations as float32. The tensor
                is padded with -1 to the fixed dimension
                [self._max_num_instances, 1].
            """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                # Classes become a float32 column vector of shape [N, 1].
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                # NOTE(review): this repeats the cast + reshape performed a
                # few lines above and looks redundant.
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # Optionally drop crowd annotations while training.
                # NOTE(review): only `classes` and `boxes` are filtered;
                # `areas` and `is_crowds` keep their original rows -- confirm
                # that they are not consumed during training.
                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # NOTE: The autoaugment method works best when used alongside the
                # standard horizontal flipping of images along with size jittering
                # and normalization.
                if params.get('autoaugment_policy',
                              None) and self._is_training:
                    image, boxes = autoaugment.distort_image_with_autoaugment(
                        image, boxes, params['autoaugment_policy'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                # Training draws a random scale within the configured range;
                # otherwise the scale is set to fit the output size.
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                # Replace an empty source id by '-1' so that the string can be
                # converted to a number.
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)
コード例 #7
0
def _local_perm(inputs, targets, is_masked, perm_size, seq_len):
  """Samples a factorization order and builds the matching attention mask.

  Args:
    inputs: int64 Tensor in shape [seq_len], input ids.
    targets: int64 Tensor in shape [seq_len], target ids.
    is_masked: bool Tensor in shape [seq_len]. True means being selected
      for partial prediction.
    perm_size: the length of longest permutation. Could be set to be reuse_len.
      Should not be larger than reuse_len or there will be data leaks.
    seq_len: int, sequence length.

  Returns:
    A tuple `(perm_mask, new_targets, target_mask, inputs_k, inputs_q)`:
      perm_mask: float32 Tensor in shape [seq_len, seq_len]; 1 where
        position i cannot attend to position j, 0 where it can.
      new_targets: `inputs[0:1]` followed by `targets[:-1]`.
      target_mask: float32 Tensor in shape [seq_len]; 1 for masked,
        non-functional tokens, 0 otherwise.
      inputs_k: `inputs`, unchanged.
      inputs_q: the same tensor as `target_mask`.
  """
  # Draw the permutation: lay positions out as [-1, perm_size], transpose,
  # shuffle along the first axis, then transpose back and flatten.
  positions = tf.range(seq_len, dtype=tf.int64)
  by_offset = tf.transpose(tf.reshape(positions, [-1, perm_size]))
  shuffled = tf.random_shuffle(by_offset)
  perm_order = tf.reshape(tf.transpose(shuffled), [-1])

  # Functional tokens are SEP and CLS; everything else is non-functional.
  is_sep = tf.equal(inputs, SEP_ID)
  is_cls = tf.equal(inputs, CLS_ID)
  non_func_tokens = tf.logical_not(tf.logical_or(is_sep, is_cls))

  # Plain tokens: neither masked nor functional.
  plain_tokens = tf.logical_and(tf.logical_not(is_masked), non_func_tokens)
  masked_or_func_tokens = tf.logical_not(plain_tokens)

  # Plain tokens receive the smallest permutation index (-1), so that
  # (1) every other position can see them, and
  # (2) they cannot see masked positions, which prevents information leaks.
  lowest_index = -tf.ones([seq_len], dtype=tf.int64)
  rev_index = tf.where(plain_tokens, lowest_index, perm_order)

  # `target_mask` marks masked, non-functional tokens:
  # 1: use mask as input and have loss
  # 0: use token (or [SEP], [CLS]) as input and do not have loss
  target_tokens = tf.logical_and(masked_or_func_tokens, non_func_tokens)
  target_mask = tf.cast(target_tokens, tf.float32)

  # Target tokens must not see themselves; every other token keeps access to
  # itself by having its own query index raised by one.
  self_rev_index = tf.where(target_tokens, rev_index, rev_index + 1)

  # `perm_mask`:
  # 1: cannot attend if i <= j and j is not a plain token
  # 0: can attend if i > j or j is a plain token
  blocked = tf.logical_and(
      self_rev_index[:, None] <= rev_index[None, :],
      masked_or_func_tokens)
  perm_mask = tf.cast(blocked, tf.float32)

  # New target: [next token] for LM and [curr token] (self) for PLM.
  new_targets = tf.concat([inputs[0: 1], targets[: -1]], axis=0)

  # The key stream reads the raw inputs; the query stream reads the target
  # mask.
  return perm_mask, new_targets, target_mask, inputs, target_mask