Esempio n. 1
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition entry.

  Builds the model outputs, and depending on `mode` the losses, train op,
  evaluation metrics and checkpoint-restoring scaffold, then packs them into
  an estimator spec.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    In PREDICT mode, a tf.estimator.EstimatorSpec whose predictions contain
    the (possibly transposed) input image and the per-level class/box outputs.
    Otherwise, a tf.estimator.tpu.TPUEstimatorSpec carrying the total loss,
    the train op (TRAIN only), the eval metrics (EVAL only), a host call and
    an optional scaffold function.

  Raises:
    RuntimeError: in TRAIN mode, if both ckpt and backbone_ckpt are set.
  """
  # When data_format is 'channels_first', move the channel axis (last) to
  # position 1 before feeding the model.
  if params['data_format'] == 'channels_first':
    features = tf.transpose(features, [0, 3, 1, 2])
  # `params` (a dict) is wrapped in a Config object for the model call.
  def _model_outputs(inputs):
    return model(inputs, config=hparams_config.Config(params))

  cls_outputs, box_outputs = utils.build_model_with_precision(
      params['precision'], _model_outputs, features)

  # Cast every per-level output to float32 regardless of the precision the
  # model was built with.
  levels = cls_outputs.keys()
  for level in levels:
    cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
    box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

  # First check if it is in PREDICT mode.
  # PREDICT returns early: no loss, optimizer or metrics are built.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)

  # cls_loss and box_loss are for logging. only total_loss is optimized.
  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
      cls_outputs, box_outputs, labels, params)
  l2loss = reg_l2_loss(params['weight_decay'])
  total_loss = det_loss + l2loss

  # Training summaries are only recorded in TRAIN mode.
  if mode == tf.estimator.ModeKeys.TRAIN:
    utils.scalar('lrn_rate', learning_rate)
    utils.scalar('trainloss/cls_loss', cls_loss)
    utils.scalar('trainloss/box_loss', box_loss)
    utils.scalar('trainloss/box_iou_loss', box_iou_loss)
    utils.scalar('trainloss/det_loss', det_loss)
    utils.scalar('trainloss/l2_loss', l2loss)
    utils.scalar('trainloss/loss', total_loss)

  # The EMA object is created for both TRAIN and EVAL when a decay is set:
  # TRAIN applies it after each step, EVAL restores the averaged variables.
  moving_average_decay = params['moving_average_decay']
  if moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=moving_average_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.MomentumOptimizer(
        learning_rate, momentum=params['momentum'])
    if params['use_tpu']:
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = tf.trainable_variables()
    if variable_filter_fn:
      var_list = variable_filter_fn(var_list)

    # Optional global-norm gradient clipping; a missing key is treated as 0
    # (clipping disabled).
    if params.get('clip_gradients_norm', 0) > 0:
      logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
      grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
      with tf.name_scope('clip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        clipped_grads, gnorm = tf.clip_by_global_norm(
            grads, params['clip_gradients_norm'])
        utils.scalar('gnorm', gnorm)
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            total_loss, global_step, var_list=var_list)

    # Chain the EMA update after the optimizer step so the returned train_op
    # runs both.
    if moving_average_decay:
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      # NOTE(review): on TPU the batch size handed to the COCO metric is
      # scaled by num_shards; presumably params['batch_size'] is per-shard
      # there -- confirm against the caller.
      batch_size = params['batch_size']
      if params['use_tpu']:
        batch_size = params['batch_size'] * params['num_shards']
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      # With a testdev_dir set, the extra testdev/disable_pyfun arguments are
      # forwarded to coco_metric_fn; otherwise it is called with the
      # validation json file only.
      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        coco_metrics = coco_metric_fn(
            batch_size,
            anchor_labeler,
            params['val_json_file'],
            testdev_dir=params['testdev_dir'],
            disable_pyfun=params.get('disable_pyfun', None),
            **kwargs)
      else:
        logging.info('Eval val with groudtruths %s.', params['val_json_file'])
        coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                      params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    # The scalar losses are tiled to shape [batch_size, 1] before being passed
    # to metric_fn (presumably because metric inputs must carry a batch
    # dimension -- TODO confirm).
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'groundtruth_data': labels['groundtruth_data'],
        'image_scales': labels['image_scales'],
    }
    add_metric_fn_inputs(params, cls_outputs, box_outputs, metric_fn_inputs)
    eval_metrics = (metric_fn, metric_fn_inputs)

  # 'ckpt' takes precedence over 'backbone_ckpt' here; having both set is
  # rejected below when training.
  checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

  if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
    # Initialize the model from an EfficientDet or backbone checkpoint.
    if params.get('ckpt') and params.get('backbone_ckpt'):
      raise RuntimeError(
          '--backbone_ckpt and --checkpoint are mutually exclusive')

    if params.get('backbone_ckpt'):
      var_scope = params['backbone_name'] + '/'
      if params['ckpt_var_scope'] is None:
        # Use backbone name as default checkpoint scope.
        ckpt_scope = params['backbone_name'] + '/'
      else:
        ckpt_scope = params['ckpt_var_scope'] + '/'
    else:
      # Load every var in the given checkpoint
      var_scope = ckpt_scope = '/'

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      logging.info('restore variables from %s', checkpoint)

      var_map = utils.get_ckpt_var_map(
          ckpt_path=checkpoint,
          ckpt_scope=ckpt_scope,
          var_scope=var_scope,
          var_exclude_expr=params.get('var_exclude_expr', None))

      tf.train.init_from_checkpoint(checkpoint, var_map)

      return tf.train.Scaffold()
  elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:
    def scaffold_fn():
      """Load moving average variables for eval."""
      logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
      restore_vars_dict = ema.variables_to_restore(ema_vars)
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)
  else:
    scaffold_fn = None

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      host_call=utils.get_tpu_host_call(global_step, params),
      scaffold_fn=scaffold_fn)
Esempio n. 2
0
def style_image_inputs(style_dataset_file,
                       batch_size=None,
                       image_size=None,
                       square_crop=False,
                       shuffle=True):
    """Builds a queue-based reader for a tfrecord dataset of style images.

    Each record is parsed as an Example with the following features:
      * 'image_raw': encoded JPEG string of the style image.
      * 'label': int64 identifier of the style image.
      * 'vgg_16/<LAYER_NAME>': float32 Gram matrix for <LAYER_NAME> in
            {conv,pool}{1,2,3,4,5} (64x64, 128x128, 256x256, 512x512 and
            512x512 for levels 1 through 5 respectively).

    Args:
      style_dataset_file: str, path to the tfrecord dataset of style files.
      batch_size: int or None. When given, examples are grouped with
          tf.train.batch; this requires square_crop. Defaults to None.
      image_size: int or None. When given, the image is resized with
          _aspect_preserving_resize. Defaults to None.
      square_crop: bool. If True, the image is resized to image_size + 2 and
          then centrally cropped to [image_size, image_size]. Defaults to
          False.
      shuffle: bool. If True, serialized examples go through a
          RandomShuffleQueue, otherwise through a FIFOQueue. Defaults to True.

    Returns:
      A tuple (image, label, gram_matrices):
        image: float tensor scaled by 1/255. Without batch_size a leading
            axis of size 1 is added; with batch_size it is the batched image.
        label: the style label (batched when batch_size is given).
        gram_matrices: dict mapping each 'vgg_16/<LAYER_NAME>' key to its
            Gram matrix tensor (batched when batch_size is given).

    Raises:
      ValueError: if center cropping is requested but no image size is provided,
          or if batch size is specified but center-cropping is not requested.
    """
    # Validate the argument combination before touching the graph.
    if square_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if not (batch_size is None or square_crop):
        raise ValueError('batching requires center-cropping.')

    # (feature key, Gram matrix side length), in the order the matrices are
    # returned.
    gram_layout = (
        ('vgg_16/conv1', 64), ('vgg_16/pool1', 64),
        ('vgg_16/conv2', 128), ('vgg_16/pool2', 128),
        ('vgg_16/conv3', 256), ('vgg_16/pool3', 256),
        ('vgg_16/conv4', 512), ('vgg_16/pool4', 512),
        ('vgg_16/conv5', 512), ('vgg_16/pool5', 512),
    )
    vgg_layers = [layer_name for layer_name, _ in gram_layout]

    with tf.name_scope('style_image_processing'):
        filename_queue = tf.train.string_input_producer(
            [style_dataset_file],
            shuffle=False,
            capacity=1,
            name='filename_queue')

        # Serialized examples are staged in an intermediate queue, which is
        # where the optional shuffling happens.
        if shuffle:
            examples_queue = tf.RandomShuffleQueue(
                capacity=64,
                min_after_dequeue=32,
                dtypes=[tf.string],
                name='random_examples_queue')
        else:
            examples_queue = tf.FIFOQueue(
                capacity=64,
                dtypes=[tf.string],
                name='fifo_examples_queue')

        reader = tf.TFRecordReader()
        _, serialized_record = reader.read(filename_queue)
        enqueue_ops = [examples_queue.enqueue([serialized_record])]
        runner = tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops)
        tf.train.queue_runner.add_queue_runner(runner)
        example_serialized = examples_queue.dequeue()

        # Parsing spec: scalar label and image bytes plus one square Gram
        # matrix per VGG layer.
        feature_spec = {
            'label': tf.FixedLenFeature([], tf.int64),
            'image_raw': tf.FixedLenFeature([], tf.string),
        }
        for layer_name, side in gram_layout:
            feature_spec[layer_name] = tf.FixedLenFeature([side, side],
                                                          tf.float32)
        parsed = tf.parse_single_example(example_serialized,
                                         features=feature_spec)

        image = tf.image.decode_jpeg(parsed['image_raw'])
        label = parsed['label']
        gram_matrices = [parsed[layer_name] for layer_name in vgg_layers]
        image.set_shape([None, None, 3])

        if image_size and square_crop:
            # Resize slightly larger than the target, then crop the center.
            image = _aspect_preserving_resize(image, image_size + 2)
            image = _central_crop([image], image_size, image_size)[0]
            image.set_shape([image_size, image_size, 3])
        elif image_size:
            image = _aspect_preserving_resize(image, image_size)

        image = tf.to_float(image) / 255.0

        if batch_size is not None:
            batched = tf.train.batch([image, label] + gram_matrices,
                                     batch_size=batch_size)
            image, label = batched[0], batched[1]
            gram_matrices = batched[2:]
        else:
            # Only the image gets a leading axis in the unbatched case.
            image = tf.expand_dims(image, 0)

        return image, label, dict(zip(vgg_layers, gram_matrices))
Esempio n. 3
0
def _parse_example_proto(example_serialized):
    """Extracts image bytes, label, boxes and label text from an Example.

    The following fields of the serialized Example are read (sample values
    shown):

      image/encoded: <JPEG encoded string>
      image/class/label: 615
      image/class/text: 'knee pad'
      image/object/bbox/xmin: 0.1
      image/object/bbox/xmax: 0.9
      image/object/bbox/ymin: 0.2
      image/object/bbox/ymax: 0.6

    Any other field present in the proto is ignored by this parser.

    Args:
      example_serialized: scalar Tensor tf.string containing a serialized
        Example protocol buffer.

    Returns:
      image_buffer: Tensor tf.string with the encoded image ('' if absent).
      label: Tensor tf.int32 of shape [1] with the class label (-1 if absent).
      bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
        with the coordinates ordered [ymin, xmin, ymax, xmax].
      text: Tensor tf.string containing the human-readable label ('' if
        absent).
    """
    xmin_key = 'image/object/bbox/xmin'
    ymin_key = 'image/object/bbox/ymin'
    xmax_key = 'image/object/bbox/xmax'
    ymax_key = 'image/object/bbox/ymax'
    bbox_keys = [xmin_key, ymin_key, xmax_key, ymax_key]

    # Fixed-length (dense) fields, each with a default for missing values.
    feature_map = {
        'image/encoded':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
        'image/class/label':
        tf.FixedLenFeature([1], dtype=tf.int64, default_value=-1),
        'image/class/text':
        tf.FixedLenFeature([], dtype=tf.string, default_value=''),
    }
    # Box coordinates are variable-length, hence parsed as sparse features.
    sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
    for key in bbox_keys:
        feature_map[key] = sparse_float32

    parsed = tf.parse_single_example(example_serialized, feature_map)
    label = tf.cast(parsed['image/class/label'], dtype=tf.int32)

    # One [1, num_boxes] row per coordinate.
    rows = {key: tf.expand_dims(parsed[key].values, 0) for key in bbox_keys}

    # Stack the rows in (y, x) order to get [4, num_boxes], then add a leading
    # axis and swap the last two to end up with [1, num_boxes, 4].
    stacked = tf.concat(
        [rows[ymin_key], rows[xmin_key], rows[ymax_key], rows[xmax_key]], 0)
    bbox = tf.transpose(tf.expand_dims(stacked, 0), [0, 2, 1])

    return parsed['image/encoded'], label, bbox, parsed['image/class/text']
    def decode(self,
               decoder_input,
               encoder_output,
               encoder_decoder_attention_bias,
               decoder_self_attention_bias,
               hparams,
               cache=None,
               decode_loop_step=None,
               nonpadding=None,
               losses=None,
               **kwargs):
        """Decode Universal Transformer outputs from encoder representation.

        Counterpart of "transformer.decode" that delegates to
        "universal_transformer_util.universal_transformer_decoder" rather than
        "transformer.transformer_decoder".

        Args:
          decoder_input: inputs to bottom of the model. [batch_size,
            decoder_length, hidden_dim]
          encoder_output: Encoder representation. [batch_size, input_length,
            hidden_dim]
          encoder_decoder_attention_bias: Bias and mask weights for
            encoder-decoder attention. [batch_size, input_length]
          decoder_self_attention_bias: Bias and mask weights for decoder
            self-attention. [batch_size, decoder_length]
          hparams: hyperparameters for model; layer_prepostprocess_dropout is
            read here for the input dropout.
          cache: Unimplemented; discarded.
          decode_loop_step: Unused; discarded.
          nonpadding: optional Tensor with shape [batch_size, decoder_length]
          losses: Unused; discarded.
          **kwargs: accepted for signature compatibility; not forwarded to the
            decoder by this implementation.

        Returns:
          Tuple of:
            Final decoder representation with an extra axis inserted at
              position 2.
            dec_extra_output: the second value returned by the decoder
              function, passed through unchanged.
        """
        # These arguments exist only to match the caller's signature.
        # TODO(dehghani): enable caching.
        del cache, decode_loop_step, losses

        keep_prob = 1.0 - hparams.layer_prepostprocess_dropout
        dropped_input = tf.nn.dropout(decoder_input, keep_prob)

        # No caching in Universal Transformers!
        run_decoder = universal_transformer_util.universal_transformer_decoder
        decoder_output, dec_extra_output = run_decoder(
            dropped_input,
            encoder_output,
            decoder_self_attention_bias,
            encoder_decoder_attention_bias,
            hparams,
            nonpadding=nonpadding,
            save_weights_to=self.attention_weights)

        # Expand since t2t expects 4d tensors.
        expanded_output = tf.expand_dims(decoder_output, axis=2)
        return expanded_output, dec_extra_output
Esempio n. 5
0
    def get_prediction_module(self, bert_model, features, is_training,
                              percent_done):
        """Builds the question-answering head and its per-example loss.

        The loss sums, in order: the mean of the start/end span losses, the
        mean of the "plausible answer" start/end losses, optionally the
        weighted answerability loss, and 0.5 times the value returned by
        rl_loss.

        Args:
          bert_model: model object; its get_sequence_output() result is used
            as a rank-3 tensor whose first two dimensions are taken as
            batch_size and seq_length.
          features: dict of input tensors. Keys read here are "input_mask",
            "segment_ids" and, prefixed with self.name, "_start_positions",
            "_end_positions", "_plau_answer_start", "_plau_answer_end",
            "_eid" and (when the answerable classifier is enabled)
            "_is_impossible".
          is_training: bool selecting the training or inference graph of the
            joint-prediction branch.
          percent_done: not used by this method.

        Returns:
          A tuple (losses, outputs) where outputs is a dict holding the loss,
          the start/end logits, the answerable logit, the gold start/end
          positions, the beam-search top-k log-probs and indices, and the
          example ids.
        """
        final_hidden = bert_model.get_sequence_output()

        final_hidden_shape = modeling.get_shape_list(final_hidden,
                                                     expected_rank=3)
        batch_size = final_hidden_shape[0]
        seq_length = final_hidden_shape[1]

        # Positions eligible as answers: input_mask * segment_ids, plus
        # position 0 (presumably the [CLS] token -- confirm).
        answer_mask = tf.cast(features["input_mask"], tf.float32)
        answer_mask *= tf.cast(features["segment_ids"], tf.float32)
        answer_mask += tf.one_hot(0, seq_length)

        start_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)

        # Zero-filled defaults for the beam outputs; they are only overwritten
        # in the joint-prediction branch below.
        start_top_log_probs = tf.zeros([batch_size, self.config.beam_size])
        start_top_index = tf.zeros([batch_size, self.config.beam_size],
                                   tf.int32)
        end_top_log_probs = tf.zeros(
            [batch_size, self.config.beam_size, self.config.beam_size])
        end_top_index = tf.zeros(
            [batch_size, self.config.beam_size, self.config.beam_size],
            tf.int32)
        if self.config.joint_prediction:
            # Adding 1000 * (mask - 1) lowers the logit by 1000 wherever the
            # mask is 0, effectively excluding those positions.
            start_logits += 1000.0 * (answer_mask - 1)
            start_log_probs = tf.nn.log_softmax(start_logits)
            start_top_log_probs, start_top_index = tf.nn.top_k(
                start_log_probs, k=self.config.beam_size)

            if not is_training:
                # Inference: condition the end prediction on each of the
                # beam_size best start positions.
                # batch, beam, length, hidden
                end_features = tf.tile(tf.expand_dims(final_hidden, 1),
                                       [1, self.config.beam_size, 1, 1])
                # batch, beam, length
                start_index = tf.one_hot(start_top_index,
                                         depth=seq_length,
                                         axis=-1,
                                         dtype=tf.float32)
                # batch, beam, hidden
                start_features = tf.reduce_sum(
                    tf.expand_dims(final_hidden, 1) *
                    tf.expand_dims(start_index, -1),
                    axis=-2)
                # batch, beam, length, hidden
                start_features = tf.tile(tf.expand_dims(start_features, 2),
                                         [1, 1, seq_length, 1])
            else:
                # Training: condition the end prediction on the gold start
                # position.
                start_index = tf.one_hot(features[self.name +
                                                  "_start_positions"],
                                         depth=seq_length,
                                         axis=-1,
                                         dtype=tf.float32)
                start_features = tf.reduce_sum(
                    tf.expand_dims(start_index, -1) * final_hidden, axis=1)
                start_features = tf.tile(tf.expand_dims(start_features, 1),
                                         [1, seq_length, 1])
                end_features = final_hidden

            final_repr = tf.concat([start_features, end_features], -1)
            final_repr = tf.layers.dense(final_repr,
                                         512,
                                         activation=modeling.gelu,
                                         name="qa_hidden")
            # batch, beam, length (batch, length when training)
            end_logits = tf.squeeze(tf.layers.dense(final_repr, 1),
                                    -1,
                                    name="qa_logits")
            if is_training:
                end_logits += 1000.0 * (answer_mask - 1)
            else:
                # Broadcast the mask penalty over the beam axis.
                end_logits += tf.expand_dims(1000.0 * (answer_mask - 1), 1)

            if not is_training:
                end_log_probs = tf.nn.log_softmax(end_logits)
                end_top_log_probs, end_top_index = tf.nn.top_k(
                    end_log_probs, k=self.config.beam_size)
                # The beam results live in end_top_*; end_logits is replaced
                # by zeros of shape [batch_size, seq_length], which is what
                # the loss computation below receives at inference time.
                end_logits = tf.zeros([batch_size, seq_length])
        else:
            # Independent start/end prediction: a separate dense layer for the
            # end logits, both masked the same way.
            end_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)
            start_logits += 1000.0 * (answer_mask - 1)
            end_logits += 1000.0 * (answer_mask - 1)

        def compute_loss(logits, positions):
            # Negative log-probability of the one-hot encoded `positions`
            # under a softmax over the last axis of `logits`.
            one_hot_positions = tf.one_hot(positions,
                                           depth=seq_length,
                                           dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            loss = -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
            return loss

        start_positions = features[self.name + "_start_positions"]
        end_positions = features[self.name + "_end_positions"]

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)

        losses = (start_loss + end_loss) / 2.0

        # plausible answer loss
        # A single dense layer emits two channels per position, which are
        # split into start and end logits.
        plau_logits = tf.layers.dense(final_hidden, 2)
        plau_logits = tf.reshape(plau_logits, [batch_size, seq_length, 2])
        plau_logits = tf.transpose(plau_logits, [2, 0, 1])
        unstacked_logits = tf.unstack(plau_logits, axis=0)
        (plau_start_logits, plau_end_logits) = (unstacked_logits[0],
                                                unstacked_logits[1])
        plau_start_logits += 1000.0 * (answer_mask - 1)
        plau_end_logits += 1000.0 * (answer_mask - 1)
        plau_start_positions = features[self.name + "_plau_answer_start"]
        plau_end_positions = features[self.name + "_plau_answer_end"]
        plau_start_loss = compute_loss(plau_start_logits, plau_start_positions)
        plau_end_loss = compute_loss(plau_end_logits, plau_end_positions)
        losses += (plau_start_loss + plau_end_loss) / 2.0

        # Answerability: defaults to a zero logit when the classifier is
        # disabled.
        answerable_logit = tf.zeros([batch_size])
        if self.config.answerable_classifier:
            # Start from the hidden state at position 0.
            final_repr = final_hidden[:, 0]
            if self.config.answerable_uses_start_logits:
                # Append the start-probability-weighted sum of hidden states.
                start_p = tf.nn.softmax(start_logits)
                start_feature = tf.reduce_sum(tf.expand_dims(start_p, -1) *
                                              final_hidden,
                                              axis=1)
                final_repr = tf.concat([final_repr, start_feature], -1)
                final_repr = tf.layers.dense(final_repr,
                                             512,
                                             activation=modeling.gelu)
            answerable_logit = tf.squeeze(tf.layers.dense(final_repr, 1), -1)
            answerable_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.cast(features[self.name + "_is_impossible"],
                               tf.float32),
                logits=answerable_logit)
            losses += answerable_loss * self.config.answerable_weight

        # Imported at call time rather than at module level.
        from finetune.qa.rl_loss import rl_loss

        # Extra loss term from rl_loss, added with a fixed weight of 0.5.
        loss_rl = rl_loss(start_logits,
                          end_logits,
                          start_positions,
                          end_positions,
                          sample_num=4)
        losses += 0.5 * loss_rl

        return losses, dict(
            loss=losses,
            start_logits=start_logits,
            end_logits=end_logits,
            answerable_logit=answerable_logit,
            start_positions=features[self.name + "_start_positions"],
            end_positions=features[self.name + "_end_positions"],
            start_top_log_probs=start_top_log_probs,
            start_top_index=start_top_index,
            end_top_log_probs=end_top_log_probs,
            end_top_index=end_top_index,
            eid=features[self.name + "_eid"],
        )
Esempio n. 6
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the detection network on `features` and, depending on `mode`,
    returns raw per-level predictions (PREDICT), a training op with loss
    summaries (TRAIN), or loss plus COCO evaluation metrics (EVAL).

    Args:
      features: the input image tensor with shape
        [batch_size, height, width, 3]. The height and width are fixed and
        equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. In EVAL mode the
        keys 'source_ids', 'groundtruth_data' and 'image_scales' are read
        here. The labels are generated from get_input_fn function in
        data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs. Only
        used when params['use_keras_model'] is falsy; otherwise an
        EfficientDetNet keras model is built instead.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      In PREDICT mode, a `tf.estimator.EstimatorSpec` whose predictions hold
      the input image and the per-level class/box outputs. Otherwise a
      `tf.estimator.tpu.TPUEstimatorSpec` to run training or evaluation.

    Raises:
      RuntimeError: if both ckpt and backbone_ckpt are set (only checked in
        TRAIN mode).
      ValueError: if params['optimizer'] is neither 'sgd' nor 'adam' (only
        checked in TRAIN mode).
    """
    # NOTE(review): presumably records the input batch as an image summary;
    # confirm against utils.image.
    utils.image('input_image', features)
    training_hooks = []

    if params['use_keras_model']:

        def model_fn(inputs):
            # The keras model returns per-level lists; convert them to dicts
            # keyed by level (min_level..max_level) like the legacy model.
            model = efficientdet_keras.EfficientDetNet(
                config=hparams_config.Config(params))
            cls_out_list, box_out_list = model(inputs,
                                               params['is_training_bn'])
            cls_outputs, box_outputs = {}, {}
            for i in range(params['min_level'], params['max_level'] + 1):
                cls_outputs[i] = cls_out_list[i - params['min_level']]
                box_outputs[i] = box_out_list[i - params['min_level']]
            return cls_outputs, box_outputs
    else:
        # Convert params (dict) to Config and bind it to the legacy model.
        model_fn = functools.partial(model,
                                     config=hparams_config.Config(params))

    precision = utils.get_precision(params['strategy'],
                                    params['mixed_precision'])
    cls_outputs, box_outputs = utils.build_model_with_precision(
        precision, model_fn, features, params['is_training_bn'])

    # Cast every level's outputs to float32 before losses / post-processing,
    # whatever precision the model was built with.
    levels = cls_outputs.keys()
    for level in levels:
        cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
        box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

    # First check if it is in PREDICT mode.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
        cls_outputs, box_outputs, labels, params)
    reg_l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + reg_l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
        utils.scalar('trainloss/loss', total_loss)
        if params['iou_loss_type']:
            utils.scalar('trainloss/box_iou_loss', box_iou_loss)
        train_epochs = tf.cast(global_step,
                               tf.float32) / params['steps_per_epoch']
        utils.scalar('train_epochs', train_epochs)

    # `ema` and `ema_vars` exist only when moving_average_decay is truthy;
    # every later use is guarded by the same condition.
    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()
    if params['strategy'] == 'horovod':
        # Imported lazily so horovod is only required when it is selected.
        import horovod.tensorflow as hvd  # pylint: disable=g-import-not-at-top
        # Scale the learning rate by the number of horovod workers. This runs
        # after the 'lrn_rate' summary above, so the summary shows the
        # unscaled value.
        learning_rate = learning_rate * hvd.size()
    if mode == tf.estimator.ModeKeys.TRAIN:
        if params['optimizer'].lower() == 'sgd':
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   momentum=params['momentum'])
        elif params['optimizer'].lower() == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        else:
            raise ValueError('optimizers should be adam or sgd')

        # Wrap the optimizer for the selected distribution strategy.
        if params['strategy'] == 'tpu':
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)
        elif params['strategy'] == 'horovod':
            optimizer = hvd.DistributedOptimizer(optimizer)
            # Replaces (rather than extends) the hook list built so far, which
            # is still empty at this point.
            training_hooks = [hvd.BroadcastGlobalVariablesHook(0)]

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list)

        if params.get('clip_gradients_norm', None):
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                # First clip each variable's norm, then clip global norm.
                clip_norm = abs(params['clip_gradients_norm'])
                clipped_grads = [tf.clip_by_norm(g, clip_norm) for g in grads]
                clipped_grads, _ = tf.clip_by_global_norm(
                    clipped_grads, clip_norm)
                # The summary reports the norm after both clipping steps.
                utils.scalar('gradient_norm',
                             tf.linalg.global_norm(clipped_grads))
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            # Update the moving averages only after the weights are updated.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            if params['nms_configs'].get('pyfunc', True):
                # Run numpy per-class NMS one image at a time, then stack the
                # per-image detections back into a batch.
                detections_bs = []
                for index in range(kwargs['boxes'].shape[0]):
                    nms_configs = params['nms_configs']
                    detections = tf.numpy_function(
                        functools.partial(nms_np.per_class_nms,
                                          nms_configs=nms_configs),
                        [
                            kwargs['boxes'][index],
                            kwargs['scores'][index],
                            kwargs['classes'][index],
                            tf.slice(kwargs['image_ids'], [index], [1]),
                            tf.slice(kwargs['image_scales'], [index], [1]),
                            params['num_classes'],
                            nms_configs['max_output_size'],
                        ], tf.float32)
                    detections_bs.append(detections)
                detections_bs = postprocess.transform_detections(
                    tf.stack(detections_bs))
            else:
                # These two branches should be equivalent, but currently they are not.
                # TODO(tanmingxing): enable the non_pyfun path after bug fix.
                nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
                    params, kwargs['boxes'], kwargs['scores'],
                    kwargs['classes'], kwargs['image_scales'])
                img_ids = tf.cast(tf.expand_dims(kwargs['image_ids'], -1),
                                  nms_scores.dtype)
                # Stacked columns: image id, box[1], box[0], box[3] - box[1],
                # box[2] - box[0], score, class.
                # NOTE(review): presumably [id, x, y, width, height, score,
                # class] with boxes laid out as [ymin, xmin, ymax, xmax];
                # confirm against postprocess.per_class_nms.
                detections_bs = [
                    img_ids * tf.ones_like(nms_scores),
                    nms_boxes[:, :, 1],
                    nms_boxes[:, :, 0],
                    nms_boxes[:, :, 3] - nms_boxes[:, :, 1],
                    nms_boxes[:, :, 2] - nms_boxes[:, :, 0],
                    nms_scores,
                    nms_classes,
                ]
                detections_bs = tf.stack(detections_bs,
                                         axis=-1,
                                         name='detnections')

            if params.get('testdev_dir', None):
                # Test-dev has no groundtruth here, so a dummy zero tensor is
                # passed in its place.
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                eval_metric = coco_metric.EvaluationMetric(
                    testdev_dir=params['testdev_dir'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, tf.zeros([1]))
            else:
                logging.info('Eval val with groudtruths %s.',
                             params['val_json_file'])
                eval_metric = coco_metric.EvaluationMetric(
                    filename=params['val_json_file'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, kwargs['groundtruth_data'],
                    params['label_map'])

            # Add metrics to output.
            # These locals shadow the outer cls_loss / box_loss tensors only
            # inside metric_fn.
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # Repeat each loss batch_size times and reshape to [batch_size, 1].
        # NOTE(review): assumes cls_loss / box_loss are scalars, and that this
        # is done because metric_fn inputs must be batch-major; confirm.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])

        cls_outputs = postprocess.to_list(cls_outputs)
        box_outputs = postprocess.to_list(box_outputs)
        # Note: this mutates the caller's params['nms_configs'] dict in place.
        params['nms_configs']['max_nms_inputs'] = anchors.MAX_DETECTION_POINTS
        boxes, scores, classes = postprocess.pre_nms(params, cls_outputs,
                                                     box_outputs)
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'image_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
            'boxes': boxes,
            'scores': scores,
            'classes': classes,
        }
        eval_metrics = (metric_fn, metric_fn_inputs)

    # A full-model checkpoint ('ckpt') takes precedence in this lookup; having
    # both set is rejected below.
    checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

    if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
        # Initialize the model from an EfficientDet or backbone checkpoint.
        if params.get('ckpt') and params.get('backbone_ckpt'):
            raise RuntimeError(
                '--backbone_ckpt and --checkpoint are mutually exclusive')

        if params.get('backbone_ckpt'):
            var_scope = params['backbone_name'] + '/'
            if params['ckpt_var_scope'] is None:
                # Use backbone name as default checkpoint scope.
                ckpt_scope = params['backbone_name'] + '/'
            else:
                ckpt_scope = params['ckpt_var_scope'] + '/'
        else:
            # Load every var in the given checkpoint
            var_scope = ckpt_scope = '/'

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', checkpoint)

            var_map = utils.get_ckpt_var_map(
                ckpt_path=checkpoint,
                ckpt_scope=ckpt_scope,
                var_scope=var_scope,
                skip_mismatch=params['skip_mismatch'])

            tf.train.init_from_checkpoint(checkpoint, var_map)

            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    # The profiling, OOM-report and logging hooks are attached for every
    # strategy except TPU.
    if params['strategy'] != 'tpu':
        # Profile every 1K steps.
        profile_hook = tf.train.ProfilerHook(save_steps=1000,
                                             output_dir=params['model_dir'])
        training_hooks.append(profile_hook)

        # Report memory allocation if OOM
        class OomReportingHook(tf.estimator.SessionRunHook):
            def before_run(self, run_context):
                return tf.estimator.SessionRunArgs(
                    fetches=[],
                    options=tf.RunOptions(
                        report_tensor_allocations_upon_oom=True))

        training_hooks.append(OomReportingHook())

        # Log the step and the loss tensors every `iterations_per_loop`
        # iterations (default 100).
        logging_hook = tf.train.LoggingTensorHook(
            {
                'step': global_step,
                'det_loss': det_loss,
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            },
            every_n_iter=params.get('iterations_per_loop', 100),
        )
        training_hooks.append(logging_hook)

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn,
                                             training_hooks=training_hooks)
Esempio n. 7
0
def create_dual_approx(num_layers,
                       batch_size,
                       action_max,
                       W_T_list,
                       b_T_list,
                       action_tensor_center,
                       return_full_info=False):
    """Build the layer-by-layer dual bound graph for a ReLU network.

    Starting from an input `action_tensor_center` and a radius `action_max`,
    the function propagates lower/upper pre-activation bounds (l_i, u_i)
    through the layers, maintaining the dual variables nu_i and the bias
    terms gamma_i, and finally evaluates the dual objective on the last layer.

    All internal lists are padded with a placeholder at index 0 so that
    `some_list[i - 1]` holds the quantity for layer `i`.

    Args:
        num_layers: number of layers K. The recursion runs for
            i = 2, ..., K - 1 and the objective is only produced in the
            iteration i == K - 1, so K must be at least 3.
        batch_size: batch dimension used to tile the first weight matrix and
            to size the placeholder entry of the nu list.
        action_max: multiplier of the L1 norm of nu_hat_1 in the bounds.
            NOTE(review): presumably the radius of the perturbation around
            `action_tensor_center`; confirm with the caller.
        W_T_list: list of 2-D weight tensors; `W_T_list[0]` is right-multiplied
            onto `action_tensor_center`.
        b_T_list: list of bias tensors, indexed as 2-D ('ij') operands in the
            einsum contractions below.
        action_tensor_center: 2-D input tensor (it is the left operand of
            `tf.matmul` with `W_T_list[0]`).
        return_full_info: when True, also return the intermediate lists and
            tensors of the last iteration.

    Returns:
        `-J_tilde`, i.e. `psi + action_max * ||nu_hat_1||_1 + u_ip1` from the
        final iteration; or, when `return_full_info` is True, the tuple
        `(-J_tilde, l_list, u_list, D_list, Nu_list, gamma_list, psi, l_ip1,
        u_ip1, Nu_hat_1)`.

    Raises:
        ValueError: if `num_layers` is smaller than 3, in which case the
            recursion would never run and no objective would be defined.
    """
    # Fail fast, before any graph op is created: with fewer than 3 layers the
    # loop below is empty and J_tilde / psi / l_ip1 / u_ip1 are never bound.
    if num_layers < 3:
        raise ValueError('num_layers must be at least 3, got %r' %
                         (num_layers, ))

    # List of bounds (l_i,u_i) for i = 2,...,K-1
    l_list = [tf.zeros_like(action_tensor_center)]
    u_list = [tf.zeros_like(action_tensor_center)]

    # List of transition matrices D_i for i = 2,...,K-1
    D_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of spanning ReLu neurons for i = 2,...,K-1
    I_list = [tf.zeros_like(action_tensor_center)]

    # Indicators of active ReLu neurons for i = 2,...,K-1
    Ip_list = [tf.zeros_like(action_tensor_center)]

    # Final list of duals nu_i for i = 2,...,K-1
    Nu_list = [tf.zeros([batch_size, W_T_list[0].get_shape().as_list()[1], 1])]

    # Initialize Nu_K (the two negations cancel: this is a [1, 1, 1] tensor
    # holding 1.0)
    Nu_K = -tf.expand_dims(-tf.eye(1), axis=-1)

    # Final list of b_i'*nu_{i+1} for i = 1,...,K-1
    gamma_list = [b_T_list[0]]

    # Pre-compute bounds for layer 2
    # Initialize Nu_hat_1
    Nu_hat_1 = tf.tile(tf.expand_dims(W_T_list[0], axis=0), [batch_size, 1, 1])

    # Initialize bounds: center response -/+ action_max times the L1 norm of
    # Nu_hat_1 taken along axis 1.
    l_2 = tf.matmul(action_tensor_center,
                    W_T_list[0]) + gamma_list[0] - action_max * tf.norm(
                        Nu_hat_1, 1, axis=1, keepdims=False)
    u_2 = tf.matmul(action_tensor_center,
                    W_T_list[0]) + gamma_list[0] + action_max * tf.norm(
                        Nu_hat_1, 1, axis=1, keepdims=False)

    # Add to list (store in vector format)
    l_list.append(l_2)
    u_list.append(u_2)

    # Recursion

    for i in range(2, num_layers):
        # form Ip, I
        Ip_i, I_i = get_I(l_list[i - 1], u_list[i - 1])
        I_list.append(I_i)
        Ip_list.append(Ip_i)

        # form D
        D_i = get_D(l_list[i - 1], u_list[i - 1], Ip_i, I_i)
        D_list.append(D_i)

        # initialize nu_i
        Nu_list.append(tf.einsum('ij,jk->ijk', D_i, W_T_list[i - 1]))

        # initialize gamma_i
        gamma_list.append(b_T_list[i - 1])

        # if final iteration, update with Nu_K
        if i == num_layers - 1:
            # Tiling uses the static leading dimension of nu_i, so that
            # dimension must be known at graph-construction time.
            Nu_K = tf.tile(Nu_K,
                           [Nu_list[i - 1].get_shape().as_list()[0], 1, 1])
            Nu_list[i - 1] = tf.einsum('ijk,ikm->ijm', Nu_list[i - 1], Nu_K)
            gamma_list[i - 1] = tf.einsum('ij,ijm->im', gamma_list[i - 1],
                                          Nu_K)

        # initialize next layer bounds
        l_ip1 = tf.einsum('ij,ijm->im', l_list[i - 1] * I_list[i - 1],
                          tf.nn.relu(-Nu_list[i - 1]))
        u_ip1 = -tf.einsum('ij,ijm->im', l_list[i - 1] * I_list[i - 1],
                           tf.nn.relu(Nu_list[i - 1]))

        # update nu for layers i-1,...,2
        for j in range(i - 1, 1, -1):
            Nu_hat_j = tf.einsum('jk,ikm->ijm', W_T_list[j - 1], Nu_list[j])

            Nu_list[j - 1] = tf.einsum('ij,ijk->ijk', D_list[j - 1], Nu_hat_j)

            l_ip1 = tf.add(
                l_ip1,
                tf.einsum('ij,ijm->im', l_list[j - 1] * I_list[j - 1],
                          tf.nn.relu(-Nu_list[j - 1])))
            u_ip1 = tf.subtract(
                u_ip1,
                tf.einsum('ij,ijm->im', l_list[j - 1] * I_list[j - 1],
                          tf.nn.relu(Nu_list[j - 1])))

        # update nu_hat_1
        Nu_hat_1 = tf.einsum('jk,ikm->ijm', W_T_list[0], Nu_list[1])

        # start sum
        psi = tf.einsum('ij,ijm->im', action_tensor_center,
                        Nu_hat_1) + gamma_list[i - 1]

        # update gamma for layers 1,...,i-1
        for j in range(1, i):
            gamma_list[j - 1] = tf.einsum('ij,ijm->im', b_T_list[j - 1],
                                          Nu_list[j])

            psi = tf.add(psi, gamma_list[j - 1])

        Nu_hat_1_norm = tf.norm(Nu_hat_1, 1, axis=1, keepdims=False)

        if i < num_layers - 1:
            # finalize bounds
            l_ip1 = tf.add(l_ip1, psi - action_max * Nu_hat_1_norm)
            u_ip1 = tf.add(u_ip1, psi + action_max * Nu_hat_1_norm)

            # add to list
            l_list.append(l_ip1)
            u_list.append(u_ip1)

        else:
            # compute J_tilde (only reached in the last iteration)
            J_tilde = -psi - action_max * Nu_hat_1_norm - u_ip1

    if return_full_info:
        return (-J_tilde, l_list, u_list, D_list, Nu_list, gamma_list, psi,
                l_ip1, u_ip1, Nu_hat_1)
    else:
        return -J_tilde
Esempio n. 8
0
def read_png(filename):
    """Build ops that read and decode an image file.

    The file contents are decoded with `tf.image.decode_image` and the result
    is given an extra leading axis of size 1.
    """
    raw_bytes = tf.io.read_file(filename)
    decoded = tf.image.decode_image(raw_bytes)
    return tf.expand_dims(decoded, 0)
Esempio n. 9
0
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False,
                    pad_to_max_dimension=False):
  """Rescales an image (and optional masks) into a bounded size range.

  Two outcomes are possible:
  1. The image is scaled so that its smaller side equals `min_dimension`,
     provided the larger side then stays within `max_dimension`.
  2. Otherwise the image is scaled so that its larger side equals
     `max_dimension`.

  Args:
    image: A 3D tensor of shape [height, width, channels]
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks.
    min_dimension: (optional) (scalar) desired size of the smaller image
                   dimension.
    max_dimension: (optional) (scalar) maximum allowed size
                   of the larger image dimension.
    method: (optional) interpolation method used for the image. Defaults to
            BILINEAR. Masks always use NEAREST_NEIGHBOR.
    align_corners: bool. If true, exactly align all 4 corners of the input
                   and output. Defaults to False.
    pad_to_max_dimension: Whether to zero-pad the resized image (and masks,
      when given) at the bottom/right so the spatial size becomes
      [max_dimension, max_dimension].

  Returns:
    A list whose layout depends on whether masks were supplied:
    `[resized_image, resized_image_shape]` without masks, or
    `[resized_image, resized_masks, resized_image_shape]` with masks.
    resized_image: A 3D tensor of shape [new_height, new_width, channels].
    resized_masks: A 3D tensor of shape
      [num_instances, new_height, new_width].
    resized_image_shape: the size produced by the size helper for the resized
      image; it is appended unchanged even when padding is requested.

  Raises:
    ValueError: if the image is not a 3D tensor.
  """
  if len(image.get_shape()) != 3:
    raise ValueError('Image should be 3D tensor')

  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
    # Pick the size helper based on whether the static shape is fully known.
    size_fn = (_compute_new_static_size
               if image.get_shape().is_fully_defined()
               else _compute_new_dynamic_size)
    new_size = size_fn(image, min_dimension, max_dimension)

    resized_image = tf.image.resize_images(
        image, new_size[:-1], method=method, align_corners=align_corners)
    if pad_to_max_dimension:
      resized_image = tf.image.pad_to_bounding_box(
          resized_image, 0, 0, max_dimension, max_dimension)

    outputs = [resized_image]

    if masks is not None:
      # resize_images works on a trailing channel axis, so add one for the
      # masks and strip it again afterwards.
      resized_masks = tf.squeeze(
          tf.image.resize_images(
              tf.expand_dims(masks, 3),
              new_size[:-1],
              method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
              align_corners=align_corners),
          3)
      if pad_to_max_dimension:
        resized_masks = tf.image.pad_to_bounding_box(
            resized_masks, 0, 0, max_dimension, max_dimension)
      outputs.append(resized_masks)

    outputs.append(new_size)
    return outputs
Esempio n. 10
0
    def define_loss(self, features, outputs):
        """Compute the averaged multi-task training loss.

        Seven component losses are built (intent, requested slots, categorical
        slot status and value, non-categorical slot status, span start and
        span end). Each one is written to a scalar summary under its own name,
        and the unweighted mean of the seven is returned.

        Args:
            features: dict of label and mask tensors read by key below.
            outputs: dict of model logits read by key below.

        Returns:
            The mean of the seven component losses.
        """

        def _last_dim(tensor):
            # Static size of the trailing axis.
            return tensor.get_shape().as_list()[-1]

        def _is_active(status_labels):
            # 1.0 where the slot status equals STATUS_ACTIVE, else 0.0.
            return tf.cast(
                tf.equal(status_labels, data_utils.STATUS_ACTIVE),
                tf.float32)

        def _flat_softmax_loss(label_ids, logits, depth, weights):
            # One-hot encode the ids, flatten everything to rows of `depth`
            # classes and apply a weighted softmax cross entropy.
            targets = tf.one_hot(label_ids, depth, dtype=tf.int32)
            return tf.losses.softmax_cross_entropy(
                tf.reshape(targets, [-1, depth]),
                tf.reshape(logits, [-1, depth]),
                weights=tf.reshape(weights, [-1]))

        # ---- Intents -------------------------------------------------------
        # Logits shape: (batch_size, max_num_intents + 1).
        intent_logits = outputs["logit_intent_status"]
        # Labels shape: (batch_size, max_num_intents).
        intent_labels = features["intent_status"]
        # Prepend a NONE-intent column that is 1 minus the number of active
        # intents, giving targets of shape (batch_size, max_num_intents + 1).
        active_count = tf.expand_dims(
            tf.reduce_sum(intent_labels, axis=1), axis=1)
        none_column = tf.ones_like(active_count) - active_count
        intent_targets = tf.concat([none_column, intent_labels], axis=1)
        intent_loss = tf.losses.softmax_cross_entropy(
            intent_targets,
            intent_logits,
            weights=features["is_real_example"])

        # ---- Requested slots -----------------------------------------------
        # Shape: (batch_size, max_num_slots).
        req_logits = outputs["logit_req_slot_status"]
        req_labels = features["req_slot_status"]
        req_mask = tf.sequence_mask(features["req_slot_num"],
                                    maxlen=_last_dim(req_labels))
        # Several slots may be requested in one utterance, hence a sigmoid
        # (multi-label) loss rather than a softmax.
        requested_slot_loss = tf.losses.sigmoid_cross_entropy(
            req_labels, req_logits, weights=req_mask)

        # ---- Categorical slot status ---------------------------------------
        # Shape: (batch_size, max_num_cat_slots, 3).
        cat_status_logits = outputs["logit_cat_slot_status"]
        cat_status_labels = features["cat_slot_status"]
        cat_mask = tf.sequence_mask(features["cat_slot_num"],
                                    maxlen=_last_dim(cat_status_labels),
                                    dtype=tf.float32)
        cat_slot_status_loss = _flat_softmax_loss(
            cat_status_labels, cat_status_logits, 3, cat_mask)

        # ---- Categorical slot values ---------------------------------------
        # Shape: (batch_size, max_num_cat_slots, max_num_slot_values).
        cat_value_logits = outputs["logit_cat_slot_value"]
        cat_value_labels = features["cat_slot_value"]
        # The value loss only counts for slots whose status is active.
        cat_slot_value_loss = _flat_softmax_loss(
            cat_value_labels,
            cat_value_logits,
            _last_dim(cat_value_logits),
            cat_mask * _is_active(cat_status_labels))

        # ---- Non-categorical slot status -----------------------------------
        # Shape: (batch_size, max_num_noncat_slots, 3).
        noncat_status_logits = outputs["logit_noncat_slot_status"]
        noncat_status_labels = features["noncat_slot_status"]
        noncat_mask = tf.sequence_mask(
            features["noncat_slot_num"],
            maxlen=_last_dim(noncat_status_labels),
            dtype=tf.float32)
        # Logits for padded (invalid) values are already masked.
        noncat_slot_status_loss = _flat_softmax_loss(
            noncat_status_labels, noncat_status_logits, 3, noncat_mask)

        # ---- Non-categorical slot spans ------------------------------------
        # Shape: (batch_size, max_num_noncat_slots, max_num_tokens).
        start_logits = outputs["logit_noncat_slot_start"]
        start_labels = features["noncat_slot_value_start"]
        num_tokens = _last_dim(start_logits)
        # Shape: (batch_size, max_num_noncat_slots, max_num_tokens).
        end_logits = outputs["logit_noncat_slot_end"]
        end_labels = features["noncat_slot_value_end"]
        # Span losses only count for slots whose status is active.
        noncat_active = _is_active(noncat_status_labels)
        span_start_loss = _flat_softmax_loss(
            start_labels, start_logits, num_tokens,
            noncat_mask * noncat_active)
        span_end_loss = _flat_softmax_loss(
            end_labels, end_logits, num_tokens,
            noncat_mask * noncat_active)

        losses = {
            "intent_loss": intent_loss,
            "requested_slot_loss": requested_slot_loss,
            "cat_slot_status_loss": cat_slot_status_loss,
            "cat_slot_value_loss": cat_slot_value_loss,
            "noncat_slot_status_loss": noncat_slot_status_loss,
            "span_start_loss": span_start_loss,
            "span_end_loss": span_end_loss,
        }
        for summary_name in losses:
            tf.summary.scalar(summary_name, losses[summary_name])
        return sum(losses.values()) / len(losses)
    def __init__(self,
                 network_name,
                 initializer,
                 regularizer,
                 vocab_size,
                 embedding_size,
                 n_class,
                 batch_size,
                 filter_heights,
                 num_filters,
                 num_units,
                 layers=3,
                 *args,
                 **kwargs):
        self.network_name = network_name
        self.initializer = initializer
        self.regularizer = regularizer
        self.vocab_size = vocab_size
        self.n_class = n_class
        self.batch_size = batch_size
        self.filter_heights = filter_heights
        if isinstance(num_filters, list):
            # isinstance: 判断num_filters对象是不是list,是返回True,否则返回False
            if len(self.filter_heights) != len(num_filters):
                raise Exception("filter_heights和num_filters必须长度一致")
            else:
                self.num_filters = num_filters
        elif isinstance(num_filters, int):
            self.num_filters = [num_filters for _ in self.filter_heights]
        else:
            raise Exception("参数num_filters只能是list列表或者int类型的数字!!!")
        self.embedding_size = embedding_size
        self.num_units = num_units
        self.layers = layers

        with tf.variable_scope(self.network_name,
                               initializer=self.initializer,
                               regularizer=self.regularizer):
            # 1. Placeholders for input, output, dropout, batch_size
            with tf.variable_scope("placeholders"):
                self.input = tf.placeholder(tf.int32, [None, None],
                                            name='input_x')
                self.output = tf.placeholder(tf.int32, [None], name='input_y')
                self.dropout_keep_prob = tf.placeholder_with_default(
                    1.0, shape=[], name='dropout_keep_prob')
                self.batch_size = tf.placeholder_with_default(
                    self.batch_size, shape=[], name='batch_size')
                # 计算一个批次中序列的长度(因为填充式填充0)
                # [N,T] -> [N,T] -> [N,T] -> [N,]
                self.lengths = tf.reduce_sum(tf.sign(tf.abs(self.input)),
                                             axis=-1)

            # 1.5 Embedding Layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.embedding = tf.Variable(
                    # 指定初始化的范围
                    tf.random_uniform([self.vocab_size, self.embedding_size],
                                      -1.0, 1.0),
                    name="W")
                # embedded_chars结构为[batch_size, sequence_length, embedding_size], [N, T, E]
                self.embedded_chars = tf.nn.embedding_lookup(
                    self.embedding, self.input)
                # 转化为4维的,原本是三维的,tf处理的是4维的,新维度是-1;
                # [batch_size, sequence_length, embedding_size, channel], [N, T, E, 1]
                self.embedded_chars_expanded = tf.expand_dims(
                    self.embedded_chars, -1)

            # 2. Build CNN + LSTM output
            outputs = []
            num_filters_total = 0
            print(filter_heights, num_filters)
            with tf.variable_scope("cnn-rnn"):
                for idx, filter_height in enumerate(self.filter_heights):
                    with tf.variable_scope("conv-%s" % idx):
                        # Convolution Layer
                        num_filters_total += self.num_filters[idx]
                        # filter_size选几个单词h,embedding_size每个占了多长w   7*5*1  输入1维,输出128维 128个特征图
                        filter_shape = [
                            filter_height, self.embedding_size, 1,
                            self.num_filters[idx]
                        ]
                        # 高斯初始化
                        print(filter_shape)
                        W = tf.Variable(tf.truncated_normal(filter_shape,
                                                            stddev=0.01),
                                        name="W")
                        print(W)
                        # 初始化为常量0.1
                        b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                        name="b")
                        print(b)
                        conv = tf.nn.conv2d(
                            self.embedded_chars_expanded,
                            W,
                            strides=[1, 1, 1, 1],
                            padding="VALID",  # 不做padding
                            name="conv")
                        # Apply nonlinearity: [N, H, W, C]
                        # N: 样本数目(批次大小)
                        # H: 卷积之后的高度: h = length - filter_height + 1
                        # W: 1
                        # C: self.num_filters[i]
                        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                    with tf.variable_scope("lstm-%s" % idx):
                        # 0. 构建lstm的输入以及长度(因为这里的卷积不进行填充,序列长度会发生改变)
                        lengths = self.lengths - filter_height + 1
                        cell_inputs = tf.squeeze(
                            h, axis=2)  # [B,T,1,D] -> [B,T,D]

                        # 1. 构建RNN Cell
                        def cell(units):
                            return tf.nn.rnn_cell.BasicLSTMCell(units)

                        cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                            cell(self.num_units) for _ in range(self.layers)
                        ])
                        cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells=[
                            cell(self.num_units) for _ in range(self.layers)
                        ])

                        # 2. 动态构建RNN结构
                        (output_fw,
                         output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                             cell_fw=cell_fw,  # 正向RNN Cell
                             cell_bw=cell_bw,  # 反向RNN Cell
                             inputs=
                             cell_inputs,  # RNN的输入,动态RNN要求输入的数据格式必须为: [B,T,D]
                             sequence_length=lengths,  # RNN输入数据的序列长度,必须为: [B,]
                             dtype=cell_inputs.dtype  # RNN初始化状态的数据类型
                         )

                        # 3. 结果拼接(如果是做反向的LSTM的话,获取最后一个时刻对应的输出值实际上是无用的)
                        batch_size = tf.shape(output_fw)[0]  # 获取批次大小
                        indices_fw = tf.concat(
                            [
                                tf.reshape(tf.range(batch_size),
                                           shape=(-1, 1)),  # 样本索引, [0,N-1]
                                tf.reshape(
                                    lengths - 1,
                                    shape=(-1, 1))  # 样本长度最后一个时刻的索引值, 每个样本的长度信息
                            ],
                            axis=-1)
                        indices_bw = tf.concat(
                            [
                                tf.reshape(tf.range(batch_size),
                                           shape=(-1, 1)),  # 样本索引, [0,N-1]
                                tf.reshape(tf.zeros_like(lengths - 1),
                                           shape=(-1, 1))  # 反向获取第一个时刻的值,索引位置为0
                            ],
                            axis=-1)
                        # 获取对应索引位置的值后,进行拼接
                        output = tf.concat(
                            (
                                tf.gather_nd(
                                    output_fw, indices_fw
                                ),  # 基于索引获取对应位置的值,[B,U], 获取正向的最后一个时刻的值
                                tf.gather_nd(output_bw, indices_bw
                                             )  # 基于索引获取对应位置的值,[B,U], 获取第一个时刻的值
                            ),
                            axis=-1)
                        outputs.append(output)

                # 做一个合并
                output = tf.concat(outputs, -1)

                # d. 做一个drop out操作
                h_drop = tf.nn.dropout(output,
                                       keep_prob=self.dropout_keep_prob)

            # 3. Build FC output
            with tf.variable_scope("fc"):
                in_units = h_drop.get_shape()[-1]
                w = tf.get_variable(name='w', shape=[in_units, self.n_class])
                b = tf.get_variable(name='b', shape=[self.n_class])
                self.scores = tf.nn.xw_plus_b(h_drop,
                                              weights=w,
                                              biases=b,
                                              name='scores')
                self.predictions = tf.argmax(self.scores,
                                             axis=1,
                                             name='predictions')

            # 4. Build Loss
            with tf.variable_scope("loss"):
                self.losses = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=self.output, logits=self.scores))
                tf.losses.add_loss(self.losses)
                self.total_loss = tf.losses.get_total_loss(name='total_loss')
                tf.summary.scalar('total_loss', self.total_loss)
                tf.summary.scalar('loss', self.losses)

            # 5. Build Estimate eval
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(self.predictions,
                                               tf.cast(self.output, tf.int64))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                       tf.float32),
                                               name='accuracy')
                tf.summary.scalar('accuracy', self.accuracy)
Esempio n. 12
0
    def regular_log_prob_fn(params):
      """Evaluates the regression model's log-density at `params`.

      The result is the categorical log-likelihood of `y` (logits come from
      `x` and the scaled coefficients) plus the log-prior of the coefficients
      and, for hierarchical variants, of the hyper-parameters.

      Args:
        params: Parameter tensor. Its leading axis is kept in the result. For
          the "centered" and "non_centered" variants the last two entries along
          the final axis are read as (pre-softplus) `tau_0` and `mu_0`.

      Returns:
        A tensor of log-densities, one per entry of the leading axis of
        `params`.

      Raises:
        ValueError: If `regression_hier_type` is not one of "none", "centered"
          or "non_centered".
      """
      if regression_hier_type not in ("none", "centered", "non_centered"):
        raise ValueError("Unknown regression_hier_type:" + regression_hier_type)

      is_hierarchical = regression_hier_type != "none"

      # Split the flat parameter vector into coefficients and hyper-parameters.
      if is_hierarchical:
        mu_0 = params[Ellipsis, -1]
        tau_0 = tf.nn.softplus(params[Ellipsis, -2])
        beta = params[Ellipsis, :-2]
      else:
        beta = params

      if regression_hier_type == "non_centered":
        # Map the raw coefficients through the hyper-parameters before they
        # enter the likelihood.
        beta_scaled = beta / tf.expand_dims(tau_0, -1) + tf.expand_dims(
            mu_0, -1)
      else:
        beta_scaled = beta

      def data_log_likelihood(x_part, y_part):
        """Sums the categorical log-likelihood over the given rows."""
        weights = tf.reshape(beta_scaled, [-1, num_features, num_classes])
        y_dist = tfd.Categorical(
            logits=tf.einsum("ij,kjm->kim", x_part, weights))
        return tf.reduce_sum(y_dist.log_prob(y_part), -1)

      if batch_size:
        # Walk over the data in slices of `batch_size` rows, one at a time,
        # and add up the per-slice log-likelihoods.
        def scan_step(_, start):
          stop = start + batch_size
          return data_log_likelihood(x[start:stop], y[start:stop])

        per_slice = tf.scan(
            scan_step,
            tf.range(0, x.shape[0], batch_size),
            initializer=tf.zeros(tf.shape(params)[:1]),
            parallel_iterations=1)
        log_prob = tf.reduce_sum(per_slice, 0)
      else:
        log_prob = data_log_likelihood(x, y)

      def make_beta_dist(loc, scale):
        """Builds the coefficient prior selected by `regression_beta_prior`."""
        if regression_beta_prior == "normal":
          return tfd.Normal(loc=loc, scale=scale)

        def fill_if_scalar(value):
          # The multivariate Student-t is given explicit
          # [leading, num_features * num_classes] parameters, so rank-0
          # values are expanded to that shape.
          if tf.convert_to_tensor(value).shape.ndims != 0:
            return value
          return tf.fill(
              tf.stack([tf.shape(params)[0], num_features * num_classes]),
              value)

        loc = fill_if_scalar(loc)
        scale = fill_if_scalar(scale)
        return tfd.MultivariateStudentTLinearOperator(
            loc=loc, scale=tf.linalg.LinearOperatorDiag(scale), df=t_dof)

      if is_hierarchical:
        # Hyper-priors on the shared location and scale parameters.
        mu_0_dist = tfd.Normal(loc=0.0, scale=10.0)
        tau_0_dist = tfd.Gamma(2.0, 1.0)
        log_prob += mu_0_dist.log_prob(mu_0) + tau_0_dist.log_prob(tau_0)

        if regression_hier_type == "centered":
          tiling = [1, num_features * num_classes]
          mu_0 = tf.tile(tf.expand_dims(mu_0, -1), tiling)
          tau_0 = tf.tile(tf.expand_dims(tau_0, -1), tiling)
          beta_dist = make_beta_dist(loc=mu_0, scale=1.0 / tau_0)
        else:
          # "non_centered": the raw coefficients get a standard prior.
          beta_dist = make_beta_dist(loc=0.0, scale=1.0)
      else:
        beta_dist = make_beta_dist(loc=0.0, scale=10.0)

      log_prob += tf.reduce_sum(beta_dist.log_prob(beta), -1)
      return log_prob
def multilevel_roi_align(features, boxes, box_levels, output_size,
                         num_samples_per_cell_y=1, num_samples_per_cell_x=1,
                         align_corners=False, extrapolation_value=0.0,
                         scope=None):
  """Applies RoI Align op and returns feature for boxes.

  Given multiple features maps indexed by different levels, and a set of boxes
  where each box is mapped to a certain level, this function selectively crops
  and resizes boxes from the corresponding feature maps.

  We follow the RoI Align technique in https://arxiv.org/pdf/1703.06870.pdf
  figure 3. Specifically, each box is subdivided uniformly into a grid
  consisting of output_size[0] x output_size[1] rectangular cells. Within each
  cell we select `num_samples_per_cell_y` x `num_samples_per_cell_x` points
  uniformly and compute feature values using bilinear interpolation. Finally,
  we average pool the interpolated values in each cell to obtain a
  [output_size[0], output_size[1], channels] feature.

  If `align_corners` is true, sampling points are uniformly spread such that
  corner points exactly overlap corners of the boxes.

  In this function we also follow the convention of treating feature pixels as
  point objects with no spatial extent.

  Args:
    features: A list of 4D float tensors of shape [batch_size, max_height,
      max_width, channels] containing features. Note that each feature map must
      have the same number of channels.
    boxes: A 3D float tensor of shape [batch_size, num_boxes, 4] containing
      boxes of the form [ymin, xmin, ymax, xmax] in normalized coordinates.
    box_levels: A 2D int32 tensor of shape [batch_size, num_boxes]
      representing the feature level index for each box.
    output_size: An list of two integers [size_y, size_x] indicating the output
      feature size for each box.
    num_samples_per_cell_y: Number of grid points to sample along y axis in each
      cell.
    num_samples_per_cell_x: Number of grid points to sample along x axis in each
      cell.
    align_corners: Whether to align the corner grid points exactly with box
      corners.
    extrapolation_value: a float value to use for extrapolation.
    scope: Scope name to use for this op.

  Returns:
    A 5D float tensor of shape [batch_size, num_boxes, output_size[0],
    output_size[1], channels] representing the cropped features.
  """
  with tf.name_scope(scope, 'MultiLevelRoIAlign'):
    # After padding, `features` is a single tensor indexed as
    # [batch, level, height, width, channels].
    features, true_feature_shapes = pad_to_max_size(features)
    batch_size = tf.shape(features)[0]
    num_levels = features.get_shape().as_list()[1]
    max_feature_height = tf.shape(features)[2]
    max_feature_width = tf.shape(features)[3]
    num_filters = features.get_shape().as_list()[4]
    num_boxes = tf.shape(boxes)[1]

    # Convert boxes to absolute co-ordinates.
    true_feature_shapes = tf.cast(true_feature_shapes, dtype=boxes.dtype)
    true_feature_shapes = tf.gather(true_feature_shapes, box_levels)
    boxes *= tf.concat([true_feature_shapes - 1] * 2, axis=-1)

    size_y = output_size[0] * num_samples_per_cell_y
    size_x = output_size[1] * num_samples_per_cell_x
    box_grid_y, box_grid_x = box_grid_coordinate_vectors(
        boxes, size_y=size_y, size_x=size_x, align_corners=align_corners)
    (feature_grid_y0, feature_grid_x0, feature_grid_y1,
     feature_grid_x1) = feature_grid_coordinate_vectors(box_grid_y, box_grid_x)
    # Interleave the two neighbouring grid coordinates of every sample point so
    # each axis holds 2 * size entries per box.
    feature_grid_y = tf.reshape(
        tf.stack([feature_grid_y0, feature_grid_y1], axis=3),
        [batch_size, num_boxes, -1])
    feature_grid_x = tf.reshape(
        tf.stack([feature_grid_x0, feature_grid_x1], axis=3),
        [batch_size, num_boxes, -1])
    feature_coordinates = ravel_indices(feature_grid_y, feature_grid_x,
                                        num_levels, max_feature_height,
                                        max_feature_width, box_levels)
    valid_indices = _valid_indicator(feature_grid_y, feature_grid_x,
                                     true_feature_shapes)
    # Mark invalid coordinates with -1 before gathering.
    feature_coordinates = tf.where(valid_indices, feature_coordinates,
                                   -1 * tf.ones_like(feature_coordinates))
    flattened_features = tf.reshape(features, [-1, num_filters])
    flattened_feature_values = _gather_valid_indices(flattened_features,
                                                     feature_coordinates,
                                                     extrapolation_value)
    features_per_box = tf.reshape(
        flattened_feature_values,
        [batch_size, num_boxes, size_y * 2, size_x * 2, num_filters])

    # Cast tensors into dtype of features.
    box_grid_y = tf.cast(box_grid_y, dtype=features_per_box.dtype)
    box_grid_x = tf.cast(box_grid_x, dtype=features_per_box.dtype)
    feature_grid_y0 = tf.cast(feature_grid_y0, dtype=features_per_box.dtype)
    feature_grid_x0 = tf.cast(feature_grid_x0, dtype=features_per_box.dtype)

    # RoI Align operation is a bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3 onto point y, x given by
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    #
    # Unrolling the matrix multiplies gives us:
    # f(y, x) = (hy * hx) f00 + (hy * lx) f01 + (ly * hx) f10 + (lx * ly) f11
    # f(y, x) = w00 * f00 + w01 * f01 + w10 * f10 + w11 * f11
    #
    # This can be computed by applying pointwise multiplication and sum_pool in
    # a 2x2 window.
    ly = box_grid_y - feature_grid_y0
    lx = box_grid_x - feature_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx

    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3), [batch_size, num_boxes, size_y * 2, 1])

    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, size_x * 2])

    # Multiplier 4 is to make tf.nn.avg_pool behave like sum_pool.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    # The kernel gets a trailing axis so it broadcasts over the channels. It
    # must be a plain tensor: wrapping it in a tuple would add a leading axis.
    features_per_box *= tf.expand_dims(interpolation_kernel, axis=4)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, size_y * 2, size_x * 2, num_filters])

    # This combines the two pooling operations - sum_pool to perform bilinear
    # interpolation and avg_pool to pool the values in each bin.
    features_per_box = tf.nn.avg_pool(
        features_per_box,
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1],
        [1, num_samples_per_cell_y * 2, num_samples_per_cell_x * 2, 1], 'VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size[0], output_size[1], num_filters])

    return features_per_box
Esempio n. 14
0
    def simulate(self, action):
        """Advances the simulated environment by one step using the model.

        The action is repeated `self._num_frames` times along axis 1 and fed
        to `self._model.infer` together with the frame history. The predicted
        frame is written to `self._observ` and pushed into the history buffer,
        and `self._reset_model` is cleared afterwards.

        Args:
            action: Action tensor for the current step.

        Returns:
            A `(reward, done)` pair of tensors. `reward` is the predicted
            reward shifted by `self._min_reward`, plus the intrinsic
            uncertainty reward when `self._intrinsic_reward_scale` is set.
            `done` is a constant all-False tensor of shape `(batch_size,)`.
            Both carry control dependencies on the state updates above.

        Raises:
            ValueError: If intrinsic rewards are enabled but the model output
                has no 'targets_logits' entry.
        """
        with tf.name_scope("environment/simulate"):
            actions = tf.concat([tf.expand_dims(action, axis=1)] *
                                self._num_frames,
                                axis=1)
            history = self.history_buffer.get_all_elements()
            with tf.variable_scope(tf.get_variable_scope(),
                                   reuse=tf.AUTO_REUSE):
                # We only need 1 target frame here, set it.
                hparams_target_frames = self._model.hparams.video_num_target_frames
                self._model.hparams.video_num_target_frames = 1
                try:
                    model_output = self._model.infer({
                        "inputs":
                        history,
                        "input_action":
                        actions,
                        "reset_internal_states":
                        self._reset_model.read_value()
                    })
                finally:
                    # Restore the shared hparams even if inference fails, so a
                    # failed call does not leave the model set to one frame.
                    self._model.hparams.video_num_target_frames = hparams_target_frames

            observ = tf.cast(tf.squeeze(model_output["targets"], axis=1),
                             self.observ_dtype)

            reward = tf.to_float(model_output["target_reward"])
            reward = tf.reshape(reward,
                                shape=(self.batch_size, )) + self._min_reward

            if self._intrinsic_reward_scale:
                # Use the model's uncertainty about its prediction as an intrinsic
                # reward. The uncertainty is measured by the log probability of the
                # predicted pixel value.
                if "targets_logits" not in model_output:
                    raise ValueError(
                        "The use of intrinsic rewards requires access to "
                        "the logits. Ensure that model.infer returns "
                        "'targets_logits'")
                uncertainty_reward = compute_uncertainty_reward(
                    model_output["targets_logits"], model_output["targets"])
                # Scale the bonus and cap it at 1.
                uncertainty_reward = tf.minimum(
                    1., self._intrinsic_reward_scale * uncertainty_reward)
                uncertainty_reward = tf.Print(uncertainty_reward,
                                              [uncertainty_reward],
                                              message="uncertainty_reward",
                                              first_n=1,
                                              summarize=8)
                reward += uncertainty_reward

            done = tf.constant(False, tf.bool, shape=(self.batch_size, ))

            with tf.control_dependencies([observ]):
                # Optionally dump the frame, depending on the video condition.
                dump_frame_op = tf.cond(
                    self._video_condition,
                    lambda: tf.py_func(
                        self._video_dump_frame,  # pylint: disable=g-long-lambda
                        [observ, reward],
                        []),
                    tf.no_op)
                with tf.control_dependencies([
                        self._observ.assign(observ),
                        self.history_buffer.move_by_one_element(observ),
                        dump_frame_op
                ]):
                    clear_reset_model_op = tf.assign(self._reset_model,
                                                     tf.constant(0.0))
                    with tf.control_dependencies([clear_reset_model_op]):
                        return tf.identity(reward), tf.identity(done)
Esempio n. 15
0
def _single_column_cell_selection_loss(token_logits, column_logits, label_ids,
                                       cell_index, col_index, cell_mask):
    """Computes the loss for cell selection constrained to a single column.

    The loss is a hierarchical log-likelihood: a column is predicted first,
    then cells are selected within that column. Cells outside the selected
    column are never selected.

    Args:
        token_logits: <float>[batch_size, seq_length] Logits per token.
        column_logits: <float>[batch_size, max_num_cols] Logits per column.
        label_ids: <int32>[batch_size, seq_length] Labels per token.
        cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index
            that groups tokens into cells.
        col_index: segmented_tensor.IndexMap [batch_size, seq_length] Index
            that groups tokens into columns.
        cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask
            per cell, 1 for cells that exist in the example and 0 for padding.

    Returns:
        selection_loss_per_example: <float>[batch_size] Loss for each example.
        logits: <float>[batch_size, seq_length] New logits which are only
            allowed to select cells in a single column. Logits outside of the
            most likely column according to `column_logits` are pushed to a
            very low value (such that the probabilities are 0).
    """
    # --- Column-level target and loss -------------------------------------
    # The target column is the one holding the most selected tokens.
    float_labels = tf.cast(label_ids, tf.float32)
    labels_per_column, _ = segmented_tensor.reduce_sum(float_labels, col_index)
    most_labeled_column = tf.argmax(labels_per_column,
                                    axis=-1,
                                    output_type=tf.int32)
    # With nothing selected anywhere, the target becomes the special column
    # id 0, which stands for "select nothing".
    no_cell_selected = tf.equal(tf.reduce_max(labels_per_column, axis=-1), 0)
    column_label = tf.where(no_cell_selected,
                            tf.zeros_like(most_labeled_column),
                            most_labeled_column)

    column_dist = tfp.distributions.Categorical(logits=column_logits)
    column_loss_per_example = -column_dist.log_prob(column_label)

    # --- Cell-level loss, restricted to the target column -----------------
    # Go from per-token to per-cell logits and labels.
    logits_per_cell, _ = segmented_tensor.reduce_mean(token_logits, cell_index)
    int_labels = tf.cast(label_ids, tf.int32)
    labels_per_cell, labels_index = segmented_tensor.reduce_max(
        int_labels, cell_index)

    column_id_for_cells = cell_index.project_inner(labels_index).indices
    in_target_column = tf.equal(column_id_for_cells,
                                tf.expand_dims(column_label, axis=1))
    column_mask = tf.cast(in_target_column, tf.float32)

    cell_dist = tfp.distributions.Bernoulli(logits=logits_per_cell)
    cell_log_prob = cell_dist.log_prob(labels_per_cell)
    summed_cell_nll = -tf.reduce_sum(cell_log_prob * column_mask * cell_mask,
                                     axis=1)
    # Normalize by the number of (real) cells in the target column.
    cells_in_column = tf.reduce_sum(column_mask * cell_mask, axis=1)
    cell_loss = summed_cell_nll / (cells_in_column + _EPSILON_ZERO_DIVISION)

    # The cell term only counts when at least one cell is labeled.
    selection_loss_per_example = column_loss_per_example + tf.where(
        no_cell_selected, tf.zeros_like(column_loss_per_example), cell_loss)

    # --- Output logits, restricted to the *model's* column ----------------
    # Probabilities outside the column picked by the model are driven to 0.
    # This keeps compatibility with models that select cells from several
    # columns.
    selected_column_id = tf.argmax(column_logits,
                                   axis=-1,
                                   output_type=tf.int32)
    in_selected_column = tf.equal(column_id_for_cells,
                                  tf.expand_dims(selected_column_id, axis=-1))
    selected_column_mask = tf.cast(in_selected_column, tf.float32)
    # Cells carrying the special column id 0 are never selectable.
    selected_column_mask = tf.where(tf.equal(column_id_for_cells, 0),
                                    tf.zeros_like(selected_column_mask),
                                    selected_column_mask)
    keep_mask = cell_mask * selected_column_mask
    masked_logits_per_cell = (
        logits_per_cell + _CLOSE_ENOUGH_TO_LOG_ZERO * (1.0 - keep_mask))
    logits = segmented_tensor.gather(masked_logits_per_cell, cell_index)

    return selection_loss_per_example, logits
Esempio n. 16
0
  def _encoder_preprocessor(
      self, position_sequence, n_node, global_context, particle_types):
    """Builds the input graph (node, edge and global features).

    Args:
      position_sequence: Particle positions over time. The last entry along
        axis 1 is used as the current position. Presumably shaped
        [num_particles, sequence_length, num_dimensions] -- TODO confirm.
      n_node: Passed to the connectivity computation and stored on the
        returned graph.
      global_context: Optional global features. When not None they are
        normalized with the "context" statistics.
      particle_types: Indices looked up in the particle type embedding table.
        Only used when there is more than one particle type.

    Returns:
      A `gn.graphs.GraphsTuple` holding the concatenated node features, the
      concatenated edge features, the (normalized) globals and the
      connectivity.
    """
    radius = self._connectivity_radius
    latest_position = position_sequence[:, -1]
    velocities = time_diff(position_sequence)  # Finite-difference.

    # Graph connectivity, computed from the current positions.
    senders, receivers, n_edge = (
        connectivity_utils.compute_connectivity_for_batch_pyfunc(
            latest_position, n_node, radius))

    # ---- Node features ----
    # Normalized velocities, with axes 1 and 2 merged into one feature axis.
    velocity_stats = self._normalization_stats["velocity"]
    normalized_velocities = (
        velocities - velocity_stats.mean) / velocity_stats.std
    node_features = [snt.MergeDims(start=1, size=2)(normalized_velocities)]

    # Distances to the lower and upper walls, in units of the connectivity
    # radius and clipped to [-1, 1]. `self._boundaries` is indexed as
    # [dimension, lower/upper].
    boundaries = tf.constant(self._boundaries, dtype=tf.float32)
    lower_wall = tf.expand_dims(boundaries[:, 0], 0)
    upper_wall = tf.expand_dims(boundaries[:, 1], 0)
    wall_distances = tf.concat(
        [latest_position - lower_wall, upper_wall - latest_position], axis=1)
    node_features.append(
        tf.clip_by_value(wall_distances / radius, -1., 1.))

    # A learned embedding of the particle type, when types can differ.
    if self._num_particle_types > 1:
      node_features.append(
          tf.nn.embedding_lookup(
              self._particle_type_embedding, particle_types))

    # ---- Edge features ----
    # Sender-minus-receiver displacement and its norm, both scaled by the
    # connectivity radius.
    sender_positions = tf.gather(latest_position, senders)
    receiver_positions = tf.gather(latest_position, receivers)
    relative_displacements = (
        sender_positions - receiver_positions) / radius
    relative_distances = tf.norm(
        relative_displacements, axis=-1, keepdims=True)
    edge_features = [relative_displacements, relative_distances]

    # ---- Globals ----
    if global_context is not None:
      context_stats = self._normalization_stats["context"]
      # The context can be all zeros in some datasets; flooring the std with
      # an epsilon keeps the division numerically stable.
      global_context = (global_context - context_stats.mean) / tf.math.maximum(
          context_stats.std, STD_EPSILON)

    return gn.graphs.GraphsTuple(
        nodes=tf.concat(node_features, axis=-1),
        edges=tf.concat(edge_features, axis=-1),
        # Per the original note, the graph network is expected to append the
        # globals to the node features.
        globals=global_context,
        n_node=n_node,
        n_edge=n_edge,
        senders=senders,
        receivers=receivers,
        )
Esempio n. 17
0
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all levels.

    Args:
        cls_outputs: an OrderDict with keys representing levels and values
            representing logits in [batch_size, height, width, num_anchors].
        box_outputs: an OrderDict with keys representing levels and values
            representing box regression targets in [batch_size, height, width,
            num_anchors * 4].
        labels: the dictionary that returned from dataloader that includes
            groundtruth targets.
        params: the dictionary including training parameters specified in
            default_hparams function in this file. `positives_momentum` is
            optional; a missing or None value disables both the moving-average
            and the cross-replica normalization of the positive count.

    Returns:
        total_loss: a tensor representing total loss reducing from class and
            box losses from all levels.
        cls_loss: a tensor representing total class loss.
        box_loss: a tensor representing total box regression loss, or 0 when
            `params['box_loss_weight']` is falsy.
        box_iou_loss: a tensor representing total box iou loss, or 0 when
            `params['iou_loss_type']` is falsy.
    """
    # Sum all positives in a batch for normalization and avoid zero
    # num_positives_sum, which would lead to inf loss during training
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    # Read the optional setting once, so both branches below see the same
    # value and a missing/None entry cannot raise.
    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # normalize the num_positive_examples for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=positives_momentum)
    elif positives_momentum < 0:
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []
    for level in levels:
        # Onehot encoding for classification labels.
        cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                          params['num_classes'])

        # Fold the anchor and class axes of the one-hot targets into a single
        # axis; which axis depends on the data format.
        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        # Split the anchor and class axes apart again so the loss can be
        # masked per anchor.
        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        # Zero out the loss wherever the class target equals -2.
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2),
                           -1), tf.float32)
        cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        # Flatten predictions and targets of all levels into [-1, 4] rows and
        # decode both against the tiled anchors before comparing them.
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0

    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
Esempio n. 18
0
def calc_center_bb(binary_class_mask):
    """Computes a bounding box, its midpoint and a crop size for each mask.

    For every sample the coordinates of all foreground entries are collected
    and reduced to their minimum and maximum along both spatial axes. Note
    that the returned center is the midpoint of that bounding box, not a
    center of mass.

    Args:
        binary_class_mask: tensor with a statically known shape of rank 3, or
            of rank 4 with a last axis that can be squeezed. Entries that
            equal 1 after a cast to int32 are treated as foreground. The
            static batch size must be smaller than both spatial extents.

    Returns:
        center: float32 tensor of shape [batch, 2] holding the bounding-box
            midpoint; replaced by [160.0, 160.0] whenever the computed value
            is not finite (presumably when a mask has no foreground entry).
        bb: float32 tensor of shape [batch, 2, 2] where
            bb[i] = [[x_min, x_max], [y_min, y_max]]. No fallback is applied.
        crop_size: float32 tensor of shape [batch, 1] holding the larger of
            the two box extents; replaced by [100.0] when not finite.
    """
    with tf.variable_scope('calc_center_bb'):
        # Foreground is "equal to 1 after an int32 cast".
        mask = tf.equal(tf.cast(binary_class_mask, tf.int32), 1)
        if len(mask.get_shape().as_list()) == 4:
            mask = tf.squeeze(mask, [3])

        static_shape = mask.get_shape().as_list()
        assert len(static_shape) == 3, "binary_class_mask must be 3D."
        batch, extent_x, extent_y = static_shape
        assert (batch < extent_x) and (
            batch < extent_y), "binary_class_mask must be [Batch, Width, Height]"

        # Coordinate grids: grid_x varies along axis 0, grid_y along axis 1.
        grid_x = tf.tile(tf.expand_dims(tf.range(extent_x), 1), [1, extent_y])
        grid_y = tf.tile(tf.expand_dims(tf.range(extent_y), 0), [extent_x, 1])

        def _finite_or(value, fallback):
            """Returns `value` if all its entries are finite, else `fallback`."""
            return tf.cond(tf.reduce_all(tf.is_finite(value)),
                           lambda: value,
                           lambda: tf.constant(fallback))

        boxes = []
        centers = []
        sizes = []
        for idx in range(batch):
            # Coordinates of the foreground entries of sample `idx`.
            xs = tf.cast(tf.boolean_mask(grid_x, mask[idx, :, :]), tf.float32)
            ys = tf.cast(tf.boolean_mask(grid_y, mask[idx, :, :]), tf.float32)

            x_lo = tf.reduce_min(xs)
            x_hi = tf.reduce_max(xs)
            y_lo = tf.reduce_min(ys)
            y_hi = tf.reduce_max(ys)

            # Stacking along axis 1 yields [[x_lo, x_hi], [y_lo, y_hi]].
            corner_lo = tf.stack([x_lo, y_lo])
            corner_hi = tf.stack([x_hi, y_hi])
            boxes.append(tf.stack([corner_lo, corner_hi], 1))

            mid_x = 0.5 * (x_hi + x_lo)
            mid_y = 0.5 * (y_hi + y_lo)
            midpoint = _finite_or(tf.stack([mid_x, mid_y], 0), [160.0, 160.0])
            midpoint.set_shape([2])
            centers.append(midpoint)

            width = x_hi - x_lo
            height = y_hi - y_lo
            side = _finite_or(tf.expand_dims(tf.maximum(width, height), 0),
                              [100.0])
            side.set_shape([1])
            sizes.append(side)

        bb = tf.stack(boxes)
        center = tf.stack(centers)
        crop_size = tf.stack(sizes)

        return center, bb, crop_size
Esempio n. 19
0
        def metric_fn(**kwargs):
            """Builds the evaluation metric dictionary from model outputs.

            Reads `params` from the enclosing scope. The keyword arguments
            used are 'boxes', 'scores', 'classes', 'image_ids',
            'image_scales', 'cls_loss_repeat', 'box_loss_repeat' and, when
            no 'testdev_dir' is configured, 'groundtruth_data'.

            Returns:
              A dict with 'cls_loss' and 'box_loss' mean metrics plus every
              entry produced by the COCO evaluation metric.
            """
            nms_configs = params['nms_configs']
            if not nms_configs.get('pyfunc', True):
                # These two branches should be equivalent, but currently they are not.
                # TODO(tanmingxing): enable the non_pyfun path after bug fix.
                nms_boxes, nms_scores, nms_classes, _ = postprocess.per_class_nms(
                    params, kwargs['boxes'], kwargs['scores'],
                    kwargs['classes'], kwargs['image_scales'])
                img_ids = tf.cast(tf.expand_dims(kwargs['image_ids'], -1),
                                  nms_scores.dtype)
                # One column per detection attribute; presumably
                # [image_id, x, y, width, height, score, class] if boxes are
                # [y1, x1, y2, x2] -- confirm against postprocess.
                id_column = img_ids * tf.ones_like(nms_scores)
                first_column = nms_boxes[:, :, 1]
                second_column = nms_boxes[:, :, 0]
                first_extent = nms_boxes[:, :, 3] - nms_boxes[:, :, 1]
                second_extent = nms_boxes[:, :, 2] - nms_boxes[:, :, 0]
                detections_bs = tf.stack([
                    id_column,
                    first_column,
                    second_column,
                    first_extent,
                    second_extent,
                    nms_scores,
                    nms_classes,
                ],
                                         axis=-1,
                                         name='detnections')
            else:
                # Run the numpy NMS once per image of the batch.
                per_image = []
                for index in range(kwargs['boxes'].shape[0]):
                    nms_fn = functools.partial(nms_np.per_class_nms,
                                               nms_configs=nms_configs)
                    nms_inputs = [
                        kwargs['boxes'][index],
                        kwargs['scores'][index],
                        kwargs['classes'][index],
                        tf.slice(kwargs['image_ids'], [index], [1]),
                        tf.slice(kwargs['image_scales'], [index], [1]),
                        params['num_classes'],
                        nms_configs['max_output_size'],
                    ]
                    per_image.append(
                        tf.numpy_function(nms_fn, nms_inputs, tf.float32))
                detections_bs = postprocess.transform_detections(
                    tf.stack(per_image))

            testdev_dir = params.get('testdev_dir', None)
            if not testdev_dir:
                # Validation: score the detections against groundtruth.
                logging.info('Eval val with groudtruths %s.',
                             params['val_json_file'])
                eval_metric = coco_metric.EvaluationMetric(
                    filename=params['val_json_file'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, kwargs['groundtruth_data'],
                    params['label_map'])
            else:
                # Test-dev: no groundtruth, a zero tensor is passed instead.
                logging.info('Eval testdev_dir %s', params['testdev_dir'])
                eval_metric = coco_metric.EvaluationMetric(
                    testdev_dir=params['testdev_dir'])
                coco_metrics = eval_metric.estimator_metric_fn(
                    detections_bs, tf.zeros([1]))

            # Add metrics to output.
            output_metrics = {
                'cls_loss': tf.metrics.mean(kwargs['cls_loss_repeat']),
                'box_loss': tf.metrics.mean(kwargs['box_loss_repeat']),
            }
            output_metrics.update(coco_metrics)
            return output_metrics
Esempio n. 20
0
  def _parse_train_data(self, data):
    """Parses data for training.

    Optionally drops crowd annotations, normalizes and (optionally) flips the
    image, resizes/crops image and boxes, crops instance masks when masks are
    enabled, and assigns RPN anchor targets.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. Row 2 is used below as the image scale
          and row 3 as the crop offset; rows 0 and 1 are presumably
          [original_height, original_width] and [scaled_height, scaled_width]
          (confirm against input_utils.resize_and_crop_image).
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
           in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
           image that is fed to the network. The tensor is padded with -1 to
           the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size. Only present
          when self._include_mask is set.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    if self._include_mask:
      masks = data['groundtruth_instance_masks']

    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
      num_groundtrtuhs = tf.shape(classes)[0]
      with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
        # When `is_crowds` is empty, keep every annotation instead of
        # filtering on it.
        indices = tf.cond(
            tf.greater(tf.size(is_crowds), 0),
            lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
      classes = tf.gather(classes, indices)
      boxes = tf.gather(boxes, indices)
      if self._include_mask:
        masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
      if self._include_mask:
        image, boxes, masks = input_utils.random_horizontal_flip(
            image, boxes, masks)
      else:
        image, boxes = input_utils.random_horizontal_flip(
            image, boxes)

    # Converts boxes from normalized coordinates to pixel coordinates.
    # Now the coordinates of boxes are w.r.t. the original image.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=input_utils.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    # The static shape is read here, so it must be fully defined.
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    # Now the coordinates of boxes are w.r.t the scaled image.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    if self._include_mask:
      masks = tf.gather(masks, indices)
      # Transfer boxes to the original image space and do normalization.
      # The 2-element offset/scale are tiled to 4 elements so that they apply
      # to both corners of each box.
      cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
      cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
      cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape)
      num_masks = tf.shape(masks)[0]
      # Each mask is treated as its own single-channel image (box i crops
      # mask i) and resized to a square of side self._mask_crop_size.
      masks = tf.image.crop_and_resize(
          tf.expand_dims(masks, axis=-1),
          cropped_boxes,
          box_indices=tf.range(num_masks, dtype=tf.int32),
          crop_size=[self._mask_crop_size, self._mask_crop_size],
          method='bilinear')
      masks = tf.squeeze(masks, axis=-1)

    # Assigns anchor targets.
    # Note that after the target assignment, box targets are absolute pixel
    # offsets w.r.t. the scaled image.
    input_anchor = anchor.Anchor(
        self._min_level,
        self._max_level,
        self._num_scales,
        self._aspect_ratios,
        self._anchor_size,
        (image_height, image_width))
    anchor_labeler = anchor.RpnAnchorLabeler(
        input_anchor,
        self._rpn_match_threshold,
        self._rpn_unmatched_threshold,
        self._rpn_batch_size_per_im,
        self._rpn_fg_fraction)
    rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
        boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
      image = tf.cast(image, dtype=tf.bfloat16)

    # Packs labels for model_fn outputs.
    labels = {
        'anchor_boxes': input_anchor.multilevel_boxes,
        'image_info': image_info,
        'rpn_score_targets': rpn_score_targets,
        'rpn_box_targets': rpn_box_targets,
    }
    # Groundtruth tensors are padded with -1 to a fixed number of instances.
    labels['gt_boxes'] = input_utils.pad_to_fixed_size(
        boxes, self._max_num_instances, -1)
    labels['gt_classes'] = input_utils.pad_to_fixed_size(
        classes, self._max_num_instances, -1)
    if self._include_mask:
      labels['gt_masks'] = input_utils.pad_to_fixed_size(
          masks, self._max_num_instances, -1)

    return image, labels
Esempio n. 21
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator.

        Builds a ReadItTwiceBertModel with a span prediction head, optionally
        initializes variables from `init_checkpoint`, and returns the spec for
        the requested mode. Model and optimizer settings (`model_config`,
        `padding_token_id`, `init_checkpoint`, `use_tpu`, `learning_rate`,
        ...) are read from the enclosing scope.

        Args:
          features: dict of input tensors. Reads "token_ids", "block_ids",
            "block_pos" and "prefix_length"; the "entity_annotation_*" keys
            are optional; the "answer_annotation_*" keys are required in
            TRAIN mode.
          labels: unused.
          mode: a `tf.estimator.ModeKeys` value; only TRAIN and PREDICT are
            handled.
          params: dict of parameters; only logged here.

        Returns:
          A `tf.estimator.tpu.TPUEstimatorSpec` for the given mode.

        Raises:
          ValueError: if `mode` is neither TRAIN nor PREDICT.
        """

        logging.info("*** Model: Params ***")
        for name in sorted(params.keys()):
            logging.info("  %s = %s", name, params[name])
        logging.info("*** Model: Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s", name, features[name].shape)

        model = modeling.ReadItTwiceBertModel(
            config=model_config, use_one_hot_embeddings=use_one_hot_embeddings)

        span_prediction_layer = modeling.SpanPredictionHead(
            intermediate_size=model_config.intermediate_size,
            dropout_rate=model_config.hidden_dropout_prob)

        # [batch_size, main_seq_length]
        token_ids = features["token_ids"]
        main_seq_length = tf.shape(token_ids)[1]
        block_ids = features["block_ids"]
        block_pos = features["block_pos"]

        # Entity annotations are optional: `.get` yields None when absent.
        annotation_begins = features.get("entity_annotation_begins")
        annotation_ends = features.get("entity_annotation_ends")
        annotation_labels = features.get("entity_annotation_labels")

        # Do not attend padding tokens
        # [batch_size, main_seq_length, main_seq_length]
        att_mask = tf.tile(
            tf.expand_dims(tf.not_equal(token_ids, padding_token_id), 1),
            [1, main_seq_length, 1])
        att_mask = tf.cast(att_mask, dtype=tf.int32)

        main_output = model(
            token_ids=token_ids,
            training=(mode == tf.estimator.ModeKeys.TRAIN),
            block_ids=block_ids,
            block_pos=block_pos,
            att_mask=att_mask,
            annotation_begins=annotation_begins,
            annotation_ends=annotation_ends,
            annotation_labels=annotation_labels,
            enable_side_inputs=enable_side_inputs,
            num_replicas_concat=num_replicas_concat,
            cross_block_attention_mode=cross_block_attention_mode,
        ).final_hidden_states

        span_logits = span_prediction_layer(
            hidden_states=main_output,
            token_ids=token_ids,
            padding_token_id=padding_token_id,
            ignore_prefix_length=features["prefix_length"],
            training=(mode == tf.estimator.ModeKeys.TRAIN))

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = checkpoint_utils.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                # On TPU the checkpoint initialization is deferred into a
                # scaffold function handed to the estimator spec.
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # Log every trainable variable, marking those taken from the
        # checkpoint.
        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            span_prediction_loss = losses.BatchSpanCrossEntropyLoss()

            total_loss = span_prediction_loss(
                logits=span_logits,
                annotation_begins=features["answer_annotation_begins"],
                annotation_ends=features["answer_annotation_ends"],
                annotation_labels=features["answer_annotation_labels"],
                block_ids=block_ids,
                num_replicas=num_replicas_concat,
                eps=1e-5)

            # Add regularization losses.
            if model.losses:
                total_loss += tf.math.add_n(model.losses)

            train_op = optimization.create_optimizer(total_loss,
                                                     learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps,
                                                     use_tpu,
                                                     optimizer,
                                                     poly_power,
                                                     start_warmup_step,
                                                     learning_rate_schedule,
                                                     reduce_loss_sum=True)

            # Scalars are expanded to rank 1 before being passed to the host
            # call that records training summaries.
            host_inputs = {
                "global_step":
                tf.expand_dims(tf.train.get_or_create_global_step(), 0),
                "train_metrics/loss":
                tf.expand_dims(total_loss, 0),
            }
            host_call = (functools.partial(record_summary_host_fn,
                                           metrics_dir=os.path.join(
                                               FLAGS.output_dir,
                                               "train_metrics")), host_inputs)

            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn,
                host_call=host_call)
        elif mode == tf.estimator.ModeKeys.PREDICT:
            # Keep only the top `nbest_logits_for_eval` candidates; channel 0
            # of the span logits is used for begins and channel 1 for ends.
            begin_logits_values, begin_logits_indices = tf.math.top_k(
                span_logits[:, :, 0],
                k=nbest_logits_for_eval,
            )
            end_logits_values, end_logits_indices = tf.math.top_k(
                span_logits[:, :, 1],
                k=nbest_logits_for_eval,
            )

            predictions = {
                "block_ids": tf.identity(block_ids),
                "begin_logits_values": begin_logits_values,
                "begin_logits_indices": begin_logits_indices,
                "end_logits_values": end_logits_values,
                "end_logits_indices": end_logits_indices,
                "token_ids": tf.identity(token_ids),
            }
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and PREDICT modes is supported: %s" %
                             (mode))

        return output_spec
Esempio n. 22
0
    def body(self, features):
        """Encodes image and question, then decodes with a learned query.

        Args:
            features: dict with "inputs" (raw images when
                `hparams.image_input_type == "image"`, otherwise precomputed
                image features) and "question".

        Returns:
            The decoder output with an extra singleton axis inserted at
            position 1.
        """
        hp = self.hparams
        # pylint: disable=eval-used
        if hp.image_input_type != "image":
            # Inputs are already image features; use them as they are.
            image_feat = features["inputs"]
        else:
            # NOTE(review): eval() turns the `image_model_fn` hparam string
            # into a callable, so that value must never come from untrusted
            # input.
            image_feat = vqa_layers.image_embedding(
                features["inputs"],
                model_fn=eval(hp.image_model_fn),
                trainable=hp.train_resnet,
                is_training=hp.mode == tf_estimator.ModeKeys.TRAIN)

        # Flatten the image features and project them to the model width.
        image_feat = common_layers.dense(
            common_layers.flatten4d3d(image_feat), hp.hidden_size)
        utils.collect_named_outputs("norms", "image_feat_after_proj",
                                    tf.norm(image_feat, axis=-1))

        question = common_layers.flatten4d3d(features["question"])
        utils.collect_named_outputs("norms", "question_embedding",
                                    tf.norm(question, axis=-1))

        encoder_pack = prepare_image_question_encoder(image_feat, question, hp)
        encoder_input, self_attention_bias, enc_dec_attention_bias = (
            encoder_pack)

        encoder_input = tf.nn.dropout(
            encoder_input, keep_prob=1. - hp.layer_prepostprocess_dropout)

        # The encoder reuses the decoder stack without any cross-attention
        # inputs.
        encoder_output, _ = recurrent_transformer_decoder(
            encoder_input,
            None,
            self_attention_bias,
            None,
            hp,
            name="encoder")
        utils.collect_named_outputs("norms", "encoder_output",
                                    tf.norm(encoder_output, axis=-1))

        # scale query by sqrt(hidden_size)
        query_vector = tf.get_variable(
            "query", [hp.hidden_size]) * hp.hidden_size**0.5
        # Add two leading singleton axes, then repeat along the batch axis.
        query_row = tf.expand_dims(query_vector, axis=0)
        query_single = tf.expand_dims(query_row, axis=0)
        batch_size = common_layers.shape_list(encoder_input)[0]
        query = tf.tile(query_single, [batch_size, 1, 1])
        query = tf.nn.dropout(
            query, keep_prob=1. - hp.layer_prepostprocess_dropout)

        # The decoder only attends to the encoder output (no self-attention
        # bias is supplied).
        decoder_output, _ = recurrent_transformer_decoder(
            query,
            encoder_output,
            None,
            enc_dec_attention_bias,
            hp,
            name="decoder")
        utils.collect_named_outputs("norms", "decoder_output",
                                    tf.norm(decoder_output, axis=-1))

        norm_tensors = utils.convert_collection_to_dict("norms")
        vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

        # Insert a singleton dimension at axis 1.
        return tf.expand_dims(decoder_output, axis=1)
Esempio n. 23
0
def compress(args):
  """Compresses a PNG image and writes the packed bitstream to a file.

  Args:
    args: namespace providing `input_file`, `num_filters`, `checkpoint_dir`,
      `output_file` and `verbose`. With `verbose` set, distortion and rate
      statistics of the reconstruction are printed as well.
  """

  # Read the image and prepend a batch axis; only the channel count is static.
  image = tf.expand_dims(read_png(args.input_file), 0)
  image.set_shape([1, None, None, 3])
  image_shape = tf.shape(image)

  # Build the transforms and the entropy model.
  analysis_transform = AnalysisTransform(args.num_filters)
  entropy_bottleneck = tfc.EntropyBottleneck()
  synthesis_transform = SynthesisTransform(args.num_filters)

  # Latent representation and its compressed string.
  latent = analysis_transform(image)
  string = entropy_bottleneck.compress(latent)

  # Reconstruction graph (only evaluated when verbose output is requested).
  latent_hat, likelihoods = entropy_bottleneck(latent, training=False)
  reconstruction = synthesis_transform(latent_hat)
  # Crop the reconstruction back to the input's spatial size.
  reconstruction = reconstruction[:, :image_shape[1], :image_shape[2], :]

  num_pixels = tf.cast(
      tf.reduce_prod(tf.shape(image)[:-1]), dtype=tf.float32)

  # Total number of bits divided by number of pixels.
  total_log_likelihood = tf.reduce_sum(tf.log(likelihoods))
  eval_bpp = total_log_likelihood / (-np.log(2) * num_pixels)

  # Bring both images back to 0..255 range.
  image_255 = image * 255
  reconstruction_255 = tf.round(tf.clip_by_value(reconstruction, 0, 1) * 255)

  mse = tf.reduce_mean(tf.squared_difference(image_255, reconstruction_255))
  psnr = tf.squeeze(tf.image.psnr(reconstruction_255, image_255, 255))
  msssim = tf.squeeze(
      tf.image.ssim_multiscale(reconstruction_255, image_255, 255))

  with tf.Session() as sess:
    # Load the latest model checkpoint, get the compressed string and the tensor
    # shapes.
    checkpoint_path = tf.train.latest_checkpoint(
        checkpoint_dir=args.checkpoint_dir)
    tf.train.Saver().restore(sess, save_path=checkpoint_path)
    tensors = [string, tf.shape(image_255)[1:-1], tf.shape(latent)[1:-1]]
    arrays = sess.run(tensors)

    # Write a binary file with the shape information and the compressed string.
    packed = tfc.PackedTensors()
    packed.pack(tensors, arrays)
    with open(args.output_file, "wb") as out_file:
      out_file.write(packed.string)

    # Nothing more to do unless performance numbers were requested.
    if not args.verbose:
      return

    bpp_value, mse_value, psnr_value, msssim_value, pixel_count = sess.run(
        [eval_bpp, mse, psnr, msssim, num_pixels])

    # The actual bits per pixel including overhead.
    actual_bpp = len(packed.string) * 8 / pixel_count
    msssim_db = -10 * np.log10(1 - msssim_value)

    print("Mean squared error: {:0.4f}".format(mse_value))
    print("PSNR (dB): {:0.2f}".format(psnr_value))
    print("Multiscale SSIM: {:0.4f}".format(msssim_value))
    print("Multiscale SSIM (dB): {:0.2f}".format(msssim_db))
    print("Information content in bpp: {:0.4f}".format(bpp_value))
    print("Actual bits per pixel: {:0.4f}".format(actual_bpp))
def compress(args):
    """Compresses an image with the hyperprior model and writes it to a file.

    The packed output contains the conditional-bottleneck string, the
    hyperprior side string, the spatial shapes of the input, of `y` and of
    `z`, and the string produced by the second entropy bottleneck for `y`.

    :param args: namespace providing `input_file`, `num_filters`,
        `checkpoint_dir`, `output_file` and `verbose`. With `verbose` set,
        distortion and rate statistics of the reconstruction are printed.
    :return: None
    """

    # Only the image size is needed here; close the file handle right away
    # instead of leaking it.
    with Image.open(args.input_file) as img:
        w, h = img.size

    # Load input image and add batch dimension.
    x = read_png(args.input_file)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, h, w, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()
    entropy_bottleneck1 = tfc.EntropyBottleneck()
    ftransform = FTransform(args.num_filters)

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    y_prime, y_prime_likelihoods = entropy_bottleneck1(y, training=False)
    z = hyper_analysis_transform(abs(y_prime))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    c_prime = hyper_synthesis_transform(z_hat)
    # Crop the hyper-synthesis output to the spatial size of `y`.
    c_prime = c_prime[:, :y_shape[1], :y_shape[2], :]
    mean, sigma = get_sigma_mu(y_prime, c_prime, ftransform)

    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = tfc.GaussianConditional(sigma,
                                                     scale_table,
                                                     mean=mean)
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)
    y_string = entropy_bottleneck1.compress(y)

    # Transform the quantized image back (if requested).
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)  # clamp every element to [0, 1]
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)
        tensors = [
            string, side_string,
            tf.shape(x)[1:-1],
            tf.shape(y)[1:-1],
            tf.shape(z)[1:-1], y_string
        ]
        arrays = sess.run(tensors)
        # NOTE(review): diagnostic dump of the scale shape, latent shape and
        # the full `y_hat` tensor; kept to preserve the existing output.
        print(sess.run([tf.shape(sigma), tf.shape(y), y_hat]))

        # Write a binary file with the shape information and the compressed string.
        packed = tfc.PackedTensors()
        packed.pack(tensors, arrays)
        with open(args.output_file, "wb") as f:
            f.write(packed.string)

        # If requested, transform the quantized image back and measure performance.
        if args.verbose:
            # Bind the evaluated MS-SSIM to `msssim` (previously misspelled,
            # which left `msssim` as a graph tensor for the prints below).
            eval_bpp, mse, psnr, msssim, num_pixels = sess.run(
                [eval_bpp, mse, psnr, msssim, num_pixels])

            # The actual bits per pixel including overhead.
            bpp = len(packed.string) * 8 / num_pixels

            print("Mean squared error: {:0.4f}".format(mse))
            print("PSNR (dB): {:0.2f}".format(psnr))
            print("Multiscale SSIM: {:0.4f}".format(msssim))
            print("Multiscale SSIM (dB): {:0.2f}".format(-10 *
                                                         np.log10(1 - msssim)))
            print("Information content in bpp: {:0.4f}".format(eval_bpp))
            print("Actual bits per pixel: {:0.4f}".format(bpp))
Esempio n. 25
0
def GlobalToGenerator(inputs, channels):
    """Run `inputs` through one fully connected layer and add two unit axes.

    The layer is created by `fullyConnected` inside the "GlobalToGenerator1"
    variable scope. Two size-1 axes are then inserted at position 1 of its
    output.

    Args:
      inputs: tensor forwarded unchanged to `fullyConnected`.
      channels: forwarded as the second argument of `fullyConnected`
        (presumably the number of output units -- confirm against that
        helper).

    Returns:
      The layer output with two singleton axes inserted after axis 0; for a
      2-D layer output [a, b] this gives [a, 1, 1, b].
    """
    with tf.variable_scope("GlobalToGenerator1"):
        # The original author questioned why the final argument (0.01) is so
        # low; its meaning is defined by `fullyConnected`.
        projected = fullyConnected(
            inputs, channels, False, "fullyConnected_global_to_unet", 0.01)
    with_first_unit_axis = tf.expand_dims(projected, axis=1)
    return tf.expand_dims(with_first_unit_axis, axis=1)
Esempio n. 26
0
    def prepare_processing_graph(self, flags):
        """Builds a TensorFlow graph to apply the input distortions.

        Creates a graph that loads a WAVE file, decodes it, optionally
        resamples it, scales the volume, shifts it in time, adds in background
        noise, and then produces the output selected by `flags.preprocess`:
        the clamped raw audio ('raw'), an average-pooled spectrogram
        ('average'), an MFCC fingerprint ('mfcc'), or the micro frontend
        features ('micro').

        This must be called with an active TensorFlow session running, and it
        creates multiple placeholder inputs, and one output:

          - wav_filename_placeholder_: Filename of the WAV to load.
          - foreground_volume_placeholder_: How loud the main clip should be.
          - foreground_resampling_placeholder_: Controls signal
            stretching/squeezing.
          - time_shift_padding_placeholder_: Where to pad the clip.
          - time_shift_offset_placeholder_: How much to move the clip in time.
          - background_data_placeholder_: PCM sample data for background noise.
          - background_volume_placeholder_: Loudness of mixed-in background.
          - output_: Output 2D fingerprint of processed audio or raw audio.

        It also sets `merged_summaries_` and, when `flags.summaries_dir` is
        set, `summary_writer_`.

        Args:
          flags: data and model parameters, described at model_train.py. The
            attributes read here are desired_samples, preprocess,
            window_size_samples, window_stride_samples, average_window_width,
            fingerprint_width, sample_rate and summaries_dir.

        Raises:
          ValueError: If the preprocessing mode isn't recognized.
          Exception: If the preprocessor wasn't compiled in (the 'micro' mode
            is requested but `frontend_op` is falsy).
        """
        with tf.get_default_graph().name_scope('data'):
            desired_samples = flags.desired_samples
            self.wav_filename_placeholder_ = tf.placeholder(
                tf.string, [], name='wav_filename')
            wav_loader = io_ops.read_file(self.wav_filename_placeholder_)
            wav_decoder = tf.audio.decode_wav(wav_loader,
                                              desired_channels=1,
                                              desired_samples=desired_samples)
            # Allow the audio sample's volume to be adjusted.
            self.foreground_volume_placeholder_ = tf.placeholder(
                tf.float32, [], name='foreground_volume')
            # Signal resampling to generate more training data: the input
            # signal is stretched or squeezed proportionally to this factor.
            self.foreground_resampling_placeholder_ = tf.placeholder(
                tf.float32, [])

            # NOTE(review): this is a Python-level comparison between the
            # placeholder object and a float, evaluated once while the graph
            # is being built -- it does not look at the value fed at run time.
            # Confirm whether the else branch is ever reachable; a per-value
            # switch would need tf.cond.
            if self.foreground_resampling_placeholder_ != 1.0:
                # Add axes 0 and 2 so the audio can be passed through the
                # image resize ops (assumes wav_decoder.audio is
                # [samples, 1] -- TODO confirm), giving [1, samples, 1, 1].
                image = tf.expand_dims(wav_decoder.audio, 0)
                image = tf.expand_dims(image, 2)
                shape = tf.shape(wav_decoder.audio)
                # Resize along the sample axis to
                # int(num_samples * resampling factor); width stays 1.
                image_resized = tf.image.resize(
                    images=image,
                    size=(tf.cast((tf.cast(shape[0], tf.float32) *
                                   self.foreground_resampling_placeholder_),
                                  tf.int32), 1),
                    preserve_aspect_ratio=False)
                # Crop or pad back to desired_samples so downstream shapes
                # stay fixed regardless of the resampling factor.
                image_resized_cropped = tf.image.resize_with_crop_or_pad(
                    image_resized,
                    target_height=desired_samples,
                    target_width=1,
                )
                # Drop the two axes that are not the sample or width axis.
                image_resized_cropped = tf.squeeze(image_resized_cropped,
                                                   axis=[0, 3])
                scaled_foreground = tf.multiply(
                    image_resized_cropped, self.foreground_volume_placeholder_)
            else:
                scaled_foreground = tf.multiply(
                    wav_decoder.audio, self.foreground_volume_placeholder_)
            # Shift the sample's start position, and pad any gaps with zeros.
            self.time_shift_padding_placeholder_ = tf.placeholder(
                tf.int32, [2, 2], name='time_shift_padding')
            self.time_shift_offset_placeholder_ = tf.placeholder(
                tf.int32, [2], name='time_shift_offset')
            padded_foreground = tf.pad(
                tensor=scaled_foreground,
                paddings=self.time_shift_padding_placeholder_,
                mode='CONSTANT')
            # Take desired_samples rows starting at the fed offset; -1 keeps
            # everything remaining along the second axis.
            sliced_foreground = tf.slice(padded_foreground,
                                         self.time_shift_offset_placeholder_,
                                         [desired_samples, -1])
            # Mix in background noise.
            self.background_data_placeholder_ = tf.placeholder(
                tf.float32, [desired_samples, 1], name='background_data')
            self.background_volume_placeholder_ = tf.placeholder(
                tf.float32, [], name='background_volume')
            background_mul = tf.multiply(self.background_data_placeholder_,
                                         self.background_volume_placeholder_)
            background_add = tf.add(background_mul, sliced_foreground)
            # Keep the mixed signal inside [-1.0, 1.0].
            background_clamp = tf.clip_by_value(background_add, -1.0, 1.0)

            if flags.preprocess == 'raw':
                # return raw audio
                self.output_ = background_clamp
                tf.summary.image('input_audio',
                                 tf.expand_dims(
                                     tf.expand_dims(background_clamp, -1), -1),
                                 max_outputs=1)
            else:
                # Run the spectrogram and MFCC ops to get a 2D audio 'fingerprint'
                spectrogram = audio_ops.audio_spectrogram(
                    background_clamp,
                    window_size=flags.window_size_samples,
                    stride=flags.window_stride_samples,
                    magnitude_squared=True)
                tf.summary.image('spectrogram',
                                 tf.expand_dims(spectrogram, -1),
                                 max_outputs=1)
                # The number of buckets in each FFT row in the spectrogram will depend
                # on how many input samples there are in each window. This can be quite
                # large, with a 160 sample window producing 127 buckets for example. We
                # don't need this level of detail for classification, so we often want
                # to shrink them down to produce a smaller result. That's what this
                # section implements. One method is to use average pooling to merge
                # adjacent buckets, but a more sophisticated approach is to apply the
                # MFCC algorithm to shrink the representation.
                if flags.preprocess == 'average':
                    self.output_ = tf.nn.pool(
                        input=tf.expand_dims(spectrogram, -1),
                        window_shape=[1, flags.average_window_width],
                        strides=[1, flags.average_window_width],
                        pooling_type='AVG',
                        padding='SAME')
                    tf.summary.image('shrunk_spectrogram',
                                     self.output_,
                                     max_outputs=1)
                elif flags.preprocess == 'mfcc':
                    self.output_ = audio_ops.mfcc(
                        spectrogram,
                        wav_decoder.sample_rate,
                        dct_coefficient_count=flags.fingerprint_width)
                    tf.summary.image('mfcc',
                                     tf.expand_dims(self.output_, -1),
                                     max_outputs=1)
                elif flags.preprocess == 'micro':
                    if not frontend_op:
                        raise Exception(
                            'Micro frontend op is currently not available when running'
                            ' TensorFlow directly from Python, you need to build and run'
                            ' through Bazel')
                    sample_rate = flags.sample_rate
                    # Convert window size and stride from samples to
                    # milliseconds for the micro frontend op.
                    window_size_ms = (flags.window_size_samples *
                                      1000) / sample_rate
                    window_step_ms = (flags.window_stride_samples *
                                      1000) / sample_rate
                    # Rescale the clamped [-1, 1] float audio by 32768 and
                    # cast to int16 before handing it to the frontend.
                    int16_input = tf.cast(tf.multiply(background_clamp, 32768),
                                          tf.int16)
                    micro_frontend = frontend_op.audio_microfrontend(
                        int16_input,
                        sample_rate=sample_rate,
                        window_size=window_size_ms,
                        window_step=window_step_ms,
                        num_channels=flags.fingerprint_width,
                        out_scale=1,
                        out_type=tf.float32)
                    self.output_ = tf.multiply(micro_frontend, (10.0 / 256.0))
                    tf.summary.image('micro',
                                     tf.expand_dims(
                                         tf.expand_dims(self.output_, -1), 0),
                                     max_outputs=1)
                else:
                    # NOTE(review): the message below does not list "raw",
                    # although 'raw' is accepted by the branch above.
                    raise ValueError(
                        'Unknown preprocess mode "%s" (should be "mfcc", '
                        ' "average", or "micro")' % (flags.preprocess))

            # Merge all the summaries created under the 'data' scope; when
            # flags.summaries_dir is set, create a writer that logs them to
            # <summaries_dir>/data.
            self.merged_summaries_ = tf.summary.merge_all(scope='data')
            if flags.summaries_dir:
                self.summary_writer_ = tf.summary.FileWriter(
                    flags.summaries_dir + '/data', tf.get_default_graph())
Esempio n. 27
0
def arbitrary_style_image_inputs(style_dataset_file,
                                 batch_size=None,
                                 image_size=None,
                                 center_crop=True,
                                 shuffle=True,
                                 augment_style_images=False,
                                 random_style_image_size=False,
                                 min_rand_image_size=128,
                                 max_rand_image_size=300):
    """Loads a batch of random style images given the path of a tfrecord dataset.

    This method does not return pre-computed Gram matrices for the images like
    style_image_inputs. But it can provide data augmentation. If
    augment_style_images is equal to True (and image_size is given), then style
    images will be randomly modified (eg. changes in brightness, hue or
    saturation) for data augmentation. If random_style_image_size is set to
    True then all images in one batch will be resized to a random size.

    Args:
      style_dataset_file: str, path to the tfrecord dataset of style files.
      batch_size: int. If provided, batches style images. Defaults to None.
      image_size: int. The images will be resized bilinearly so that the
          smallest side has size image_size. Defaults to None, in which case
          the decoded image is used without any resizing, cropping or
          augmentation.
      center_crop: bool. If True, center-crops to [image_size, image_size].
          Defaults to True.
      shuffle: bool, whether to shuffle style files at random. Defaults to
          True.
      augment_style_images: bool. Whether to augment style images or not. Only
          takes effect when image_size is provided.
      random_style_image_size: bool. If this value is True, then all the style
          images in one batch will be resized to a random size between
          min_rand_image_size and max_rand_image_size.
      min_rand_image_size: int. If random_style_image_size is True, this value
          specifies the minimum image size.
      max_rand_image_size: int. If random_style_image_size is True, this value
          specifies the maximum image size.

    Returns:
      A tuple (image, label, image_orig):
        image: float tensor with values in [0, 1] for the style image (with
          random changes for data augmentation if augment_style_images is
          set to true). It has a leading batch axis: of size 1 when
          batch_size is None, otherwise produced by tf.train.batch.
        label: the style label read from the 'label' feature (0-D when
          batch_size is None).
        image_orig: float tensor with values in [0, 1] for the style image
          without random changes for data augmentation. Note that when
          batch_size is None no batch axis is added to this tensor, and the
          random_style_image_size resize is applied to `image` only.

    Raises:
      ValueError: if center cropping is requested but no image size is
          provided, or if batch size is specified but neither center-cropping
          nor augment-style-images is requested, or if both
          augment-style-images and center-cropping are requested.
    """
    if center_crop and image_size is None:
        raise ValueError('center-cropping requires specifying the image size.')
    if center_crop and augment_style_images:
        raise ValueError(
            'When augment_style_images is true images will be randomly cropped.'
        )
    if batch_size is not None and not center_crop and not augment_style_images:
        raise ValueError(
            'batching requires same image sizes (Set center-cropping or '
            'augment_style_images to true)')

    with tf.name_scope('style_image_processing'):
        # Force all input processing onto CPU in order to reserve the GPU for the
        # forward inference and back-propagation.
        with tf.device('/cpu:0'):
            filename_queue = tf.train.string_input_producer(
                [style_dataset_file],
                shuffle=False,
                capacity=1,
                name='filename_queue')
            if shuffle:
                examples_queue = tf.RandomShuffleQueue(
                    capacity=64,
                    min_after_dequeue=32,
                    dtypes=[tf.string],
                    name='random_examples_queue')
            else:
                examples_queue = tf.FIFOQueue(capacity=64,
                                              dtypes=[tf.string],
                                              name='fifo_examples_queue')
            # Records read from the file are pushed through the examples
            # queue by a registered queue runner.
            reader = tf.TFRecordReader()
            _, value = reader.read(filename_queue)
            enqueue_ops = [examples_queue.enqueue([value])]
            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
            features = tf.parse_single_example(
                example_serialized,
                features={
                    'label': tf.FixedLenFeature([], tf.int64),
                    'image_raw': tf.FixedLenFeature([], tf.string)
                })
            image = tf.image.decode_jpeg(features['image_raw'])
            image.set_shape([None, None, 3])
            label = features['label']

            # Default for the un-augmented image. Without this, every call
            # with image_size=None failed with a NameError below because
            # image_orig was only assigned inside the image_size branches.
            image_orig = image

            if image_size is not None:
                image_channels = image.shape[2].value
                if augment_style_images:
                    image = tf.image.random_brightness(image, max_delta=0.8)
                    image = tf.image.random_saturation(image,
                                                       lower=0.5,
                                                       upper=1.5)
                    image = tf.image.random_hue(image, max_delta=0.2)
                    image = tf.image.random_flip_left_right(image)
                    image = tf.image.random_flip_up_down(image)
                    # Resize to a random size larger than the target, then
                    # take a random image_size x image_size crop.
                    random_larger_image_size = tf.random_uniform(
                        [],
                        minval=image_size + 2,
                        maxval=image_size + 200,
                        dtype=tf.int32)
                    image = _aspect_preserving_resize(
                        image, random_larger_image_size)
                    image = tf.random_crop(
                        image, size=[image_size, image_size, image_channels])
                    image.set_shape([image_size, image_size, image_channels])

                    # The reference image is only resized and center-cropped.
                    image_orig = _aspect_preserving_resize(
                        image_orig, image_size + 2)
                    image_orig = _central_crop([image_orig], image_size,
                                               image_size)[0]
                    image_orig.set_shape([image_size, image_size, 3])
                elif center_crop:
                    image = _aspect_preserving_resize(image, image_size + 2)
                    image = _central_crop([image], image_size, image_size)[0]
                    image.set_shape([image_size, image_size, image_channels])
                    image_orig = image
                else:
                    image = _aspect_preserving_resize(image, image_size)
                    image_orig = image

            # Scale pixel values from [0, 255] to [0, 1].
            image = tf.to_float(image) / 255.0
            image_orig = tf.to_float(image_orig) / 255.0

            if batch_size is None:
                image = tf.expand_dims(image, 0)
            else:
                [image, image_orig,
                 label] = tf.train.batch([image, image_orig, label],
                                         batch_size=batch_size)

            if random_style_image_size:
                # Selects a random size for the style images and resizes all the images
                # in the batch to that size.
                image = _aspect_preserving_resize(
                    image,
                    tf.random_uniform([],
                                      minval=min_rand_image_size,
                                      maxval=max_rand_image_size,
                                      dtype=tf.int32))

            return image, label, image_orig
Esempio n. 28
0
def _calculate_expected_result(dist_per_cell, numeric_values,
                               numeric_values_scale, input_mask_float,
                               logits_aggregation, config):
    """Calculate the expected result given cell and aggregation probabilities.

    Computes a soft SUM, AVERAGE and COUNT over the cells, weighted by the
    per-cell selection probabilities, and mixes the three with the
    probabilities of the aggregation operators.

    Args:
      dist_per_cell: distribution object over cell selection; its
        `logits_parameter()` is used when `config.use_gumbel_for_cells` is
        set, otherwise it is passed to `_get_probs`.
      numeric_values: float tensor of cell values; NaN entries (non-numeric
        cells) are treated as zero.
      numeric_values_scale: tensor the cell probabilities are divided by.
      input_mask_float: float mask multiplied into the scaled probabilities.
      logits_aggregation: <float32>[batch_size, num_aggregation_labels]
        aggregation logits; column 0 is dropped before use, and the remaining
        columns are multiplied element-wise with [sum, average, count].
      config: configuration object providing `use_gumbel_for_cells`,
        `temperature`, `average_approximation_function`,
        `use_gumbel_for_agg` and `agg_temperature`.

    Returns:
      Tensor with the expected result per example (reduced over axis 1).

    Raises:
      ValueError: if `config.average_approximation_function` is not one of
        RATIO, FIRST_ORDER or SECOND_ORDER.
    """
    if config.use_gumbel_for_cells:
        gumbel_dist = tfp.distributions.RelaxedBernoulli(
            # The token logits were already divided by the temperature and
            # used for computing cell selection errors so we need to multiply
            # it again here.
            config.temperature,
            logits=dist_per_cell.logits_parameter() * config.temperature)
        scaled_probability_per_cell = gumbel_dist.sample()
    else:
        scaled_probability_per_cell = _get_probs(dist_per_cell)

    # <float32>[batch_size, seq_length]
    scaled_probability_per_cell = (scaled_probability_per_cell /
                                   numeric_values_scale) * input_mask_float
    count_result = tf.reduce_sum(scaled_probability_per_cell, axis=1)
    numeric_values_masked = tf.where(
        tf.is_nan(numeric_values), tf.zeros_like(numeric_values),
        numeric_values)  # Mask non-numeric table values to zero.
    sum_result = tf.reduce_sum(scaled_probability_per_cell *
                               numeric_values_masked,
                               axis=1)
    avg_approximation = config.average_approximation_function
    if avg_approximation == AverageApproximationFunction.RATIO:
        average_result = sum_result / (count_result + _EPSILON_ZERO_DIVISION)
    elif avg_approximation == AverageApproximationFunction.FIRST_ORDER:
        # For each cell: the sum of the probabilities of all other cells,
        # plus one.
        ex = tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) \
            - scaled_probability_per_cell + 1
        average_result = tf.reduce_sum(numeric_values_masked *
                                       scaled_probability_per_cell / ex,
                                       axis=1)
    elif avg_approximation == AverageApproximationFunction.SECOND_ORDER:
        # For each cell: the sum of the probabilities of all other cells,
        # plus one.
        ex = tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) \
            - scaled_probability_per_cell + 1
        pointwise_var = scaled_probability_per_cell * \
            (1 - scaled_probability_per_cell)
        # Summed p * (1 - p) over all other cells.
        var = tf.reduce_sum(pointwise_var, axis=1,
                            keepdims=True) - pointwise_var
        multiplier = (var / tf.math.square(ex) + 1) / ex
        average_result = tf.reduce_sum(
            numeric_values_masked * scaled_probability_per_cell * multiplier,
            axis=1)
    else:
        # Previously this branch only logged the problem and fell through,
        # leaving average_result unbound and failing later with an unrelated
        # UnboundLocalError. Fail here with a clear message instead.
        raise ValueError("Invalid average_approximation_function: %s" %
                         config.average_approximation_function)

    if config.use_gumbel_for_agg:
        gumbel_dist = tfp.distributions.RelaxedOneHotCategorical(
            config.agg_temperature, logits=logits_aggregation[:, 1:])
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = gumbel_dist.sample()
    else:
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = tf.nn.softmax(logits_aggregation[:, 1:] /
                                                  config.agg_temperature,
                                                  axis=-1)
    # Column order is [sum, average, count]; it is multiplied element-wise
    # with the aggregation probabilities (without the first label).
    all_results = tf.concat([
        tf.expand_dims(sum_result, axis=1),
        tf.expand_dims(average_result, axis=1),
        tf.expand_dims(count_result, axis=1)
    ],
                            axis=1)
    expected_result = tf.reduce_sum(all_results * aggregation_op_only_probs,
                                    axis=1)
    return expected_result
def calculate_influence_ood(params):
    """Calculates influence functions for pre-trained model with OOD classes.

    For a slice of the validation, test and OOD example tensors, every example
    is paired with each in-distribution label in turn. For each pair the
    function computes the influence value, its elementwise product with the
    gradient, a second inverse-Hessian-vector product, the gradient itself,
    and the two conjugate-gradient warning flags. Results are split back into
    valid/test/ood groups and saved with `utils.save_tensors`.

    Args:
      params (dict): contains a number of params - as loaded from flags.
        Should contain:
          seed (int) - random seed for Tensorflow and Numpy initialization.
          training_results_dir (str) - parent directory of the pre-trained
            model.
          clf_name (str) - the name of the pre-trained model's directory.
          n_test_infl (int) - number of examples to run influence functions
            for.
          start_ix_test_infl (int) - index to start loading examples from.
          cg_maxiter (int) - max number of iterations for conjugate gradient.
          squared (bool) - whether to calculate squared Hessian directly.
          tol (float) - tolerance for conjugate gradient.
          lam (float) - L2 regularization amount for Hessian.
          hvp_samples (int) - number of samples to take in HVP estimation.
          tname (str) - extra string to add to saved tensor names; can be ''.
        May contain (a missing key is treated the same as None / empty):
          output_dir (str) - where results should be written - defaults to
            training_results_dir/clf_name/influence_results.
          preloaded_model (model or None) - if None, we should load the model
            ourselves. Otherwise, preloaded_model is the model we are
            interested in.
          preloaded_itr (Iterator or None) - if None, load the data iterator
            ourselves; otherwise, use preloaded_itr as the data iterator.
    """

    tf.set_random_seed(params['seed'])
    np.random.seed(params['seed'])

    # Optional entries: read with .get so that omitting them behaves like
    # passing None instead of raising KeyError.
    preloaded_model = params.get('preloaded_model')
    preloaded_itr = params.get('preloaded_itr')
    output_dir = params.get('output_dir')

    # Load a trained classifier.
    modeldir = os.path.join(params['training_results_dir'], params['clf_name'])
    param_file = os.path.join(modeldir, 'params.json')
    model_params = utils.load_json(param_file)

    if preloaded_model is None:
        ckpt_path = os.path.join(modeldir, 'ckpts/bestmodel-1')
        cnn_args = {
            'conv_dims':
            [int(x) for x in model_params['conv_dims'].split(',')],
            'conv_sizes':
            [int(x) for x in model_params['conv_sizes'].split(',')],
            'dense_sizes':
            [int(x) for x in model_params['dense_sizes'].split(',')],
            'n_classes': model_params['n_classes'],
            'onehot': True
        }
        model = utils.load_model(ckpt_path, classifier.CNN, cnn_args)
    else:
        model = preloaded_model

    # Load train/validation/test examples
    tensordir = os.path.join(modeldir, 'tensors')
    validation_x = utils.load_tensor(
        os.path.join(tensordir, 'valid_x_infl.npy'))
    test_x = utils.load_tensor(os.path.join(tensordir, 'test_x_infl.npy'))
    ood_x = utils.load_tensor(os.path.join(tensordir, 'ood_x_infl.npy'))

    # Get in- and out-of-distribution classes.
    n_labels = model_params['n_classes']
    all_classes = range(n_labels)
    ood_classes = ([int(x) for x in model_params['ood_classes'].split(',')]
                   if 'ood_classes' in model_params else [])
    ind_classes = [x for x in all_classes if x not in ood_classes]

    # Label noise used when loading the training data iterator.
    label_noise = (model_params['label_noise']
                   if 'label_noise' in model_params else 0.)

    # We only look at a portion of the test set for computational reasons.
    ninfl = params['n_test_infl']
    start_ix = params['start_ix_test_infl']
    end_ix = start_ix + ninfl
    xinfl_validation = validation_x[start_ix:end_ix]
    xinfl_test = test_x[start_ix:end_ix]
    xinfl_ood = ood_x[start_ix:end_ix]

    # Number of (example, label) rows contributed by each of the three
    # groups (validation, test, OOD); used to split the results again below.
    n_per_split = ninfl * len(ind_classes)

    # We want to rotate through all the label options: one block of ninfl
    # one-hot rows per in-distribution label, repeated for the three groups.
    y_all = tf.concat([
        tf.one_hot(tf.fill((ninfl, ), lab), depth=n_labels)
        for lab in ind_classes
    ],
                      axis=0)
    y_all = tf.concat([y_all, y_all, y_all], axis=0)

    # Tile each group of examples once per in-distribution label so the rows
    # line up with y_all.
    xinfl_validation_all = tf.concat([xinfl_validation for _ in ind_classes],
                                     axis=0)
    xinfl_test_all = tf.concat([xinfl_test for _ in ind_classes], axis=0)
    xinfl_ood_all = tf.concat([xinfl_ood for _ in ind_classes], axis=0)
    x_all = tf.concat([xinfl_validation_all, xinfl_test_all, xinfl_ood_all],
                      axis=0)

    cg_approx_params = {
        'maxiter': params['cg_maxiter'],
        'squared': params['squared'],
        'tol': params['tol'],
        'hvp_samples': params['hvp_samples']
    }

    # Here we run conjugate gradient one example at a time, collecting
    # the following outputs.

    # H^{-1}g
    infl_value = []
    # gH^{-1}g
    infl_laplace = []
    # H^{-2}g
    infl_deriv = []
    # g
    grads = []
    # When calculating H^{-1}g with conjugate gradient, Scipy returns a flag
    # denoting the optimization's success.
    warning_flags = []
    # When calculating H^{-2}g with conjugate gradient, Scipy returns a flag
    # denoting the optimization's success.
    warning_flags_deriv = []

    for i in range(x_all.shape[0]):
        logging.info('Example {:d}'.format(i))
        s = time.time()
        xi = tf.expand_dims(x_all[i], 0)
        yi = tf.expand_dims(y_all[i], 0)
        # The training iterator is (re)loaded for every example unless the
        # caller supplied one.
        if preloaded_itr is None:
            itr_train, _, _, _ = dataset_utils.load_dataset_ood_supervised_onehot(
                ind_classes, ood_classes, label_noise=label_noise)
        else:
            itr_train = preloaded_itr
        infl_value_i, grads_i, warning_flag_i = get_parameter_influence(
            model,
            xi,
            yi,
            itr_train,
            approx_params=cg_approx_params,
            damping=params['lam'])
        t = time.time()
        logging.info('IHVP calculation took {:.3f} seconds'.format(t - s))
        infl_laplace_i = tf.multiply(infl_value_i, grads_i)

        # Second inverse-Hessian-vector product, applied to the first result
        # reshaped like the model weights.
        infl_value_wtshape = tensor_utils.reshape_vector_as(
            model.weights, infl_value_i)
        loss_function = calculate_influence.make_loss_fn(model, params['lam'])
        gradient_function = calculate_influence.make_grad_fn(model)
        map_gradient_function = calculate_influence.make_map_grad_fn(model)
        s = time.time()
        infl_deriv_i, warning_flag_deriv_i = get_ihvp_conjugate_gradient(
            infl_value_wtshape,
            itr_train,
            loss_function,
            gradient_function,
            map_gradient_function,
            approx_params=cg_approx_params)
        t = time.time()
        logging.info('Second IHVP calculation took {:.3f} seconds'.format(t -
                                                                          s))
        infl_value.append(infl_value_i)
        infl_laplace.append(infl_laplace_i)
        infl_deriv.append(infl_deriv_i)
        grads.append(grads_i)
        warning_flags.append(tf.expand_dims(warning_flag_i, 0))
        warning_flags_deriv.append(tf.expand_dims(warning_flag_deriv_i, 0))

    infl_value = tf.concat(infl_value, axis=0)
    infl_laplace = tf.concat(infl_laplace, axis=0)
    infl_deriv = tf.concat(infl_deriv, axis=0)
    grads = tf.concat(grads, axis=0)
    warning_flags = tf.concat(warning_flags, axis=0)
    warning_flags_deriv = tf.concat(warning_flags_deriv, axis=0)

    # Split every result back into its validation / test / OOD thirds, in the
    # same order the inputs were concatenated into x_all.
    res = {}
    for infl_res, nm in [(infl_value, 'infl'), (infl_deriv, 'deriv'),
                         (infl_laplace, 'laplace'), (grads, 'grads'),
                         (warning_flags, 'warnflags'),
                         (warning_flags_deriv, 'warnflags_deriv')]:
        res['valid_{}'.format(nm)] = infl_res[:n_per_split]
        res['test_{}'.format(nm)] = infl_res[n_per_split:2 * n_per_split]
        res['ood_{}'.format(nm)] = infl_res[2 * n_per_split:]

    # Save the results of these calculations.
    if output_dir:
        resdir = utils.make_subdir(output_dir, 'influence_results')
    else:
        resdir = utils.make_subdir(modeldir, 'influence_results')
    tensor_name_template = '{}{}-inv_hvp-cg-ix{:d}-ninfl{:d}' + (
        '_squared' if params['squared'] else '')
    infl_tensors = [(tensor_name_template.format(params['tname'], label,
                                                 start_ix, ninfl), tensor)
                    for label, tensor in res.items()]
    utils.save_tensors(infl_tensors, resdir)
Esempio n. 30
0
def add_metric_fn_inputs(params,
                         cls_outputs,
                         box_outputs,
                         metric_fn_inputs,
                         max_detection_points=anchors.MAX_DETECTION_POINTS):
  """Selects top-k predictions and adds the selected to metric_fn_inputs.

  Nothing is returned; the results are written into `metric_fn_inputs` under
  the keys `cls_outputs_all`, `box_outputs_all`, `indices_all` and
  `classes_all`. `cls_outputs` and `box_outputs` are only read, never modified.

  Args:
    params: a parameter dictionary that includes `min_level`, `max_level`,
      `batch_size`, `num_classes`, and `data_format`.
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    metric_fn_inputs: a dictionary that will hold the top-k selections.
    max_detection_points: an integer specifying the maximum detection points
      to keep before NMS. Keep all anchors if max_detection_points <= 0.
  """
  batch_size = params['batch_size']
  num_classes = params['num_classes']
  channels_first = params['data_format'] == 'channels_first'

  cls_outputs_all = []
  box_outputs_all = []
  # Concatenates class and box of all levels into one tensor.
  for level in range(params['min_level'], params['max_level'] + 1):
    cls_level = cls_outputs[level]
    box_level = box_outputs[level]
    if channels_first:
      # Transpose into locals rather than writing back into the caller's
      # dicts: overwriting the inputs would leave them channels-last while
      # params still says channels_first, so any later use (or a second call
      # to this function) would silently operate on the wrong layout.
      cls_level = tf.transpose(cls_level, [0, 2, 3, 1])
      box_level = tf.transpose(box_level, [0, 2, 3, 1])

    cls_outputs_all.append(
        tf.reshape(cls_level, [batch_size, -1, num_classes]))
    box_outputs_all.append(tf.reshape(box_level, [batch_size, -1, 4]))
  cls_outputs_all = tf.concat(cls_outputs_all, 1)
  box_outputs_all = tf.concat(box_outputs_all, 1)

  if max_detection_points > 0:
    # Prune anchors and detections to only keep max_detection_points.
    # Due to some issues, top_k is currently slow in graph mode.
    cls_outputs_all_reshape = tf.reshape(cls_outputs_all, [batch_size, -1])
    _, cls_topk_indices = tf.math.top_k(cls_outputs_all_reshape,
                                        k=max_detection_points,
                                        sorted=False)
    # The flattened axis is laid out as anchor * num_classes + class, so the
    # quotient recovers the anchor index and the remainder the class id.
    indices = cls_topk_indices // num_classes
    classes = cls_topk_indices % num_classes
    cls_indices = tf.stack([indices, classes], axis=2)
    cls_outputs_all_after_topk = tf.gather_nd(
        cls_outputs_all, cls_indices, batch_dims=1)
    box_outputs_all_after_topk = tf.gather_nd(
        box_outputs_all, tf.expand_dims(indices, 2), batch_dims=1)
  else:
    # Keep all anchors, but for each anchor, just keep the max probability
    # over classes.
    cls_outputs_idx = tf.math.argmax(cls_outputs_all, axis=-1)
    num_anchors = cls_outputs_all.shape[1]

    classes = cls_outputs_idx
    # Every anchor is kept, so the anchor indices are 0..num_anchors-1
    # repeated for each example in the batch.
    indices = tf.tile(tf.expand_dims(tf.range(num_anchors), axis=0),
                      [batch_size, 1])
    cls_outputs_all_after_topk = tf.reduce_max(cls_outputs_all, -1)
    box_outputs_all_after_topk = box_outputs_all

  metric_fn_inputs['cls_outputs_all'] = cls_outputs_all_after_topk
  metric_fn_inputs['box_outputs_all'] = box_outputs_all_after_topk
  metric_fn_inputs['indices_all'] = indices
  metric_fn_inputs['classes_all'] = classes