Example #1
def create_optimizer(loss,
                     init_lr,
                     num_train_steps,
                     num_warmup_steps,
                     use_tpu,
                     pretrained_param_names,
                     freeze_pretrained_steps,
                     restart_warmup_after_unfreeze=True,
                     lr_after_restarting=0.):
    """Creates an optimizer training op."""
    global_step = tf.train.get_or_create_global_step()
    global_steps_int = tf.cast(global_step, tf.int32)

    num_train_steps_int = tf.constant(num_train_steps, dtype=tf.int32)
    warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)
    current_step_in_decay = global_steps_int - warmup_steps_int
    num_decay_steps = num_train_steps_int - warmup_steps_int

    global_steps_float = tf.cast(global_steps_int, tf.float32)

    if freeze_pretrained_steps and restart_warmup_after_unfreeze:
        freeze_pretrained_steps_int = tf.cast(freeze_pretrained_steps,
                                              tf.int32)
        global_steps_int -= (tf.cast(
            global_steps_int >= freeze_pretrained_steps_int, tf.int32) *
                             freeze_pretrained_steps_int)
        if lr_after_restarting <= 0.:
            raise ValueError(
                "Learning rate after restarting must be positive, got: " +
                str(lr_after_restarting))
        learning_rate = tf.cond(global_step < freeze_pretrained_steps,
                                lambda: init_lr, lambda: lr_after_restarting)

        current_step_in_decay = tf.cond(
            global_step < freeze_pretrained_steps,
            lambda: current_step_in_decay,
            lambda: global_steps_int - warmup_steps_int)

        after_unfreeze_decay_steps = num_train_steps_int - (
            freeze_pretrained_steps + warmup_steps_int)

        num_decay_steps = tf.cond(global_step < freeze_pretrained_steps,
                                  lambda: num_decay_steps,
                                  lambda: after_unfreeze_decay_steps)

        after_unfreeze_steps = global_steps_float - tf.cast(
            freeze_pretrained_steps_int, tf.float32)
        global_steps_float = tf.cond(global_step < freeze_pretrained_steps,
                                     lambda: global_steps_float,
                                     lambda: after_unfreeze_steps)

        tf.summary.scalar(
            "is pretraining",
            tf.cast(global_step < freeze_pretrained_steps, tf.int32))
    else:
        learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

    tf.summary.scalar("global step count", global_steps_float)
    tf.summary.scalar("current base learning rate", learning_rate)
    tf.summary.scalar("global decay step", current_step_in_decay)
    tf.summary.scalar("total decay steps", num_decay_steps)

    # Implements linear decay of the learning rate.
    learning_rate = tf.train.polynomial_decay(learning_rate,
                                              tf.cast(current_step_in_decay,
                                                      tf.float32),
                                              tf.cast(num_decay_steps,
                                                      tf.float32),
                                              end_learning_rate=0.0,
                                              power=1.0,
                                              cycle=False)

    tf.summary.scalar("decayed learning rate", learning_rate)

    # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
    # learning rate will be `global_step/num_warmup_steps * init_lr`.
    if num_warmup_steps:
        warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

        warmup_percent_done = global_steps_float / warmup_steps_float

        tf.summary.scalar("warmup percent done", warmup_percent_done)

        warmup_learning_rate = learning_rate * warmup_percent_done

        is_warmup = global_steps_int < warmup_steps_int

        tf.summary.scalar("is warmup", tf.cast(is_warmup, tf.float32))
        learning_rate = tf.cond(is_warmup, lambda: warmup_learning_rate,
                                lambda: learning_rate)

    tf.summary.scalar("learning rate", learning_rate)

    # It is recommended that you use this optimizer for fine tuning, since this
    # is how the model was trained (note that the Adam m/v variables are NOT
    # loaded from init_checkpoint.)
    optimizer = AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"],
        pretrained_param_names=pretrained_param_names,
        freeze_pretrained_steps=freeze_pretrained_steps)

    if use_tpu:
        optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

    tvars = tf.trainable_variables()
    grads = tf.gradients(loss, tvars)

    # This is how the model was pre-trained.
    (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

    train_op = optimizer.apply_gradients(list(zip(grads, tvars)),
                                         global_step=global_step)

    # Normally the global step update is done inside `apply_gradients`, but
    # `AdamWeightDecayOptimizer` does not do this. If you use a different
    # optimizer, you should probably remove this line.
    new_global_step = global_step + 1
    train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
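
For intuition, the schedule this op builds can be traced in plain Python. The sketch below is not part of the original code; it mirrors the documented intent (linear warmup to the base rate, linear decay to zero, and a fresh warmup/decay cycle at `lr_after_restarting` once the pretrained variables unfreeze), with illustrative names.

def learning_rate_at(step, init_lr, num_train_steps, num_warmup_steps,
                     freeze_pretrained_steps=0, lr_after_restarting=0.0):
    # Plain-Python sketch of the tf.cond logic above (assumed semantics).
    base_lr = init_lr
    num_decay_steps = num_train_steps - num_warmup_steps
    if freeze_pretrained_steps and step >= freeze_pretrained_steps:
        # Pretrained variables just unfroze: restart warmup and decay.
        base_lr = lr_after_restarting
        step -= freeze_pretrained_steps
        num_decay_steps = (num_train_steps - freeze_pretrained_steps
                           - num_warmup_steps)
    if num_warmup_steps and step < num_warmup_steps:
        # Linear warmup: step / num_warmup_steps * base rate.
        return base_lr * step / num_warmup_steps
    # Linear (power=1.0) polynomial decay to 0 over the remaining steps.
    frac = min(step - num_warmup_steps, num_decay_steps) / num_decay_steps
    return base_lr * (1.0 - frac)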
Example #2
def train(train_dir,
          config,
          dataset_fn,
          checkpoints_to_keep=5,
          keep_checkpoint_every_n_hours=1,
          num_steps=None,
          master='',
          num_sync_workers=0,
          num_ps_tasks=0,
          task=0):
    """Train loop."""
    tf.gfile.MakeDirs(train_dir)
    is_chief = (task == 0)
    if is_chief:
        _trial_summary(config.hparams, config.train_examples_path
                       or config.tfds_name, train_dir)
    with tf.Graph().as_default():
        with tf.device(
                tf.train.replica_device_setter(num_ps_tasks,
                                               merge_devices=True)):

            model = config.model
            model.build(config.hparams,
                        config.data_converter.output_depth,
                        is_training=True)

            optimizer = model.train(**_get_input_tensors(dataset_fn(), config))

            hooks = []
            if num_sync_workers:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, num_sync_workers)
                hooks.append(optimizer.make_session_run_hook(is_chief))

            grads, var_list = list(
                zip(*optimizer.compute_gradients(model.loss)))
            global_norm = tf.global_norm(grads)
            tf.summary.scalar('global_norm', global_norm)

            if config.hparams.clip_mode == 'value':
                g = config.hparams.grad_clip
                clipped_grads = [
                    tf.clip_by_value(grad, -g, g) for grad in grads
                ]
            elif config.hparams.clip_mode == 'global_norm':
                clipped_grads = tf.cond(
                    global_norm < config.hparams.grad_norm_clip_to_zero,
                    lambda: tf.clip_by_global_norm(  # pylint:disable=g-long-lambda
                        grads,
                        config.hparams.grad_clip,
                        use_norm=global_norm)[0],
                    lambda: [tf.zeros(tf.shape(g)) for g in grads])
            else:
                raise ValueError('Unknown clip_mode: {}'.format(
                    config.hparams.clip_mode))
            train_op = optimizer.apply_gradients(list(
                zip(clipped_grads, var_list)),
                                                 global_step=model.global_step,
                                                 name='train_step')

            logging_dict = {
                'global_step': model.global_step,
                'loss': model.loss
            }

            hooks.append(
                tf.train.LoggingTensorHook(logging_dict, every_n_iter=100))
            if num_steps:
                hooks.append(tf.train.StopAtStepHook(last_step=num_steps))

            scaffold = tf.train.Scaffold(saver=tf.train.Saver(
                max_to_keep=checkpoints_to_keep,
                keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours))
            tf_slim.training.train(train_op=train_op,
                                   logdir=train_dir,
                                   scaffold=scaffold,
                                   hooks=hooks,
                                   save_checkpoint_secs=60,
                                   master=master,
                                   is_chief=is_chief)
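
One detail worth calling out in the `global_norm` branch: when the global gradient norm reaches `grad_norm_clip_to_zero`, the whole update is zeroed rather than rescaled. A rough numpy sketch of that policy, with illustrative names:

import numpy as np

def clip_or_zero(grads, grad_clip, grad_norm_clip_to_zero):
    # Mirrors the tf.cond above: zero out pathological updates, otherwise
    # apply ordinary global-norm clipping.
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm >= grad_norm_clip_to_zero:
        return [np.zeros_like(g) for g in grads]
    if global_norm > grad_clip:
        return [g * (grad_clip / global_norm) for g in grads]
    return list(grads)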
Example #3
def build_learning_rate_schedule(
    learning_rate,
    decay_type,
    warmup_start_epoch,
    max_learning_rate_epoch,
    decay_end_epoch,
    global_step,
    steps_per_epoch,
    **decay_type_specific_kwargs):
  """Build learning rate from base learning rate and other details.

  We note that warmup_start_epoch <= max_learning_rate_epoch < decay_end_epoch,
  since the warmup happens at the start of the learning rate schedule.

  Args:
    learning_rate: Learning rate for the model.
    decay_type: Name of the decay that should be applied to the learning rate.
    warmup_start_epoch: Epoch at which learning rate warmup starts.
    max_learning_rate_epoch: Epoch at which learning rate warmup ends and the
      decay kicks in.
    decay_end_epoch: Epoch at which the learning rate decay ends, at which
      point the learning rate becomes 0.
    global_step: The global step to use for learning rate computation.
    steps_per_epoch: Integer which defines the number of steps that are run for
      every epoch.
    **decay_type_specific_kwargs: Keyword arguments specific to the given
      `decay_type`.

  Returns:
    Scalar tensor which stores the learning rate at a given global step.
  """
  if decay_end_epoch == max_learning_rate_epoch:
    # This stage of training is 0 epochs long, so just return learning_rate and
    # avoid potential divide by 0 problems.
    if warmup_start_epoch < max_learning_rate_epoch:
      raise ValueError(
          'Cannot have warmup for a 0-step learning rate schedule.')

    return learning_rate

  assert warmup_start_epoch <= max_learning_rate_epoch
  assert max_learning_rate_epoch < decay_end_epoch

  max_learning_rate_epoch_tensor = tf.convert_to_tensor(max_learning_rate_epoch)
  warmup_start_epoch_tensor = tf.convert_to_tensor(
      warmup_start_epoch, max_learning_rate_epoch_tensor.dtype)
  decay_end_epoch_tensor = tf.convert_to_tensor(
      decay_end_epoch, max_learning_rate_epoch_tensor.dtype)
  steps_per_epoch_tensor = tf.cast(steps_per_epoch,
                                   max_learning_rate_epoch_tensor.dtype)

  # Learning rate decay kicks in starting max_learning_rate_epoch
  # Before max_learning_rate_epoch either there is a warmup or the learning rate
  # is set to the constant value of `initial_lr`.
  learning_rate_step = global_step - tf.cast(
      max_learning_rate_epoch_tensor * steps_per_epoch_tensor,
      global_step.dtype)

  def _no_decay_fn(initial_lr, *args, **kwargs):
    del args, kwargs
    return initial_lr

  decay_type_fn_map = {
      enums.DecayType.EXPONENTIAL: exponential_decay,
      enums.DecayType.COSINE: cosine_decay,
      enums.DecayType.PIECEWISE_LINEAR: piecewise_linear_decay,
      enums.DecayType.NO_DECAY: _no_decay_fn,
  }
  if decay_type not in decay_type_fn_map:
    raise ValueError(f'Unknown decay type {decay_type}')

  decayed_learning_rate = decay_type_fn_map[decay_type](
      initial_lr=learning_rate,
      global_step=learning_rate_step,
      total_epochs=decay_end_epoch_tensor - max_learning_rate_epoch_tensor,
      steps_per_epoch=steps_per_epoch,
      **decay_type_specific_kwargs)

  # The learning rate is set to 0 once global_step is more than total_steps.
  total_steps = tf.cast(
      steps_per_epoch_tensor * (
          decay_end_epoch_tensor - max_learning_rate_epoch_tensor),
      global_step.dtype)
  decayed_learning_rate = tf.cond(
      learning_rate_step <= total_steps,
      lambda: decayed_learning_rate,
      lambda: 0.0)

  warmup_step_counter = global_step - tf.cast(
      warmup_start_epoch_tensor * steps_per_epoch_tensor, global_step.dtype)
  return maybe_add_warmup_to_lr(
      learning_rate, decayed_learning_rate, warmup_step_counter,
      max_learning_rate_epoch - warmup_start_epoch_tensor,
      steps_per_epoch_tensor)
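
As a quick sanity check of the step bookkeeping (all numbers below are made up for illustration): with 100 steps per epoch, warmup starting at epoch 0, the maximum rate reached at epoch 5, and decay ending at epoch 90:

# Worked example of the step arithmetic above (values assumed).
steps_per_epoch = 100
warmup_start_epoch, max_learning_rate_epoch, decay_end_epoch = 0, 5, 90
global_step = 750

learning_rate_step = global_step - max_learning_rate_epoch * steps_per_epoch
warmup_step_counter = global_step - warmup_start_epoch * steps_per_epoch
total_steps = steps_per_epoch * (decay_end_epoch - max_learning_rate_epoch)

assert learning_rate_step == 250    # decay has been running for 250 steps
assert warmup_step_counter == 750   # input to maybe_add_warmup_to_lr
assert total_steps == 8500          # learning rate hits 0 past this point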
Example #4
def _buckets(data, bucket_count=None):
    """Create a TensorFlow op to group data into histogram buckets.

    Arguments:
      data: A `Tensor` of any shape. Must be castable to `float64`.
      bucket_count: Optional positive `int` or scalar `int32` `Tensor`.
    Returns:
      A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is
      a triple `[left_edge, right_edge, count]` for a single bucket.
      The value of `k` is either `bucket_count` or `1` or `0`.
    """
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    if bucket_count is None:
        bucket_count = summary_v2.DEFAULT_BUCKET_COUNT
    with tf.name_scope("buckets",
                       values=[data, bucket_count]), tf.control_dependencies([
                           tf.assert_scalar(bucket_count),
                           tf.assert_type(bucket_count, tf.int32)
                       ]):
        data = tf.reshape(data, shape=[-1])  # flatten
        data = tf.cast(data, tf.float64)
        is_empty = tf.equal(tf.size(input=data), 0)

        def when_empty():
            return tf.constant([], shape=(0, 3), dtype=tf.float64)

        def when_nonempty():
            min_ = tf.reduce_min(input_tensor=data)
            max_ = tf.reduce_max(input_tensor=data)
            range_ = max_ - min_
            is_singular = tf.equal(range_, 0)

            def when_nonsingular():
                bucket_width = range_ / tf.cast(bucket_count, tf.float64)
                offsets = data - min_
                bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                         dtype=tf.int32)
                clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
                # Use float64 instead of float32 to avoid accumulating floating point error
                # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
                # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
                one_hots = tf.one_hot(clamped_indices,
                                      depth=bucket_count,
                                      dtype=tf.float64)
                bucket_counts = tf.cast(
                    tf.reduce_sum(input_tensor=one_hots, axis=0),
                    dtype=tf.float64,
                )
                edges = tf.linspace(min_, max_, bucket_count + 1)
                left_edges = edges[:-1]
                right_edges = edges[1:]
                return tf.transpose(
                    a=tf.stack([left_edges, right_edges, bucket_counts]))

            def when_singular():
                center = min_
                bucket_starts = tf.stack([center - 0.5])
                bucket_ends = tf.stack([center + 0.5])
                bucket_counts = tf.stack(
                    [tf.cast(tf.size(input=data), tf.float64)])
                return tf.transpose(
                    a=tf.stack([bucket_starts, bucket_ends, bucket_counts]))

            return tf.cond(is_singular, when_singular, when_nonsingular)

        return tf.cond(is_empty, when_empty, when_nonempty)
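
For the non-empty, non-singular case, the same triples can be produced with numpy, give or take floating-point detail. Offered only as a cross-check, not as the library's implementation:

import numpy as np

def buckets_np(data, bucket_count=30):
    # Rough numpy analogue of when_nonsingular() above.
    data = np.asarray(data, dtype=np.float64).reshape(-1)
    counts, edges = np.histogram(data, bins=bucket_count,
                                 range=(data.min(), data.max()))
    # Rows of [left_edge, right_edge, count], matching the [k, 3] output.
    return np.stack([edges[:-1], edges[1:], counts.astype(np.float64)],
                    axis=1)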
Example #5
    def build_sample_graph(self,
                           input_pianorolls=None,
                           outer_masks=None,
                           total_gibbs_steps=None):
        """Builds the tf.while_loop based sampling graph.

    Args:
      input_pianorolls: Optional input pianorolls override. If None, uses the
          pianorolls placeholder.
      outer_masks: Optional input outer_masks override. If None, uses the
          outer_masks placeholder.
      total_gibbs_steps: Optional input total_gibbs_steps override. If None,
          uses the total_gibbs_steps placeholder.
    Returns:
      The output op of the graph.
    """
        if input_pianorolls is None:
            input_pianorolls = self.inputs["pianorolls"]
        if outer_masks is None:
            outer_masks = self.inputs["outer_masks"]

        tt = tf.shape(input_pianorolls)[1]
        sample_steps = tf.to_float(self.inputs["sample_steps"])
        if total_gibbs_steps is None:
            total_gibbs_steps = self.inputs["total_gibbs_steps"]
        temperature = self.inputs["temperature"]

        input_pianorolls = tf.to_float(input_pianorolls)
        outer_masks = self.make_outer_masks(outer_masks, input_pianorolls)

        # Calculate total_gibbs_steps as steps * num_instruments if not given.
        total_gibbs_steps = tf.cond(
            tf.equal(total_gibbs_steps, 0),
            lambda: tf.to_float(tt * self.hparams.num_instruments),
            lambda: tf.to_float(total_gibbs_steps))

        # sample_steps is set to total_gibbs_steps if not given.
        sample_steps = tf.cond(tf.equal(sample_steps, 0),
                               lambda: total_gibbs_steps,
                               lambda: tf.to_float(sample_steps))

        def infer_step(pianorolls, step_count):
            """Called by tf.while_loop, takes a Gibbs step."""
            mask_prob = compute_mask_prob_from_yao_schedule(
                step_count, total_gibbs_steps)
            # 1 indicates mask out, 0 is not mask.
            masks = make_bernoulli_masks(tf.shape(pianorolls), mask_prob,
                                         outer_masks)

            logits = self.predict(pianorolls, masks)
            samples = sample_with_temperature(logits, temperature=temperature)

            outputs = pianorolls * (1 - masks) + samples * masks

            check_completion_op = tf.assert_equal(
                tf.where(tf.equal(tf.reduce_max(masks, axis=2), 1.),
                         tf.reduce_max(outputs, axis=2),
                         tf.reduce_max(pianorolls, axis=2)), 1.)
            with tf.control_dependencies([check_completion_op]):
                outputs = tf.identity(outputs)

            step_count += 1
            return outputs, step_count

        current_step = tf.to_float(self.inputs["current_step"])

        # Initializes pianorolls by evaluating the model once to fill in all gaps.
        logits = self.predict(tf.to_float(input_pianorolls), outer_masks)
        samples = sample_with_temperature(logits, temperature=temperature)
        tf.get_variable_scope().reuse_variables()

        self.samples, current_step = tf.while_loop(
            lambda samples, current_step: current_step < sample_steps,
            infer_step, [samples, current_step],
            shape_invariants=[
                tf.TensorShape([None, None, None, None]),
                tf.TensorShape(None),
            ],
            back_prop=False,
            parallel_iterations=1,
            name="coco_while")
        self.samples.set_shape(input_pianorolls.shape)
        return self.samples
Example #6
def parse_fn(filename, output_sequence_length=IMAGES_PER_SEQUENCE):
    """Read data from single files stored in directories.

    Args:
      filename: the filename of the set of files to be loaded.
      output_sequence_length: Length of the output sequence. If less than
        IMAGES_PER_SEQUENCE, only the first `output_sequence_length` frames will
        be kept.

    Returns:
      A dictionary that maps strings to tf.Tensors of type float32:

      'rgb': a sequence of RGB images, each of shape H, W, 3. Each channel
             value is between 0.0 and 1.0.
      'intrinsics': a list of intrinsics values.
      'mask': a sequence of segmentation masks, with the three RGB uint8
              channel values packed into a single int32 value per pixel.
    """
    if output_sequence_length > IMAGES_PER_SEQUENCE or output_sequence_length < 1:
        raise ValueError(
            'Invalid output_sequence_length %d: must be within [1, '
            '%d].' % (output_sequence_length, IMAGES_PER_SEQUENCE))
    image_file = tf.strings.join([filename, '.png'])
    intrinsics_file = tf.strings.join([filename, '_cam.txt'])
    mask_file = tf.strings.join([filename, '-fseg.png'])

    # Read files.
    encoded_image = tf.io.read_file(image_file)
    encoded_mask = tf.io.read_file(mask_file)
    intrinsics_content = tf.io.read_file(intrinsics_file)
    content_is_empty = tf.math.equal(intrinsics_content, '')
    filename_matches = tf.strings.regex_full_match(
        filename, '.*%s$' % KITTI_CORRUPT_FILE)
    file_is_corrupt = tf.math.logical_and(content_is_empty, filename_matches)

    intrinsics_content = tf.cond(file_is_corrupt,
                                 lambda: KITTI_CORRUPT_FILE_INTRINSICS,
                                 lambda: intrinsics_content)

    # Parse intrinsics data to a tensor representing a 3x3 matrix.
    intrinsics = tf.strings.split([intrinsics_content], ',').values
    intrinsics = tf.strings.to_number(intrinsics)
    intrinsics.set_shape([9])

    fx, _, x0, _, fy, y0, _, _, _ = tf.unstack(intrinsics)
    intrinsics = tf.stack([IMAGE_WIDTH, IMAGE_HEIGHT, fx, fy, x0, y0])

    # Decode and normalize images.
    decoded_image = tf.image.decode_png(encoded_image, channels=3)
    decoded_image = tf.to_float(decoded_image) * (1 / 255.0)
    split_image_sequence = tf.split(decoded_image, IMAGES_PER_SEQUENCE, axis=1)

    decoded_mask = tf.image.decode_png(encoded_mask, channels=3)
    mask_r, mask_g, mask_b = tf.unstack(tf.to_int32(decoded_mask), axis=-1)
    # Since TPU does not support images of type uint8, we encode the 3 RGB uint8
    # values into one int32 value.
    mask = mask_r * (256 * 256) + mask_g * 256 + mask_b
    # All images in our pipeline have 3 dimensions (height, width, channels), so
    # we add a third dimension to the mask too.
    mask = tf.expand_dims(mask, -1)
    split_mask_sequence = tf.split(mask, IMAGES_PER_SEQUENCE, axis=1)

    return {
        'rgb': tf.stack(split_image_sequence[:output_sequence_length]),
        'intrinsics': tf.stack([intrinsics] * output_sequence_length),
        'mask': tf.stack(split_mask_sequence[:output_sequence_length]),
    }
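
The RGB-to-int32 packing used for the mask is invertible. A small numpy sketch (illustrative only) of the encoding above and the matching decoder:

import numpy as np

def pack_rgb(mask_rgb):
    # mask_rgb: uint8 array [..., 3] -> int32 [..., 1], as in the pipeline above.
    r, g, b = (mask_rgb[..., i].astype(np.int32) for i in range(3))
    return np.expand_dims(r * (256 * 256) + g * 256 + b, -1)

def unpack_rgb(mask_int32):
    # Inverse transform: recover the three uint8 channels.
    m = mask_int32[..., 0]
    return np.stack([m // (256 * 256) % 256, m // 256 % 256, m % 256],
                    axis=-1).astype(np.uint8)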
Example #7
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
           in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
           image that is fed to the network. The tensor is padded with -1 to
           the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: Groundtruth masks cropped by the bounding boxes and
          resized to a fixed size determined by mask_crop_size.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = input_utils.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes = input_utils.random_horizontal_flip(image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=input_utils.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  (image_height, image_width),
                                                  offset)
        if self._include_mask:
            masks = input_utils.resize_and_crop_masks(
                tf.expand_dims(masks, axis=-1), image_scale,
                (image_height, image_width), offset)
            masks = tf.squeeze(masks, axis=-1)

        # Filters out ground truth boxes that are all zeros.
        indices = input_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                box_utils.normalize_boxes(boxes,
                                          tf.shape(image)[0:2]),
                box_ind=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size,
                                     (image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(input_anchor,
                                                 self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes': input_anchor.multilevel_boxes,
            'image_info': image_info,
            'rpn_score_targets': rpn_score_targets,
            'rpn_box_targets': rpn_box_targets,
        }
        labels['gt_boxes'] = input_utils.pad_to_fixed_size(
            boxes, self._max_num_instances, -1)
        labels['gt_classes'] = input_utils.pad_to_fixed_size(
            classes, self._max_num_instances, -1)
        if self._include_mask:
            labels['gt_masks'] = input_utils.pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
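
`box_utils.denormalize_boxes` is project-specific; from its use above it maps normalized [y1, x1, y2, x2] boxes to pixel coordinates of the given image shape. A hedged numpy sketch of that assumed behavior:

import numpy as np

def denormalize_boxes(boxes, image_shape):
    # boxes: [N, 4] in normalized [y1, x1, y2, x2]; image_shape: (height, width).
    # Assumed semantics, for illustration only.
    height, width = image_shape
    return boxes * np.array([height, width, height, width], dtype=np.float32)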
Example #8
 def proposal(*args):
     return tf.cond(
         pred=no_crop_check(),
         true_fn=no_crop_proposal,
         false_fn=crop_proposal,
     )
Example #9
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """
	Args:
	  clip_mode: "global", "norm", or None.
	  moving_average: store the moving average of parameters
	"""

    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var**2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        curr_epoch = train_step // num_train_batches  # each train step consumes exactly one batch

        last_reset = tf.Variable(0,
                                 dtype=tf.int32,
                                 trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset,
                                          curr_epoch,
                                          use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update,
                                _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val, lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate,
                                         0.9,
                                         use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate,
                                                use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate,
                                     beta1=0.0,
                                     epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."

        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    else:
        return train_op, learning_rate, grad_norm, opt
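
The cosine branch implements cosine annealing with warm restarts (SGDR): whenever `T_curr` reaches `T_i`, the cycle restarts and `T_i` is multiplied by `lr_T_mul`. A stateless pure-Python sketch of the same schedule, with illustrative names:

import math

def sgdr_lr(epoch, lr_min, lr_max, t_0, t_mul):
    # Derive the position within the current cycle instead of keeping
    # last_reset/T_i variables as the graph version above does.
    t_i, last_reset = t_0, 0
    while epoch - last_reset >= t_i:
        last_reset += t_i
        t_i *= t_mul
    rate = (epoch - last_reset) / t_i * math.pi
    return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + math.cos(rate))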
Example #10
 def body(x):
     x = tf.constant(7)
     z = tf.constant(20)
     res = tf.cond(tf.less(x, 10), lambda: tf.add(
         10, 20), lambda: tf.square(10))
     return tf.multiply(res, x)
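
Since `x` is rebound to the constant 7, the predicate `x < 10` is always true here: the cond returns `tf.add(10, 20) = 30`, so the function yields `30 * 7 = 210` (`z` is never used). The arithmetic in plain Python:

x = 7
res = (10 + 20) if x < 10 else 10 ** 2  # tf.cond always picks the first branch
assert res * x == 210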
Example #11
def D(data_source,
      x_real,
      x_fake,
      dropout_rate,
      is_training,
      reuse=True,
      print_summary=True):
    # data_source is a string, either "fake" or "real", which determines whether
    # to do the word embedding lookup, to avoid non-differentiability issues.
    # Discriminator (x -> n + 1 classes).

    with tf.variable_scope('Discriminator', reuse=reuse) as scope:
        # Embedding layer
        # Input x has shape [batch_size, 63] where 63 is the sequence length
        W_embed = tf.Variable(tf.random_uniform([vocab_size, embedding_size],
                                                -1.0, 1.0),
                              name="W_embed")
        embedded_chars = tf.nn.embedding_lookup(W_embed, x_real)
        # Add a channel dimension:
        embedded_char_expanded = tf.expand_dims(embedded_chars, -1)
        # Output size: [batch_size, sequence_length, embedding_size, 1]

        print('fake shape is!')
        print(x_fake.get_shape())
        print('embed_char_expand shape is!')
        print(embedded_char_expanded.get_shape())

        # conditional pipeline!
        def f1():
            return embedded_char_expanded

        def f2():
            return x_fake

        real_or_fake = tf.math.equal('real', data_source)
        input_x = tf.cond(real_or_fake, f1, f2)

        print('input_x shape is!')  # [batch, seq_len, embed_size, 1]
        print(input_x.get_shape())

        # As per the paper, the pooling layer takes the max of each filter's
        # feature maps.
        pooled_outputs = []
        # NOTE: We are using multiple filter sizes as per the paper's specs
        for i, filter_size in enumerate(filter_sizes):
            #with tf.name_scope("conv-maxpool-filter_size-"+str(filter_size)):
            # Define W as the filter matrix (NOTE: different namescope from the W above)
            # Initialized with truncated normal parameters
            # The W filter has shape: [height, width, input_channels, output_channels]
            W = tf.Variable(
                tf.truncated_normal(
                    [filter_size, embedding_size, 1, num_filters], stddev=0.1))
            # Conv layer: valid padding yields output of shape:
            # [none, sequence_length - filter_size + 1, 1, num_filters]
            # for dimensions: [none, height, width, channel]
            # TF document: "(conv2d) has the same type as input and the same outer batch shape."
            conv = tf.nn.conv2d(input_x,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Bias vector: 1-D vector whose length equals the number of output channels of conv
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            # Relu
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
            # Note: h is already non-negative after the ReLU, so this
            # leaky-ReLU-style max is a no-op (tf.maximum(0.2 * h, h) == h).
            lrelu3 = tf.maximum(0.2 * h, h)
            # TF document: "ksize: The size of the window for each dimension of the input tensor."
            pooled = tf.nn.max_pool(
                lrelu3,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="pool")
            # The output now has size: [none, 1, 1, num_filters]
            pooled_outputs.append(pooled)
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs,
                           3)  # Concatenate on the fourth dimension
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

        # The output now has shape: [none, num_filters_total]
        #with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool_flat, rate=dropout_rate)
        #with tf.name_scope("output"):
        # Fully connected layer
        # Matrix multiplication: (none, num_filters_total)x(num_filters_total, num_classes) = (none, num_classes)
        W = tf.Variable(tf.truncated_normal(
            [num_filters_total, num_classes + 1], stddev=0.1),
                        name="W")
        # NOTE: b has dimension of the channels (in this case, num_classes)
        b = tf.Variable(tf.constant(0.1, shape=[num_classes + 1]), name="b")
        fc = tf.nn.xw_plus_b(h_drop, W, b, name="scores")  #Logits
        output = tf.nn.softmax(fc)
        return h_pool_flat, fc, output, real_or_fake
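
Note that `lrelu3 = tf.maximum(0.2 * h, h)` above is applied after a ReLU, so `h` is already non-negative and the max is a no-op. A leaky ReLU is normally applied to the pre-activation; a minimal sketch of the presumably intended variant:

def leaky_relu(x, alpha=0.2):
    # max(alpha * x, x): identity for x >= 0, slope alpha for x < 0.
    # Only differs from ReLU when applied before any ReLU.
    return tf.maximum(alpha * x, x)

# Presumed intent for the conv block above (illustrative):
#   pre = tf.nn.bias_add(conv, b)
#   lrelu3 = leaky_relu(pre)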
Example #12
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        image_info: image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                data['groundtruth_is_crowd'] = tf.cond(
                    tf.greater(tf.size(data['groundtruth_is_crowd']), 0),
                    lambda: data['groundtruth_is_crowd'],
                    lambda: tf.zeros_like(data['groundtruth_classes'],
                                          dtype=tf.bool))
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                input_height = tf.shape(image)[0]
                input_width = tf.shape(image)[1]

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                scaled_height = tf.to_float(
                    input_height) * input_processor.image_scale
                scaled_width = tf.to_float(
                    input_width) * input_processor.image_scale
                image_info = tf.stack([
                    tf.cast(scaled_height, dtype=tf.float32),
                    tf.cast(scaled_width, dtype=tf.float32),
                    image_scale,
                    tf.cast(input_height, dtype=tf.float32),
                    tf.cast(input_width, dtype=tf.float32),
                ])
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, image_info, boxes, is_crowds,
                        areas, classes)
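
`pad_to_fixed_size` is project-specific; its call sites above suggest it pads (or truncates) the leading dimension to a fixed size with a given fill value. A hedged numpy sketch of that assumed behavior:

import numpy as np

def pad_to_fixed_size(data, pad_value, output_shape):
    # Assumed semantics: fix the row count at output_shape[0], padding with
    # pad_value and truncating any excess rows.
    num_rows, num_cols = output_shape
    data = np.asarray(data, dtype=np.float32).reshape(-1, num_cols)[:num_rows]
    pad = np.full((num_rows - data.shape[0], num_cols), pad_value,
                  dtype=np.float32)
    return np.concatenate([data, pad], axis=0)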
Example #13
    def compute_total_loss(self, pd_new, pd_old, value_tensor, return_tensor,
                           batch_advantage_norm, policy_old_neg_logprob_tensor,
                           policy_action_tensor):
        """Defines the total loss function.

    Args:
      pd_new: The current policy distribution
        (a multivariate normal distribution). This policy distribution gets
        updated in the course of training.
      pd_old: The old policy distribution that we use during sampling the
        trajectory (a multivariate normal distribution).
      value_tensor: The values associated to the rollout trajectory.
      return_tensor: The return values computed for the rollout trajectory.
      batch_advantage_norm: The normalized advantage tensor computed for a
        batch of data. For advantage calculation, we use generalized
        advantage estimation (GAE) formula.
      policy_old_neg_logprob_tensor: The negative log probabilities from the
        policy rollouts.
      policy_action_tensor: The actions from the policy rollouts.
    """
        # Policy loss
        ppo_policy_loss_out = ppo_loss.ppo_policy_loss(
            neg_logprobs_old=policy_old_neg_logprob_tensor,
            actions=policy_action_tensor,
            advantages=batch_advantage_norm,
            dist_new=pd_new,
            mcts_sampling=self.mcts_sampling_enable)

        (self.policy_loss, self.approxkl, self.clipfrac,
         self.policy_ratio) = ppo_policy_loss_out

        # Value Loss
        if self._ppo2_enable:
            self.value_loss = ppo_loss.ppo2_value_loss(
                value_old=value_tensor,
                pred_value=self.value_new,
                returns=return_tensor)
        else:
            self.value_loss = ppo_loss.ppo1_value_loss(
                pred_value=self.value_new, returns=return_tensor)

        # MSE losses between the policy and MCTS means and log standard deviations
        self.mean_mse_loss, self.logstd_mse_loss = ppo_loss.l2_norm_policy_loss(
            policy_mean=self.mean_new,
            policy_logstd=self.logstd_new,
            mcts_mean=self.mean_old,
            mcts_logstd=self.logstd_old)

        mcts_dist = distributions.MultiVariateNormalDiag(
            mean=self.mean_old, logstd=self.logstd_old)
        policy_dist = distributions.MultiVariateNormalDiag(
            mean=self.mean_new, logstd=self.logstd_new)
        self.imitation_kl_divergence = tf.reduce_mean(
            policy_dist.kl_divergence(mcts_dist))
        # Calculate KL divergence and entropy of new distribution
        self.kl_divergence = tf.reduce_mean(pd_new.kl_divergence(pd_old))
        self.entropy = pd_new.entropy()

        # Calculate entropy loss
        self.entropy_loss = tf.reduce_mean(self.entropy)

        # Calculate total loss
        total_loss_ppo = (self._policy_coeff * self.policy_loss) + (
            self._value_coeff * self.value_loss) - (self._entropy_coeff *
                                                    self.entropy_loss)

        total_loss_mcts = (self._value_coeff * self.value_loss) + (
            self._mse_loss_coeff *
            (self.imitation_kl_divergence + self.entropy_loss))

        self.total_loss = tf.cond(
            tf.equal(self.mcts_sampling_enable, True),
            lambda: total_loss_mcts,
            lambda: total_loss_ppo)
Example #14
def create_train_op(optimizer,
                    grads_and_vars,
                    max_grad=1.0,
                    mixed_precision=False,
                    gradient_accumulation_steps=1):
    global_step = tf.train.get_or_create_global_step()

    if gradient_accumulation_steps > 1:
        local_step = tf.get_variable(name="local_step",
                                     shape=[],
                                     dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer)
        batch_finite = tf.get_variable(name="batch_finite",
                                       shape=[],
                                       dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer)
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tf.trainable_variables()
        ]

        reset_step = tf.cast(tf.math.equal(
            local_step % gradient_accumulation_steps, 0),
                             dtype=tf.bool)
        local_step = tf.cond(
            reset_step, lambda: local_step.assign(tf.ones_like(local_step)),
            lambda: local_step.assign_add(1))

        grads_and_vars_and_accums = [(gv[0], gv[1], accum_vars[i])
                                     for i, gv in enumerate(grads_and_vars)
                                     if gv[0] is not None]
        grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums))

        all_are_finite = tf.reduce_all([
            tf.reduce_all(tf.is_finite(g)) for g in grads
        ]) if mixed_precision else tf.constant(True, dtype=tf.bool)
        batch_finite = tf.cond(
            reset_step, lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number
        # to prevent clip_by_global_norm from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        accum_vars = tf.cond(
            reset_step, lambda: [
                accum_vars[i].assign(grad)
                for i, grad in enumerate(clipped_grads)
            ], lambda: [
                accum_vars[i].assign_add(grad)
                for i, grad in enumerate(clipped_grads)
            ])

        def update(accum_vars):
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)))

        update_step = tf.identity(tf.cast(tf.math.equal(
            local_step % gradient_accumulation_steps, 0),
                                          dtype=tf.bool),
                                  name="update_step")
        update_op = tf.cond(update_step, lambda: update(accum_vars),
                            lambda: tf.no_op())

        new_global_step = tf.cond(
            tf.math.logical_and(update_step, batch_finite),
            lambda: global_step + 1, lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op, [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        all_are_finite = tf.reduce_all([
            tf.reduce_all(tf.is_finite(g)) for g in grads
        ]) if mixed_precision else tf.constant(True, dtype=tf.bool)

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number
        # to prevent clip_by_global_norm from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        # Do not pass global_step into apply_gradients: this Adam variant does
        # not increment the global step internally, whereas TF's built-in
        # optimizers do, which would make the global step advance twice.
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))

        new_global_step = tf.cond(all_are_finite, lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')

        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
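
The accumulation branch is easier to follow in plain Python: overwrite the accumulators on the first micro-batch of each window, add on the rest, and apply once per window. A minimal sketch under those assumptions (names illustrative, optimizer state elided):

def accumulate_and_apply(step, grads, accum, apply_fn, accumulation_steps):
    # step counts micro-batches starting at 1, like local_step above.
    reset = (step - 1) % accumulation_steps == 0
    for i, g in enumerate(grads):
        accum[i] = g if reset else accum[i] + g  # assign vs. assign_add
    if step % accumulation_steps == 0:           # update_step above
        apply_fn(accum)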
Example #15
    def __init__(self,
                 num_unique_documents,
                 vocab_size,
                 num_topics,
                 freqs,
                 embedding_size=128,
                 num_sampled=40,
                 learning_rate=1e-3,
                 lmbda=150.0,
                 alpha=None,
                 power=0.75,
                 batch_size=32,
                 clip_gradients=5.0,
                 **kwargs):
        device = get_device(**kwargs)
        _graph = tf.Graph()

        with _graph.as_default():
            with tf.device(device):
                moving_avgs = tf.train.ExponentialMovingAverage(0.9)
                self.batch_size = batch_size
                self.freqs = freqs

                self.X = tf.placeholder(tf.int32, shape=[None])
                self.Y = tf.placeholder(tf.int64, shape=[None])
                self.DOC = tf.placeholder(tf.int32, shape=[None])
                self.switch_loss = tf.Variable(0, trainable=False)
                train_labels = tf.reshape(self.Y, [-1, 1])
                sampler = tf.nn.fixed_unigram_candidate_sampler(
                    train_labels,
                    num_true=1,
                    num_sampled=num_sampled,
                    unique=True,
                    range_max=vocab_size,
                    distortion=power,
                    unigrams=self.freqs,
                )

                self.word_embedding = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
                self.nce_weights = tf.Variable(
                    tf.truncated_normal(
                        [vocab_size, embedding_size],
                        stddev=tf.sqrt(1 / embedding_size),
                    ))
                self.nce_biases = tf.Variable(tf.zeros([vocab_size]))
                scalar = 1 / np.sqrt(num_unique_documents + num_topics)
                self.doc_embedding = tf.Variable(
                    tf.random_normal(
                        [num_unique_documents, num_topics],
                        mean=0,
                        stddev=50 * scalar,
                    ))
                self.topic_embedding = tf.get_variable(
                    'topic_embedding',
                    shape=[num_topics, embedding_size],
                    dtype=tf.float32,
                    initializer=tf.orthogonal_initializer(gain=scalar),
                )
                pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
                proportions = tf.nn.embedding_lookup(self.doc_embedding,
                                                     self.DOC)
                doc = tf.matmul(proportions, self.topic_embedding)
                doc_context = doc
                word_context = pivot
                context = tf.add(word_context, doc_context)
                loss_word2vec = tf.reduce_mean(
                    tf.nn.nce_loss(
                        weights=self.nce_weights,
                        biases=self.nce_biases,
                        labels=self.Y,
                        inputs=context,
                        num_sampled=num_sampled,
                        num_classes=vocab_size,
                        num_true=1,
                        sampled_values=sampler,
                    ))
                self.fraction = tf.Variable(1,
                                            trainable=False,
                                            dtype=tf.float32)

                n_topics = self.doc_embedding.get_shape()[1].value
                log_proportions = tf.nn.log_softmax(self.doc_embedding)
                if alpha is None:
                    alpha = 1.0 / n_topics
                loss = (alpha - 1) * log_proportions
                prior = tf.reduce_sum(loss)

                loss_lda = lmbda * self.fraction * prior
                global_step = tf.Variable(0,
                                          trainable=False,
                                          name='global_step')
                self.cost = tf.cond(
                    global_step < self.switch_loss,
                    lambda: loss_word2vec,
                    lambda: loss_word2vec + loss_lda,
                )
                loss_avgs_op = moving_avgs.apply(
                    [loss_lda, loss_word2vec, self.cost])
                with tf.control_dependencies([loss_avgs_op]):
                    optimizer = tf.train.AdamOptimizer(
                        learning_rate=learning_rate)
                    gvs = optimizer.compute_gradients(self.cost)
                    capped_gvs = [(
                        tf.clip_by_value(grad, -clip_gradients,
                                         clip_gradients),
                        var,
                    ) for grad, var in gvs]
                    self.optimizer = optimizer.apply_gradients(capped_gvs)
                self.sess = generate_session(_graph, **kwargs)
                self.sess.run(tf.global_variables_initializer())
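The prior term above is the log-density of a symmetric Dirichlet over each document's topic proportions, up to an additive constant: it sums (alpha - 1) * log p over documents and topics, exactly what loss computes from log_proportions. A NumPy sketch of just that quantity, assuming doc_logits holds the unnormalized document-topic matrix:

import numpy as np

def dirichlet_log_prior(doc_logits, alpha):
    # Log-softmax over the topic axis gives log proportions per document.
    shifted = doc_logits - doc_logits.max(axis=1, keepdims=True)
    log_prop = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    # Symmetric Dirichlet log-density, up to an additive constant.
    return ((alpha - 1.0) * log_prop).sum()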
Example #16
  def decode(self, tf_example_string_tensor):
    """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask area in pixels squared.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
        shape [None] indicating if a class is present in the image (1.0) or
        a class is not present in the image (0.0).
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilities.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
    """
    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                    self.items_to_handlers)
    keys = decoder.list_items()
    tensors = decoder.decode(serialized_example, items=keys)
    tensor_dict = dict(zip(keys, tensors))
    is_crowd = fields.InputDataFields.groundtruth_is_crowd
    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
    tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
        tensor_dict[fields.InputDataFields.image])[:2]

    if fields.InputDataFields.image_additional_channels in tensor_dict:
      channels = tensor_dict[fields.InputDataFields.image_additional_channels]
      channels = tf.squeeze(channels, axis=3)
      channels = tf.transpose(channels, perm=[1, 2, 0])
      tensor_dict[fields.InputDataFields.image_additional_channels] = channels

    def default_groundtruth_weights():
      return tf.ones(
          [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
          dtype=tf.float32)

    tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
        tf.greater(
            tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
            0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
        default_groundtruth_weights)

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
      # Set all keypoints that are not labeled to NaN.
      gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
      gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
      visibilities_tiled = tf.tile(
          tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1),
          [1, 1, 2])
      tensor_dict[gt_kpt_fld] = tf.where(
          visibilities_tiled,
          tensor_dict[gt_kpt_fld],
          np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

    if self._expand_hierarchy_labels:
      input_fields = fields.InputDataFields
      image_classes, image_confidences = self._expand_image_label_hierarchy(
          tensor_dict[input_fields.groundtruth_image_classes],
          tensor_dict[input_fields.groundtruth_image_confidences])
      tensor_dict[input_fields.groundtruth_image_classes] = image_classes
      tensor_dict[input_fields.groundtruth_image_confidences] = (
          image_confidences)

      box_fields = [
          fields.InputDataFields.groundtruth_group_of,
          fields.InputDataFields.groundtruth_is_crowd,
          fields.InputDataFields.groundtruth_difficult,
          fields.InputDataFields.groundtruth_area,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_weights,
      ]

      def expand_field(field_name):
        return self._expansion_box_field_labels(
            tensor_dict[input_fields.groundtruth_classes],
            tensor_dict[field_name])

      # pylint: disable=cell-var-from-loop
      for field in box_fields:
        if field in tensor_dict:
          tensor_dict[field] = tf.cond(
              tf.size(tensor_dict[field]) > 0, lambda: expand_field(field),
              lambda: tensor_dict[field])
      # pylint: enable=cell-var-from-loop

      tensor_dict[input_fields.groundtruth_classes] = (
          self._expansion_box_field_labels(
              tensor_dict[input_fields.groundtruth_classes],
              tensor_dict[input_fields.groundtruth_classes], True))

    if fields.InputDataFields.groundtruth_group_of in tensor_dict:
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)

    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
          dtype=tf.int32)
      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
          dtype=tf.int32)

    if fields.InputDataFields.groundtruth_track_ids in tensor_dict:
      tensor_dict[fields.InputDataFields.groundtruth_track_ids] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_track_ids],
          dtype=tf.int32)

    return tensor_dict
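One small pattern worth calling out in decode() is the tf.cond that substitutes all-ones weights when no per-box weights were serialized. A standalone sketch of that default, assuming TF1-style code with weights shaped [M] and boxes shaped [N, 4]:

import tensorflow.compat.v1 as tf

def weights_or_default(weights, boxes):
    # Use the serialized weights if any exist; otherwise weight every
    # groundtruth box equally.
    return tf.cond(
        tf.greater(tf.shape(weights)[0], 0),
        lambda: weights,
        lambda: tf.ones([tf.shape(boxes)[0]], dtype=tf.float32))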
Example #17
    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # reshape y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(input=y_pred)[:3],
                       tf.constant([3, -1])],
                      axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(input=y_true)[1]
        grid_w = tf.shape(input=y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input=input_image)[1]
        net_w = tf.shape(input=input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = y_pred[..., 5:]  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1)
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag all objectness of all boxes to 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4)
        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        count = tf.reduce_sum(input_tensor=object_mask)
        count_noobj = tf.reduce_sum(input_tensor=1 - object_mask)
        detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5,
                              dtype=tf.float32)
        class_mask = tf.expand_dims(
            tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1),
                             true_box_class),
                    dtype=tf.float32), 4)
        recall50 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(
            input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) *
            detect_mask * class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(input_tensor=pred_box_conf *
                                object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(input_tensor=object_mask *
                                class_mask) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            pred=tf.less(batch_seen, self.warmup_batches + 1),
            true_fn=lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ],
            false_fn=lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(
            2 - wh_scale[..., 0] * wh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy -
                                true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh -
                                true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (
            pred_box_conf - true_box_conf) * self.obj_scale + (
                1 - object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta),
                                axis=list(range(1, 5)))
        loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta),
                                axis=list(range(1, 5)))
        loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta),
                                  axis=list(range(1, 5)))
        loss_class = tf.reduce_sum(input_tensor=class_delta,
                                   axis=list(range(1, 5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        loss = tf.Print(loss, [grid_h, avg_obj],
                        message='avg_obj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_noobj],
                        message='avg_noobj \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_iou],
                        message='avg_iou \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, avg_cat],
                        message='avg_cat \t\t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall50],
                        message='recall50 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, recall75],
                        message='recall75 \t',
                        summarize=1000)
        loss = tf.Print(loss, [grid_h, count],
                        message='count \t',
                        summarize=1000)
        loss = tf.Print(loss, [
            grid_h,
            tf.reduce_sum(input_tensor=loss_xy),
            tf.reduce_sum(input_tensor=loss_wh),
            tf.reduce_sum(input_tensor=loss_conf),
            tf.reduce_sum(input_tensor=loss_class)
        ],
                        message='loss xy, wh, conf, class: \t',
                        summarize=1000)

        return loss * self.grid_scale
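The ignore-mask logic above hinges on a broadcasted IoU between every predicted box and every true box, both given as (center, size) pairs. The same corner conversion and intersection-over-union as a compact helper, as a sketch (the name iou_centroids is ours, not the source's):

import tensorflow.compat.v1 as tf

def iou_centroids(xy_a, wh_a, xy_b, wh_b):
    # Convert (center, size) to corner coordinates.
    mins_a, maxes_a = xy_a - wh_a / 2., xy_a + wh_a / 2.
    mins_b, maxes_b = xy_b - wh_b / 2., xy_b + wh_b / 2.
    # Intersection rectangle, clamped at zero width/height.
    inter_wh = tf.maximum(
        tf.minimum(maxes_a, maxes_b) - tf.maximum(mins_a, mins_b), 0.)
    inter = inter_wh[..., 0] * inter_wh[..., 1]
    union = (wh_a[..., 0] * wh_a[..., 1] +
             wh_b[..., 0] * wh_b[..., 1] - inter)
    return tf.truediv(inter, union)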
Example #18
    def call(self, y_pred, mask=None):
        '''
        Returns:
            3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded
            to always yield `top_k` predictions per batch item. The last axis contains
            the coordinates for each predicted box in the format
            `[class_id, confidence, xmin, ymin, xmax, ymax]`.
        '''

        #####################################################################################
        # 1. Convert the box coordinates from predicted anchor box offsets to predicted
        #    absolute coordinates
        #####################################################################################

        # Extract the predicted class IDs as the indices of the highest confidence values.
        class_ids = tf.expand_dims(tf.to_float(
            tf.argmax(y_pred[..., :-12], axis=-1)),
                                   axis=-1)
        # Extract the confidences of the maximal classes.
        confidences = tf.reduce_max(y_pred[..., :-12], axis=-1, keep_dims=True)

        # Convert anchor box offsets to image offsets.
        cx = y_pred[..., -12] * y_pred[..., -4] * y_pred[..., -6] + y_pred[
            ..., -8]  # cx = cx_pred * cx_variance * w_anchor + cx_anchor
        cy = y_pred[..., -11] * y_pred[..., -3] * y_pred[..., -5] + y_pred[
            ..., -7]  # cy = cy_pred * cy_variance * h_anchor + cy_anchor
        w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[
            ..., -6]  # w = exp(w_pred * variance_w) * w_anchor
        h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[
            ..., -5]  # h = exp(h_pred * variance_h) * h_anchor

        # Convert 'centroids' to 'corners'.
        xmin = cx - 0.5 * w
        ymin = cy - 0.5 * h
        xmax = cx + 0.5 * w
        ymax = cy + 0.5 * h

        # If the model predicts box coordinates relative to the image dimensions and they are supposed
        # to be converted back to absolute coordinates, do that.
        def normalized_coords():
            xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1)
            ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1)
            xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1)
            ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1)
            return xmin1, ymin1, xmax1, ymax1

        def non_normalized_coords():
            return tf.expand_dims(xmin, axis=-1), tf.expand_dims(
                ymin,
                axis=-1), tf.expand_dims(xmax,
                                         axis=-1), tf.expand_dims(ymax,
                                                                  axis=-1)

        xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords,
                                         normalized_coords,
                                         non_normalized_coords)

        # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor.
        y_pred = tf.concat(
            values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1)

        #####################################################################################
        # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering.
        #####################################################################################

        batch_size = tf.shape(y_pred)[0]  # Output dtype: tf.int32
        n_boxes = tf.shape(y_pred)[1]
        n_classes = y_pred.shape[2] - 4
        class_indices = tf.range(1, n_classes)

        # Create a function that filters the predictions for the given batch item. Specifically, it performs:
        # - confidence thresholding
        # - non-maximum suppression (NMS)
        # - top-k filtering
        def filter_predictions(batch_item):

            # Keep only the non-background boxes.
            positive_boxes = tf.not_equal(batch_item[..., 0], 0.0)
            predictions = tf.boolean_mask(tensor=batch_item,
                                          mask=positive_boxes)

            def perform_confidence_thresholding():
                # Apply confidence thresholding.
                threshold_met = predictions[:, 1] > self.tf_confidence_thresh
                return tf.boolean_mask(tensor=predictions, mask=threshold_met)

            def no_positive_boxes():
                return tf.constant(value=0.0, shape=(1, 6))

            # If there are any positive predictions, perform confidence thresholding.
            predictions_conf_thresh = tf.cond(
                tf.equal(tf.size(predictions), 0), no_positive_boxes,
                perform_confidence_thresholding)

            def perform_nms():
                scores = predictions_conf_thresh[..., 1]

                # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
                xmin = tf.expand_dims(predictions_conf_thresh[..., -4],
                                      axis=-1)
                ymin = tf.expand_dims(predictions_conf_thresh[..., -3],
                                      axis=-1)
                xmax = tf.expand_dims(predictions_conf_thresh[..., -2],
                                      axis=-1)
                ymax = tf.expand_dims(predictions_conf_thresh[..., -1],
                                      axis=-1)
                boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)

                maxima_indices = tf.image.non_max_suppression(
                    boxes=boxes,
                    scores=scores,
                    max_output_size=self.tf_nms_max_output_size,
                    iou_threshold=self.iou_threshold,
                    name='non_maximum_suppression')
                maxima = tf.gather(params=predictions_conf_thresh,
                                   indices=maxima_indices,
                                   axis=0)
                return maxima

            def no_confident_predictions():
                return tf.constant(value=0.0, shape=(1, 6))

            # If any boxes made the threshold, perform NMS.
            predictions_nms = tf.cond(
                tf.equal(tf.size(predictions_conf_thresh), 0),
                no_confident_predictions, perform_nms)

            # Perform top-k filtering for this batch item or pad it in case there are
            # fewer than `self.top_k` boxes left at this point. Either way, produce a
            # tensor of length `self.top_k`. By the time we return the final results tensor
            # for the whole batch, all batch items must have the same number of predicted
            # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k`
            # predictions are left after the filtering process above, we pad the missing
            # predictions with zeros as dummy entries.
            def top_k():
                return tf.gather(params=predictions_nms,
                                 indices=tf.nn.top_k(predictions_nms[:, 1],
                                                     k=self.tf_top_k,
                                                     sorted=True).indices,
                                 axis=0)

            def pad_and_top_k():
                padded_predictions = tf.pad(tensor=predictions_nms,
                                            paddings=[[
                                                0, self.tf_top_k -
                                                tf.shape(predictions_nms)[0]
                                            ], [0, 0]],
                                            mode='CONSTANT',
                                            constant_values=0.0)
                return tf.gather(params=padded_predictions,
                                 indices=tf.nn.top_k(padded_predictions[:, 1],
                                                     k=self.tf_top_k,
                                                     sorted=True).indices,
                                 axis=0)

            top_k_boxes = tf.cond(
                tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k),
                top_k, pad_and_top_k)

            return top_k_boxes

        # Iterate `filter_predictions()` over all batch items.
        output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x),
                                  elems=y_pred,
                                  dtype=None,
                                  parallel_iterations=128,
                                  back_prop=False,
                                  swap_memory=False,
                                  infer_shape=True,
                                  name='loop_over_batch')

        return output_tensor
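The four offset-decoding lines near the top implement the standard SSD box parameterization with per-coordinate variances. The same arithmetic as a standalone function, as a sketch (the argument layout is an assumption; the layer above reads everything out of slices of y_pred):

import tensorflow.compat.v1 as tf

def decode_ssd_offsets(offsets, anchors, variances):
    # offsets and anchors are [..., 4] in (cx, cy, w, h) order;
    # variances is a 4-tuple (v_cx, v_cy, v_w, v_h).
    cx = offsets[..., 0] * variances[0] * anchors[..., 2] + anchors[..., 0]
    cy = offsets[..., 1] * variances[1] * anchors[..., 3] + anchors[..., 1]
    w = tf.exp(offsets[..., 2] * variances[2]) * anchors[..., 2]
    h = tf.exp(offsets[..., 3] * variances[3]) * anchors[..., 3]
    # Return corners (xmin, ymin, xmax, ymax).
    return tf.stack(
        [cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], axis=-1)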
Example #19
def patch_image(image,
                bboxes=None,
                offset_height=0,
                offset_width=0,
                target_height=None,
                target_width=None):
    """Gets a patch using tf.image.crop_to_bounding_box and adjusts bboxes

    If patching would leave us with zero bboxes, we return the image and bboxes
    unchanged.

    Args:
        image: Float32 Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        offset_height: Height of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        offset_width: Width of the upper-left corner of the patch with respect
            to the original image. Non-negative.
        target_height: Height of the patch. If set to none, it will be the
            maximum (tf.shape(image)[0] - offset_height - 1). Positive.
        target_width: Width of the patch. If set to none, it will be the
            maximum (tf.shape(image)[1] - offset_width - 1). Positive.

    Returns:
        image: Patch of the original image.
        bboxes: Adjusted bboxes (only those whose centers are inside the
            patch). The key isn't set if bboxes is None.
    """
    # TODO: make this function safe with respect to senseless inputs (i.e
    # having an offset_height that's larger than tf.shape(image)[0], etc.)
    # As of now we only use it inside random_patch, which already makes sure
    # the arguments are legal.
    im_shape = tf.shape(image)
    if target_height is None:
        target_height = im_shape[0] - offset_height - 1
    if target_width is None:
        target_width = im_shape[1] - offset_width - 1

    new_image = tf.image.crop_to_bounding_box(
        image,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width,
    )
    patch_shape = tf.shape(new_image)

    # Return if we didn't have bboxes.
    if bboxes is None:
        # Resize the patch to the original image's size. This is to make sure
        # we respect restrictions in image size in the models.
        new_image_resized = tf.image.resize_images(
            new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
        return_dict = {"image": new_image_resized}
        return return_dict

    # Now we will remove all bboxes whose centers are not inside the cropped
    # image.

    # First get the x and y coordinates of the center of each of the
    # bboxes.
    bboxes_center_x = tf.reduce_mean(
        tf.concat(
            [
                # bboxes[:, 0:1] gets a Tensor with shape (total_boxes, 1)
                # instead of the (total_boxes,) that bboxes[:, 0] would give.
                bboxes[:, 0:1],
                bboxes[:, 2:3],
            ],
            axis=1,
        ),
        axis=1,
    )
    bboxes_center_y = tf.reduce_mean(tf.concat(
        [bboxes[:, 1:2], bboxes[:, 3:4]], axis=1),
                                     axis=1)

    # Now we get a boolean tensor holding, for each of the bboxes' centers,
    # whether they are inside the patch.
    center_x_is_inside = tf.logical_and(
        tf.greater(bboxes_center_x, offset_width),
        tf.less(bboxes_center_x, tf.add(target_width, offset_width)))
    center_y_is_inside = tf.logical_and(
        tf.greater(bboxes_center_y, offset_height),
        tf.less(bboxes_center_y, tf.add(target_height, offset_height)))
    center_is_inside = tf.logical_and(center_x_is_inside, center_y_is_inside)

    # Now we mask the bboxes, removing all those whose centers are outside
    # the patch.
    masked_bboxes = tf.boolean_mask(bboxes, center_is_inside)
    # We move the bboxes to the right place, clipping them if
    # necessary.
    new_bboxes_unclipped = tf.concat(
        [
            tf.subtract(masked_bboxes[:, 0:1], offset_width),
            tf.subtract(masked_bboxes[:, 1:2], offset_height),
            tf.subtract(masked_bboxes[:, 2:3], offset_width),
            tf.subtract(masked_bboxes[:, 3:4], offset_height),
        ],
        axis=1,
    )
    # Finally, we clip the boxes and add back the labels.
    new_bboxes = tf.concat(
        [
            tf.to_int32(
                clip_boxes(new_bboxes_unclipped, imshape=patch_shape[:2])),
            masked_bboxes[:, 4:],
        ],
        axis=1,
    )
    # Now resize the image to the original size and adjust bboxes accordingly
    new_image_resized = tf.image.resize_images(
        new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
    # adjust_bboxes requires height and width values with dtype=float32
    new_bboxes_resized = adjust_bboxes(
        new_bboxes,
        old_height=tf.to_float(patch_shape[0]),
        old_width=tf.to_float(patch_shape[1]),
        new_height=tf.to_float(im_shape[0]),
        new_width=tf.to_float(im_shape[1]),
    )

    # Finally, set up the return dict, but only update the image and bboxes if
    # our patch has at least one bbox in it.
    update_condition = tf.greater_equal(tf.shape(new_bboxes_resized)[0], 1)
    return_dict = {}
    return_dict["image"] = tf.cond(update_condition, lambda: new_image_resized,
                                   lambda: image)
    return_dict["bboxes"] = tf.cond(update_condition,
                                    lambda: new_bboxes_resized, lambda: bboxes)
    return return_dict
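A hedged usage sketch of patch_image, assuming boxes laid out as (xmin, ymin, xmax, ymax, label) as the center computation implies:

import tensorflow.compat.v1 as tf

image = tf.random_uniform([480, 640, 3])
bboxes = tf.constant([[10., 20., 110., 220., 3.],
                      [600., 400., 630., 470., 1.]])
patched = patch_image(image, bboxes=bboxes,
                      offset_height=50, offset_width=50,
                      target_height=300, target_width=400)
# patched["image"] is resized back to 480x640; patched["bboxes"] keeps only
# the first box, whose center (60, 120) lies inside the 50..450 x 50..350
# patch region, and shifts it by the offsets.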
Example #21
def main(unused_argv=None):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.config is None:
        raise RuntimeError("No config name specified.")

    config = utils.get_module("wavenet." + FLAGS.config).Config(
        FLAGS.train_path)

    logdir = FLAGS.logdir
    tf.logging.info("Saving to %s" % logdir)

    with tf.Graph().as_default():
        total_batch_size = FLAGS.total_batch_size
        assert total_batch_size % FLAGS.worker_replicas == 0
        worker_batch_size = total_batch_size // FLAGS.worker_replicas

        # Run the Reader on the CPU
        cpu_device = "/job:localhost/replica:0/task:0/cpu:0"
        if FLAGS.ps_tasks:
            cpu_device = "/job:worker/cpu:0"

        with tf.device(cpu_device):
            inputs_dict = config.get_batch(worker_batch_size)

        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks,
                                               merge_devices=True)):
            global_step = tf.get_variable(
                "global_step", [],
                tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)

            # pylint: disable=cell-var-from-loop
            lr = tf.constant(config.learning_rate_schedule[0])
            for key, value in config.learning_rate_schedule.items():
                lr = tf.cond(tf.less(global_step, key), lambda: lr,
                             lambda: tf.constant(value))
            # pylint: enable=cell-var-from-loop
            tf.summary.scalar("learning_rate", lr)

            # build the model graph
            outputs_dict = config.build(inputs_dict, is_training=True)
            loss = outputs_dict["loss"]
            tf.summary.scalar("train_loss", loss)

            worker_replicas = FLAGS.worker_replicas
            ema = tf.train.ExponentialMovingAverage(decay=0.9999,
                                                    num_updates=global_step)
            opt = tf.train.SyncReplicasOptimizer(
                tf.train.AdamOptimizer(lr, epsilon=1e-8),
                worker_replicas,
                total_num_replicas=worker_replicas,
                variable_averages=ema,
                variables_to_average=tf.trainable_variables())

            train_op = opt.minimize(loss,
                                    global_step=global_step,
                                    name="train",
                                    colocate_gradients_with_ops=True)

            session_config = tf.ConfigProto(allow_soft_placement=True)

            is_chief = (FLAGS.task == 0)
            local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op

            slim.learning.train(
                train_op=train_op,
                logdir=logdir,
                is_chief=is_chief,
                master=FLAGS.master,
                number_of_steps=config.num_iters,
                global_step=global_step,
                log_every_n_steps=250,
                local_init_op=local_init_op,
                save_interval_secs=300,
                sync_optimizer=opt,
                session_config=session_config,
            )
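The chained tf.cond above builds a piecewise-constant schedule: lr ends up holding the value attached to the largest schedule key that global_step has reached, provided the dict yields its keys in ascending order. A plain-Python equivalent, as a sketch:

def piecewise_constant_lr(step, schedule):
    # schedule maps boundary step -> learning rate, with a 0 key for the
    # initial value, mirroring config.learning_rate_schedule above.
    lr = schedule[0]
    for boundary, value in sorted(schedule.items()):
        if step >= boundary:
            lr = value
    return lr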
Example #22
def train(flags):
  """Training entry point."""
  log_dir = flags.log_dir
  flags.pretrained_model_dir = log_dir
  log_dir = os.path.join(log_dir, 'train')
  flags.eval_interval_secs = 0
  with tf.Graph().as_default():
    global_step = tf.Variable(
        0, trainable=False, name='global_step', dtype=tf.int64)
    global_step_confidence = tf.Variable(
        0, trainable=False, name='global_step_confidence', dtype=tf.int64)

    model = build_model(flags)
    images_query_pl, labels_query_pl, \
    images_support_pl, labels_support_pl = \
      build_episode_placeholder(flags)

    # Augments the input.
    if flags.dataset == 'cifar10' or flags.dataset == 'cifar100':
      images_query_pl_aug = data_loader.augment_cifar(
          images_query_pl, is_training=True)
      images_support_pl_aug = data_loader.augment_cifar(
          images_support_pl, is_training=True)
    elif flags.dataset == 'tinyimagenet':
      images_query_pl_aug = data_loader.augment_tinyimagenet(
          images_query_pl, is_training=True)
      images_support_pl_aug = data_loader.augment_tinyimagenet(
          images_support_pl, is_training=True)

    logits, logits_z = build_proto_train_graph(
        images_query=images_query_pl_aug,
        images_support=images_support_pl_aug,
        flags=flags,
        is_training=True,
        model=model)
    # Losses and optimizer
    ## Classification loss
    loss_classification = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.one_hot(labels_query_pl, flags.num_classes_train)))

    # Confidence loss
    _, top_k_indices = tf.nn.top_k(logits, k=1)
    pred = tf.squeeze(top_k_indices)
    incorrect_mask = tf.math.logical_not(tf.math.equal(pred, labels_query_pl))
    incorrect_logits_z = tf.boolean_mask(logits_z, incorrect_mask)
    incorrect_labels_z = tf.boolean_mask(labels_query_pl, incorrect_mask)
    signal_variance = tf.math.reduce_sum(tf.cast(incorrect_mask, tf.int32))
    loss_variance_incorrect = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=incorrect_logits_z,
            labels=tf.one_hot(incorrect_labels_z, flags.num_classes_train)))
    loss_variance_zero = 0.0
    loss_confidence = tf.cond(
        tf.greater(signal_variance, 0), lambda: loss_variance_incorrect,
        lambda: loss_variance_zero)

    regu_losses = tf.losses.get_regularization_losses()
    loss = tf.add_n([loss_classification] + regu_losses)

    # Learning rate
    if flags.lr_anneal == 'const':
      learning_rate = flags.init_learning_rate
    elif flags.lr_anneal == 'pwc':
      learning_rate = get_pwc_learning_rate(global_step, flags)
    elif flags.lr_anneal == 'exp':
      lr_decay_step = flags.number_of_steps // flags.n_lr_decay
      learning_rate = tf.train.exponential_decay(
          flags.init_learning_rate,
          global_step,
          lr_decay_step,
          1.0 / flags.lr_decay_rate,
          staircase=True)
    else:
      raise Exception('Not implemented')

    # Optimizer
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate, momentum=0.9)
    optimizer_confidence = tf.train.MomentumOptimizer(
        learning_rate=learning_rate, momentum=0.9)

    train_op = contrib_slim.learning.create_train_op(
        total_loss=loss,
        optimizer=optimizer,
        global_step=global_step,
        clip_gradient_norm=flags.clip_gradient_norm)
    variable_variance = []
    for v in tf.trainable_variables():
      if 'fc_variance' in v.name:
        variable_variance.append(v)
    train_op_confidence = contrib_slim.learning.create_train_op(
        total_loss=loss_confidence,
        optimizer=optimizer_confidence,
        global_step=global_step_confidence,
        clip_gradient_norm=flags.clip_gradient_norm,
        variables_to_train=variable_variance)

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('loss_classification', loss_classification)
    tf.summary.scalar('loss_variance', loss_confidence)
    tf.summary.scalar('regu_loss', tf.add_n(regu_losses))
    tf.summary.scalar('learning_rate', learning_rate)
    # Merges all summaries except for pretrain
    summary = tf.summary.merge(
        tf.get_collection('summaries', scope='(?!pretrain).*'))

    # Gets datasets
    few_shot_data_train, test_dataset, train_dataset = get_train_datasets(flags)
    # Defines session and logging
    summary_writer_train = tf.summary.FileWriter(log_dir, flush_secs=1)
    saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
    print(saver.saver_def.filename_tensor_name)
    print(saver.saver_def.restore_op_name)
    # pylint: disable=unused-variable
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    supervisor = tf.train.Supervisor(
        logdir=log_dir,
        init_feed_dict=None,
        summary_op=None,
        init_op=tf.global_variables_initializer(),
        summary_writer=summary_writer_train,
        saver=saver,
        global_step=global_step,
        save_summaries_secs=flags.save_summaries_secs,
        save_model_secs=0)

    with supervisor.managed_session() as sess:
      checkpoint_step = sess.run(global_step)
      if checkpoint_step > 0:
        checkpoint_step += 1
      eval_interval_steps = flags.eval_interval_steps
      for step in range(checkpoint_step, flags.number_of_steps):
        # Computes the classification loss using a batch of data.
        t_batch = time.time()
        images_query, labels_query,\
        images_support, labels_support = \
          few_shot_data_train.next_few_shot_batch(
              query_batch_size_per_task=flags.train_batch_size,
              num_classes_per_task=flags.num_classes_train,
              num_supports_per_class=flags.num_shots_train,
              num_tasks=flags.num_tasks_per_batch)
        dt_batch = time.time() - t_batch

        feed_dict = {
            images_query_pl: images_query.astype(dtype=np.float32),
            labels_query_pl: labels_query,
            images_support_pl: images_support.astype(dtype=np.float32),
            labels_support_pl: labels_support
        }

        t_train = time.time()
        loss, loss_confidence = sess.run([train_op, train_op_confidence],
                                         feed_dict=feed_dict)
        dt_train = time.time() - t_train

        if step % 100 == 0:
          summary_str = sess.run(summary, feed_dict=feed_dict)
          summary_writer_train.add_summary(summary_str, step)
          summary_writer_train.flush()
          logging.info('step %d, loss : %.4g, dt: %.3gs, dt_batch: %.3gs', step,
                       loss, dt_train, dt_batch)

        if float(step) / flags.number_of_steps > 0.5:
          eval_interval_steps = flags.eval_interval_fine_steps

        if eval_interval_steps > 0 and step % eval_interval_steps == 0:
          saver.save(sess, os.path.join(log_dir, 'model'), global_step=step)
          eval(
              flags=flags,
              train_dataset=train_dataset,
              test_dataset=test_dataset)

        if (step >
            0.5 * flags.number_of_steps + flags.number_of_steps_to_early_stop):
          break
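The tf.cond guarding loss_confidence above avoids averaging over an empty selection when every query is classified correctly (a reduce_mean over zero elements yields NaN). A minimal standalone version of that guard, as a sketch:

import tensorflow.compat.v1 as tf

def masked_mean_or_zero(values, mask):
    # Mean over the masked entries, or 0.0 when the mask selects nothing.
    selected = tf.boolean_mask(values, mask)
    return tf.cond(
        tf.greater(tf.size(selected), 0),
        lambda: tf.reduce_mean(selected),
        lambda: tf.constant(0.0))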
Example #23
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)

        # Gets original image and its size.
        image = data['image']

        if self._aug_policy:
            if AUTOAUG_IMPORTED:
                image, boxes = autoaugment_utils.distort_image_with_autoaugment(
                    image, boxes, self._aug_policy)
            else:
                raise ImportError(
                    'Unable to get autoaugment_utils, likely due '
                    'to incompatibility with TF 2.X.')

        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes = input_utils.random_horizontal_flip(image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=input_utils.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.Anchor(self._min_level, self._max_level,
                                     self._num_scales, self._aspect_ratios,
                                     self._anchor_size,
                                     (image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(input_anchor,
                                              self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(
             boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
        }
        return image, labels
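box_utils.denormalize_boxes is not shown here; under the usual convention it maps normalized (ymin, xmin, ymax, xmax) boxes to pixel coordinates for the given image shape. A stand-in sketch under that assumption (not the library's actual implementation):

import tensorflow.compat.v1 as tf

def denormalize_boxes_sketch(boxes, image_shape):
    # boxes: [..., 4] normalized (ymin, xmin, ymax, xmax);
    # image_shape: (height, width) tensor.
    height = tf.cast(image_shape[0], boxes.dtype)
    width = tf.cast(image_shape[1], boxes.dtype)
    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=-1)
    return tf.stack(
        [ymin * height, xmin * width, ymax * height, xmax * width], axis=-1)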
Example #24
  # # This is how the model was pre-trained.
  # (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
  # Change 11: clip grads.
  grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
  grads, tvars = list(zip(*grads_and_vars))
  all_are_finite = tf.reduce_all(
      [tf.reduce_all(tf.is_finite(g)) for g in grads]
  ) if use_fp16 or manual_fp16 else tf.constant(True, dtype=tf.bool)

  # This is how the model was pre-trained.
  # ensure global norm is a finite number
  # to prevent clip_by_global_norm from having a hizzy fit.
  (clipped_grads, _) = tf.clip_by_global_norm(
      grads, clip_norm=1.0,
      use_norm=tf.cond(
          all_are_finite,
          lambda: tf.global_norm(grads),
          lambda: tf.constant(1.0)))

  # train_op = optimizer.apply_gradients(
  #     list(zip(grads, tvars)), global_step=global_step)
  # Change 12: apply grads using the clipped grads.
  train_op = optimizer.apply_gradients(
      list(zip(clipped_grads, tvars)), global_step=global_step)

  # Normally the global step update is done inside of `apply_gradients`.
  # However, neither `AdamWeightDecayOptimizer` nor `LAMBOptimizer` do this.
  # But if you use a different optimizer, you should probably take this line
  # out.
  new_global_step = global_step + 1
  train_op = tf.group(train_op, [global_step.assign(new_global_step)])
  return train_op
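The use_norm argument above is what keeps tf.clip_by_global_norm well-defined when a mixed-precision step overflows: a dummy norm of 1.0 is substituted whenever any gradient is non-finite, so the clip op never propagates NaN/Inf and the bad step can be discarded downstream. A compact sketch of the same guard:

import tensorflow.compat.v1 as tf

def safe_clip(grads, clip_norm=1.0):
    all_finite = tf.reduce_all(
        [tf.reduce_all(tf.math.is_finite(g)) for g in grads])
    clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm,
        use_norm=tf.cond(all_finite,
                         lambda: tf.global_norm(grads),
                         lambda: tf.constant(1.0)))
    return clipped, all_finite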
Example #25
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            num_classes,
                            min_score_thresh=0.2,
                            max_boxes_to_draw=50,
                            soft_nms_sigma=0.0,
                            iou_threshold=0.5,
                            use_native_nms=True):
    """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a numpy array with shape [N, 1], which has the highest class
      scores on all feature levels. N is the number of selected top-k anchors
      across all levels (k being MAX_DETECTION_POINTS).
    box_outputs: a numpy array with shape [N, 4], which stacks box regression
      outputs on all feature levels. N is the number of selected top-k anchors
      across all levels (k being MAX_DETECTION_POINTS).
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels. N is the number of selected top-k anchors across all
      levels.
    indices: a numpy array with shape [N], which is the indices from top-k
      selection.
    classes: a numpy array with shape [N], which represents the class
      prediction on all selected anchors from top-k selection.
    image_id: an integer number to specify the image id.
    image_scale: a float tensor representing the scale between original image
      and input image for the detector. It is used to rescale detections for
      evaluating with the original groundtruth annotations.
    num_classes: an integer that indicates the number of classes.
    min_score_thresh: A float representing the threshold for deciding when to
      remove boxes based on score.
    max_boxes_to_draw: Max number of boxes to draw.
    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter;
      see Bodla et al. (https://arxiv.org/abs/1704.04503). When
      `soft_nms_sigma=0.0` (the default), we fall back to standard (hard)
      NMS.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    use_native_nms: a bool that indicates whether to use native nms.

  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, y, x, height, width, score, class]
  """
    anchor_boxes = tf.gather(anchor_boxes, indices)

    scores = tf.math.sigmoid(cls_outputs)
    # apply bounding box regression to anchors
    boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]),
                                  tf.transpose(anchor_boxes, [1, 0]))

    def _else(detections, class_id, indices):
        """Else branch for generating detections."""
        boxes_cls = tf.gather(boxes, indices)
        scores_cls = tf.gather(scores, indices)
        # Select top-scoring boxes in each class and apply non-maximum suppression
        # (nms) for boxes in the same class. The selected boxes from each class are
        # then concatenated for the final detection outputs.

        if use_native_nms:
            top_detection_idx, scores_cls = tf.image.non_max_suppression_with_scores(
                boxes_cls,
                scores_cls,
                max_boxes_to_draw,
                iou_threshold=iou_threshold,
                score_threshold=min_score_thresh,
                soft_nms_sigma=soft_nms_sigma)
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            boxes_cls = tf.gather(boxes_cls, top_detection_idx)
            top_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
        else:
            scores_cls = tf.expand_dims(scores_cls, axis=1)
            all_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1)
            top_detection_idx = nms_tf(all_detections_cls, iou_threshold)
            top_detections_cls = tf.gather(all_detections_cls,
                                           top_detection_idx)
        height = top_detections_cls[:, 2] - top_detections_cls[:, 0]
        width = top_detections_cls[:, 3] - top_detections_cls[:, 1]
        top_detections_cls = tf.stack([
            top_detections_cls[:, 0] * image_scale,
            top_detections_cls[:, 1] * image_scale, height * image_scale,
            width * image_scale, top_detections_cls[:, 4]
        ],
                                      axis=-1)

        top_detections_cls = tf.stack([
            tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)),
                    tf.float32), *tf.unstack(top_detections_cls, 5, axis=1),
            tf.repeat(class_id + 1.0, tf.size(top_detection_idx))
        ],
                                      axis=1)

        detections = tf.concat([detections, top_detections_cls], axis=0)

        return detections

    detections = tf.constant([], tf.float32, [0, 7])
    for c in range(num_classes):
        indices_cls = tf.squeeze(tf.where_v2(tf.equal(classes, c)), axis=-1)
        # Skip classes with no candidate anchors. Default arguments bind the
        # current loop values of c and indices_cls into the lambda.
        detections = tf.cond(
            tf.equal(tf.size(indices_cls), 0),
            lambda: detections,
            lambda id=c, id_cls=indices_cls: _else(detections, id, id_cls))
    indices_final = tf.argsort(detections[:, -2], direction='DESCENDING')
    detections = tf.gather(detections,
                           indices_final[:max_boxes_to_draw],
                           name='detection')
    return detections
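
For reference, a standalone sketch of the soft-NMS call used in the native
branch above, on dummy data (assumes a TF version that provides
tf.image.non_max_suppression_with_scores):

import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.1, 1.0, 1.1],
                     [0.5, 0.5, 1.5, 1.5]])
scores = tf.constant([0.9, 0.8, 0.3])
# Returns the indices of the kept boxes plus their scores, which soft NMS
# may have decayed (with soft_nms_sigma=0.0 this is plain hard NMS).
selected_idx, selected_scores = tf.image.non_max_suppression_with_scores(
    boxes, scores, max_output_size=10,
    iou_threshold=0.5, score_threshold=0.2, soft_nms_sigma=0.5)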
Example #26
0
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            num_classes,
                            use_native_nms=False):
    """Generates detections with model outputs and anchors.

  Args:
    cls_outputs: a numpy array with shape [N, 1], which has the highest class
      scores on all feature levels. N is the number of selected top-k total
      anchors on all levels (k being MAX_DETECTION_POINTS).
    box_outputs: a numpy array with shape [N, 4], which stacks box regression
      outputs on all feature levels. N is the number of selected top-k total
      anchors on all levels (k being MAX_DETECTION_POINTS).
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels. N is the number of selected top-k total anchors on all
      levels.
    indices: a numpy array with shape [N], which is the indices from top-k
      selection.
    classes: a numpy array with shape [N], which represents the class
      prediction on all selected anchors from top-k selection.
    image_id: an integer number to specify the image id.
    image_scale: a float tensor representing the scale between original image
      and input image for the detector. It is used to rescale detections for
      evaluating with the original groundtruth annotations.
    num_classes: an integer that indicates the number of classes.
    use_native_nms: a bool that indicates whether to use native nms.

  Returns:
    detections: detection results in a tensor with each row representing
      [image_id, y, x, height, width, score, class]
  """
    anchor_boxes = tf.gather(anchor_boxes, indices)

    scores = tf.math.sigmoid(cls_outputs)
    # apply bounding box regression to anchors
    boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]),
                                  tf.transpose(anchor_boxes, [1, 0]))

    def _else(detections, class_id):
        """Else branch forr generating detections."""
        boxes_cls = tf.gather(boxes, indices)
        scores_cls = tf.gather(scores, indices)
        # Select top-scoring boxes in each class and apply non-maximum suppression
        # (nms) for boxes in the same class. The selected boxes from each class are
        # then concatenated for the final detection outputs.
        all_detections_cls = tf.concat(
            [tf.reshape(boxes_cls, [-1, 4]), scores_cls], axis=1)
        if use_native_nms:
            top_detection_idx = tf.image.non_max_suppression(
                all_detections_cls[:, :4],
                all_detections_cls[:, 4],
                MAX_DETECTIONS_PER_IMAGE,
                iou_threshold=0.5)
        else:
            top_detection_idx = nms_tf(all_detections_cls, 0.5)
        top_detections_cls = tf.gather(all_detections_cls, top_detection_idx)
        height = top_detections_cls[:, 2] - top_detections_cls[:, 0]
        width = top_detections_cls[:, 3] - top_detections_cls[:, 1]
        top_detections_cls = tf.stack([
            top_detections_cls[:, 0] * image_scale,
            top_detections_cls[:, 1] * image_scale, height * image_scale,
            width * image_scale, top_detections_cls[:, 4]
        ],
                                      axis=-1)

        top_detections_cls = tf.stack([
            tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)),
                    tf.float32), *tf.unstack(top_detections_cls, 5, axis=1),
            tf.repeat(class_id + 1.0, tf.size(top_detection_idx))
        ],
                                      axis=1)

        detections = tf.concat([detections, top_detections_cls], axis=0)
        return detections

    detections = tf.constant([], tf.float32, [0, 7])
    for c in range(num_classes):
        # Squeeze to a rank-1 index vector so the gathers in _else yield
        # [M, 4] boxes and [M, 1] scores that concatenate cleanly.
        indices = tf.squeeze(tf.where(tf.equal(classes, c)), axis=-1)
        detections = tf.cond(tf.equal(tf.shape(indices)[0], 0),
                             lambda: detections,
                             lambda class_id=c: _else(detections, class_id))

    return tf.identity(detections, name='detection')
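
Both versions of this function bind the loop variable through a lambda default
argument (`lambda class_id=c: ...`). If a closure is instead left to Python's
late binding and invoked after the loop finishes, it sees only the final loop
value. A plain-Python illustration:

fns_late = [lambda: c for c in range(3)]
print([f() for f in fns_late])    # [2, 2, 2] -- all closures share the last c

fns_bound = [lambda c=c: c for c in range(3)]
print([f() for f in fns_bound])   # [0, 1, 2] -- value frozen per iteration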
Example #27
0
  def last_value_quantize(self,
                          inputs,
                          per_channel=False,
                          init_min=-6.0,
                          init_max=6.0,
                          name_prefix='FixedValueQuant',
                          reuse=None,
                          is_training=False,
                          num_bits=8,
                          narrow_range=False,
                          relative_quantile=0,
                          freeze=False,
                          quant_delay=False):
    """Adds a layer that collects quantization ranges as last input ranges.

    LastValueQuantize creates variables called 'min' and 'max', representing the
    interval used for quantization and clamping.

    Args:
      inputs: a tensor containing values to be quantized.
      per_channel: (Optional) a boolean specifying whether to use different
        quantization ranges per output channel.
      init_min: a float scalar, the initial value for variable min.
      init_max: a float scalar, the initial value for variable max.
      name_prefix: name_prefix for created nodes.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse them, the layer scope must be given.
      is_training: Whether the op is applied to a training or eval graph.
      num_bits: Number of bits to use for quantization, must be between 2 and 8.
      narrow_range: Whether to use the narrow quantization range
        [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
      relative_quantile: Specifies the location of the quantization min and max
        parameters. relative_quantile = 0 is equivalent to using the min and
        max of the input; relative_quantile = 1 sets min and max at the optimal
        location assuming the input distribution is uniform. In practice, a
        good value lies in the range [0, 1].
      freeze: If True, the min and max variables are calculated once at the
        beginning of training and then frozen. This is used for quantized
        fine-tuning of a pretrained checkpoint. If False, the min and max are
        recalculated and updated every step.
      quant_delay: The number of global steps after which fake quantization
        is turned on. Used for performing fine-tuning experiments without
        starting from a pre-trained checkpoint.
    Returns:
      a tensor containing quantized values.
    """

    with tf.variable_scope(
        None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope:
      scope.set_partitioner(None)
      input_shape = inputs.get_shape()
      input_dim = len(input_shape)
      if per_channel:
        # Only support quantizing 1-, 2- and 4-dimensional tensors.
        assert input_dim in [1, 2, 4]
        min_max_shape = [input_shape[-1]]
      else:
        min_max_shape = []

      min_var = tf.get_variable('min',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_min),
                                trainable=False)
      max_var = tf.get_variable('max',
                                min_max_shape,
                                tf.float32,
                                initializer=tf.constant_initializer(init_max),
                                trainable=False)
      if not is_training:
        return self.delayed_quant(
            inputs,
            min_var,
            max_var,
            per_channel=per_channel,
            num_bits=num_bits,
            narrow_range=narrow_range,
            quant_delay=None)

      if per_channel:
        if input_dim == 2:
          reduce_dims = [0]
        elif input_dim == 4:
          reduce_dims = [0, 1, 2]

      if num_bits >= 4:
        quantile = 0
      else:
        quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100

      if per_channel:
        if input_dim >= 2:
          batch_min = tfp.stats.percentile(
              inputs, q=quantile, axis=reduce_dims, name='BatchMin')
        else:
          batch_min = inputs
      else:
        batch_min = tfp.stats.percentile(
            inputs, q=quantile, name='BatchMin')

      if per_channel:
        if input_dim >= 2:
          batch_max = tfp.stats.percentile(
              inputs, q=100 - quantile, axis=reduce_dims, name='BatchMax')
        else:
          batch_max = inputs
      else:
        batch_max = tfp.stats.percentile(
            inputs, q=100 - quantile, name='BatchMax')

      batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max))

      if narrow_range:
        batch_adjusted_min = 0 - batch_abs_max
      else:
        # Widen the negative end so [min; max] spans the full signed grid:
        # min = -2^(num_bits-1) steps, max = (2^(num_bits-1) - 1) steps,
        # with zero exactly representable.
        multiplier = 1.0 + 1.0 / (2.0**(num_bits - 1.0) - 1.0)
        batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max)

      batch_abs_max = tf.cast(batch_abs_max, tf.float32)
      batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32)

      if freeze:
        def make_var_op(var):
          def f():
            return var
          return f

        quant_step = common.CreateOrGetQuantizationStep()
        min_max_assign = tf.less_equal(
            quant_step, 1, name='MinMaxAssign')
        min_value = tf.cond(min_max_assign,
                            make_var_op(batch_adjusted_min),
                            make_var_op(min_var),
                            name='AssignMinCond')
        max_value = tf.cond(min_max_assign,
                            make_var_op(batch_abs_max),
                            make_var_op(max_var),
                            name='AssignMaxCond')
      else:
        min_value = batch_adjusted_min
        max_value = batch_abs_max

      assign_min = tf.assign(min_var, min_value)
      assign_max = tf.assign(max_var, max_value)

      return self.delayed_quant(
          inputs,
          assign_min,
          assign_max,
          per_channel=per_channel,
          num_bits=num_bits,
          narrow_range=narrow_range,
          quant_delay=quant_delay)
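
`delayed_quant` is not shown in this example. Layers like this typically end
in a fake-quantization op; a hedged sketch of what it presumably wraps, using
the stock TF op (the exact wiring to `assign_min`/`assign_max` is an
assumption):

import tensorflow.compat.v1 as tf

def fake_quant(inputs, min_var, max_var, num_bits=8, narrow_range=False):
  # Quantizes then dequantizes inputs onto the grid defined by [min; max],
  # keeping the tensor in float32 so the rest of training is unchanged.
  return tf.quantization.fake_quant_with_min_max_vars(
      inputs, min_var, max_var, num_bits=num_bits, narrow_range=narrow_range)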
Example #28
0
    def _match(self, similarity_matrix):
        """Tries to match each column of the similarity matrix to a row.

    Args:
      similarity_matrix: tensor of shape [N, M] representing any similarity
        metric.

    Returns:
      Match object with corresponding matches for each of M columns.
    """
        def _match_when_rows_are_empty():
            """Performs matching when the rows of similarity matrix are empty.

      When the rows are empty, all detections are false positives. So we return
      a tensor of -1's to indicate that the columns do not match to any rows.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                similarity_matrix)
            return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)

        def _match_when_rows_are_non_empty():
            """Performs matching when the rows of similarity matrix are non empty.

      Returns:
        matches:  int32 tensor indicating the row each column matches to.
      """
            # Matches for each column
            matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

            # Deal with matched and unmatched threshold
            if self._matched_threshold is not None:
                # Get logical indices of ignored and unmatched columns as tf.int64
                matched_vals = tf.reduce_max(similarity_matrix, 0)
                below_unmatched_threshold = tf.greater(
                    self._unmatched_threshold, matched_vals)
                between_thresholds = tf.logical_and(
                    tf.greater_equal(matched_vals, self._unmatched_threshold),
                    tf.greater(self._matched_threshold, matched_vals))

                if self._negatives_lower_than_unmatched:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -1)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -2)
                else:
                    matches = self._set_values_using_indicator(
                        matches, below_unmatched_threshold, -2)
                    matches = self._set_values_using_indicator(
                        matches, between_thresholds, -1)

            if self._force_match_for_each_row:
                similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
                    similarity_matrix)
                force_match_column_ids = tf.argmax(similarity_matrix,
                                                   1,
                                                   output_type=tf.int32)
                force_match_column_indicators = tf.one_hot(
                    force_match_column_ids, depth=similarity_matrix_shape[1])
                force_match_row_ids = tf.argmax(force_match_column_indicators,
                                                0,
                                                output_type=tf.int32)
                force_match_column_mask = tf.cast(
                    tf.reduce_max(force_match_column_indicators, 0), tf.bool)
                final_matches = tf.where(force_match_column_mask,
                                         force_match_row_ids, matches)
                return final_matches
            else:
                return matches

        if similarity_matrix.shape.is_fully_defined():
            if similarity_matrix.shape[0].value == 0:
                return _match_when_rows_are_empty()
            else:
                return _match_when_rows_are_non_empty()
        else:
            return tf.cond(tf.greater(tf.shape(similarity_matrix)[0],
                                      0), _match_when_rows_are_non_empty,
                           _match_when_rows_are_empty)
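
A tiny worked example of the thresholding logic above, assuming
matched_threshold=0.7, unmatched_threshold=0.3, and
negatives_lower_than_unmatched=True (numpy used for brevity):

import numpy as np

similarity = np.array([[0.9, 0.2, 0.5],
                       [0.1, 0.1, 0.6]])
matches = similarity.argmax(axis=0)        # [0, 0, 1]
matched_vals = similarity.max(axis=0)      # [0.9, 0.2, 0.6]
matches[matched_vals < 0.3] = -1           # below unmatched -> negative (-1)
matches[(matched_vals >= 0.3) & (matched_vals < 0.7)] = -2  # ignored (-2)
print(matches)                             # [0, -1, -2]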
Example #29
0
    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
        tensor_dict[fields.InputDataFields.image])[:2]

    if fields.InputDataFields.image_additional_channels in tensor_dict:
      channels = tensor_dict[fields.InputDataFields.image_additional_channels]
      channels = tf.squeeze(channels, axis=3)
      channels = tf.transpose(channels, perm=[1, 2, 0])
      tensor_dict[fields.InputDataFields.image_additional_channels] = channels

    def default_groundtruth_weights():
      return tf.ones(
          [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
          dtype=tf.float32)

    tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
        tf.greater(
            tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
            0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
        default_groundtruth_weights)

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
      # Set all keypoints that are not labeled to NaN.
      gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
      gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
      visibilities_tiled = tf.tile(
          tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1),
          [1, 1, 2])
      tensor_dict[gt_kpt_fld] = tf.where(
          visibilities_tiled,
          tensor_dict[gt_kpt_fld],
          np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
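
The keypoint masking above relies on tf.where with a boolean mask of the same
shape as the keypoints. A self-contained sketch with illustrative shapes:

import numpy as np
import tensorflow.compat.v1 as tf

keypoints = tf.constant([[[0.2, 0.3], [0.5, 0.6]]])   # [1, 2, 2]
visibilities = tf.constant([[True, False]])           # [1, 2]
# Tile to [1, 2, 2] so each (y, x) pair shares its keypoint's visibility.
visibilities_tiled = tf.tile(tf.expand_dims(visibilities, -1), [1, 1, 2])
masked = tf.where(visibilities_tiled, keypoints,
                  np.nan * tf.ones_like(keypoints))
# masked -> [[[0.2, 0.3], [nan, nan]]]: unlabeled keypoints become NaN.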
Example #30
0
def random_crop_image(image,
                      boxes,
                      labels,
                      masks=None,
                      keypoints=None,
                      min_object_covered=1.0,
                      aspect_ratio_range=(0.75, 1.33),
                      area_range=(0.1, 1.0),
                      overlap_thresh=0.3,
                      clip_boxes=True,
                      random_coef=0.0,
                      seed=None):
  """Randomly crops the image.

  Given the input image and its bounding boxes, this op randomly
  crops a subimage.  Given a user-provided set of input constraints,
  the crop window is resampled until it satisfies these constraints.
  If within 100 trials it is unable to find a valid crop, the original
  image is returned. See the Args section for a description of the input
  constraints. Both input boxes and returned boxes are in normalized
  form (i.e., they lie in the unit square [0, 1]).
  This function will return the original image with probability random_coef.

  Note: Keypoint coordinates that are outside the crop will be set to NaN, which
  is consistent with the original keypoint encoding for non-existing keypoints.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes with shape
           [num_instances, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    clip_boxes: whether to clip the boxes to the cropped image.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    seed: random seed.

  Returns:
    image: Image shape will be [new_height, new_width, channels].
    boxes: boxes which is the same rank as input boxes. Boxes are in normalized
           form.
    labels: new labels.

    If masks or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """

  def strict_random_crop_image_fn():
    return _strict_random_crop_image(
        image,
        boxes,
        labels,
        masks=masks,
        keypoints=keypoints,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        overlap_thresh=overlap_thresh,
        clip_boxes=clip_boxes)

  # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
  if random_coef < sys.float_info.min:
    result = strict_random_crop_image_fn()
  else:
    do_a_crop_random = tf.greater(tf.random_uniform([], seed=seed), random_coef)

    outputs = [image, boxes, labels]

    if masks is not None:
      outputs.append(masks)
    if keypoints is not None:
      outputs.append(keypoints)

    result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
                     lambda: tuple(outputs))
  return result
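
`_strict_random_crop_image` is not shown here, but constrained crop sampling
of this kind is typically built on tf.image.sample_distorted_bounding_box. A
hedged sketch of just the window sampling (the box/mask/keypoint bookkeeping
of the real helper is omitted, and `sample_crop_window` is an illustrative
name):

import tensorflow.compat.v1 as tf

def sample_crop_window(image, boxes, min_object_covered=1.0,
                       aspect_ratio_range=(0.75, 1.33), area_range=(0.1, 1.0)):
  # boxes: [num_instances, 4] in normalized [ymin, xmin, ymax, xmax] form.
  begin, size, _ = tf.image.sample_distorted_bounding_box(
      tf.shape(image),
      bounding_boxes=tf.expand_dims(boxes, 0),
      min_object_covered=min_object_covered,
      aspect_ratio_range=aspect_ratio_range,
      area_range=area_range,
      max_attempts=100)  # falls back to the full image after 100 tries
  return tf.slice(image, begin, size)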