def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu, pretrained_param_names, freeze_pretrained_steps, restart_warmup_after_unfreeze=True, lr_after_restarting=0.): """Creates an optimizer training op.""" global_step = tf.train.get_or_create_global_step() global_steps_int = tf.cast(global_step, tf.int32) num_train_steps_int = tf.constant(num_train_steps, dtype=tf.int32) warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32) current_step_in_decay = global_steps_int - warmup_steps_int num_decay_steps = num_train_steps_int - warmup_steps_int global_steps_float = tf.cast(global_steps_int, tf.float32) if freeze_pretrained_steps and restart_warmup_after_unfreeze: freeze_pretrained_steps_int = tf.cast(freeze_pretrained_steps, tf.int32) global_steps_int -= (tf.cast( global_steps_int >= freeze_pretrained_steps_int, tf.int32) * freeze_pretrained_steps_int) if lr_after_restarting <= 0.: raise ValueError( "Learning rate after restarting should not be zero: " + str(lr_after_restarting)) learning_rate = tf.cond(global_step < freeze_pretrained_steps, lambda: init_lr, lambda: lr_after_restarting) current_step_in_decay = tf.cond( global_step < freeze_pretrained_steps, lambda: current_step_in_decay, lambda: global_steps_int - warmup_steps_int) after_unfreeze_decay_steps = num_train_steps_int - ( freeze_pretrained_steps + warmup_steps_int) num_decay_steps = tf.cond(global_step < freeze_pretrained_steps, lambda: num_decay_steps, lambda: after_unfreeze_decay_steps) after_unfreeze_steps = global_steps_float - tf.cast( freeze_pretrained_steps_int, tf.float32) global_steps_float = tf.cond(global_step < freeze_pretrained_steps, lambda: global_steps_float, lambda: after_unfreeze_steps) tf.summary.scalar( "is pretraining", tf.cast(global_step < freeze_pretrained_steps, tf.int32)) else: learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32) tf.summary.scalar("global step count", global_steps_float) tf.summary.scalar("current base learning rate", learning_rate) tf.summary.scalar("global decay step", current_step_in_decay) tf.summary.scalar("total decay steps", num_decay_steps) # Implements linear decay of the learning rate. learning_rate = tf.train.polynomial_decay(learning_rate, tf.cast(current_step_in_decay, tf.float32), tf.cast(num_decay_steps, tf.float32), end_learning_rate=0.0, power=1.0, cycle=False) tf.summary.scalar("decayed learning rate", learning_rate) # Implements linear warmup. I.e., if global_step < num_warmup_steps, the # learning rate will be `global_step/num_warmup_steps * init_lr`. if num_warmup_steps: warmup_steps_float = tf.cast(warmup_steps_int, tf.float32) warmup_percent_done = global_steps_float / warmup_steps_float tf.summary.scalar("warmup percent done", warmup_percent_done) warmup_learning_rate = learning_rate * warmup_percent_done is_warmup = global_steps_int < warmup_steps_int tf.summary.scalar("is warmup", tf.cast(is_warmup, tf.float32)) learning_rate = tf.cond(is_warmup, lambda: warmup_learning_rate, lambda: learning_rate) tf.summary.scalar("learning rate", learning_rate) # It is recommended that you use this optimizer for fine tuning, since this # is how the model was trained (note that the Adam m/v variables are NOT # loaded from init_checkpoint.) 
optimizer = AdamWeightDecayOptimizer( learning_rate=learning_rate, weight_decay_rate=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-6, exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"], pretrained_param_names=pretrained_param_names, freeze_pretrained_steps=freeze_pretrained_steps) if use_tpu: optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) tvars = tf.trainable_variables() grads = tf.gradients(loss, tvars) # This is how the model was pre-trained. (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) train_op = optimizer.apply_gradients(list(zip(grads, tvars)), global_step=global_step) # Normally the global step update is done inside of `apply_gradients`. # However, `AdamWeightDecayOptimizer` doesn't do this. But if you use # a different optimizer, you should probably take this line out. new_global_step = global_step + 1 train_op = tf.group(train_op, [global_step.assign(new_global_step)]) return train_op
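# A minimal sketch of the warmup-vs-decay switch used in create_optimizer() above,
# assuming TF1-style graph mode and hypothetical values for init_lr,
# num_train_steps and num_warmup_steps (not the full optimizer setup).
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

init_lr = 1e-4
num_train_steps = 1000
num_warmup_steps = 100

global_step = tf.train.get_or_create_global_step()
global_step_float = tf.cast(global_step, tf.float32)

# Linear decay after warmup: from init_lr down to 0 over the remaining steps.
decayed_lr = tf.train.polynomial_decay(
    init_lr,
    tf.maximum(global_step - num_warmup_steps, 0),
    num_train_steps - num_warmup_steps,
    end_learning_rate=0.0,
    power=1.0)

# Linear warmup: from 0 up to init_lr over the first num_warmup_steps.
warmup_lr = init_lr * global_step_float / float(num_warmup_steps)

# tf.cond picks whichever phase the current step falls in.
learning_rate = tf.cond(global_step < num_warmup_steps,
                        lambda: warmup_lr,
                        lambda: decayed_lr)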
def train(train_dir, config, dataset_fn, checkpoints_to_keep=5, keep_checkpoint_every_n_hours=1, num_steps=None, master='', num_sync_workers=0, num_ps_tasks=0, task=0): """Train loop.""" tf.gfile.MakeDirs(train_dir) is_chief = (task == 0) if is_chief: _trial_summary(config.hparams, config.train_examples_path or config.tfds_name, train_dir) with tf.Graph().as_default(): with tf.device( tf.train.replica_device_setter(num_ps_tasks, merge_devices=True)): model = config.model model.build(config.hparams, config.data_converter.output_depth, is_training=True) optimizer = model.train(**_get_input_tensors(dataset_fn(), config)) hooks = [] if num_sync_workers: optimizer = tf.train.SyncReplicasOptimizer( optimizer, num_sync_workers) hooks.append(optimizer.make_session_run_hook(is_chief)) grads, var_list = list( zip(*optimizer.compute_gradients(model.loss))) global_norm = tf.global_norm(grads) tf.summary.scalar('global_norm', global_norm) if config.hparams.clip_mode == 'value': g = config.hparams.grad_clip clipped_grads = [ tf.clip_by_value(grad, -g, g) for grad in grads ] elif config.hparams.clip_mode == 'global_norm': clipped_grads = tf.cond( global_norm < config.hparams.grad_norm_clip_to_zero, lambda: tf.clip_by_global_norm( # pylint:disable=g-long-lambda grads, config.hparams.grad_clip, use_norm=global_norm)[0], lambda: [tf.zeros(tf.shape(g)) for g in grads]) else: raise ValueError('Unknown clip_mode: {}'.format( config.hparams.clip_mode)) train_op = optimizer.apply_gradients(list( zip(clipped_grads, var_list)), global_step=model.global_step, name='train_step') logging_dict = { 'global_step': model.global_step, 'loss': model.loss } hooks.append( tf.train.LoggingTensorHook(logging_dict, every_n_iter=100)) if num_steps: hooks.append(tf.train.StopAtStepHook(last_step=num_steps)) scaffold = tf.train.Scaffold(saver=tf.train.Saver( max_to_keep=checkpoints_to_keep, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours)) tf_slim.training.train(train_op=train_op, logdir=train_dir, scaffold=scaffold, hooks=hooks, save_checkpoint_secs=60, master=master, is_chief=is_chief)
def build_learning_rate_schedule( learning_rate, decay_type, warmup_start_epoch, max_learning_rate_epoch, decay_end_epoch, global_step, steps_per_epoch, **decay_type_specific_kwargs): """Build learning rate from base learning rate and other details. We note that warmup_start_epoch <= max_learning_rate_epoch < decay_end_epoch since the warmup happens at the start of learning rate schedule. Args: learning_rate: Learning rate for the model. decay_type: Name of the decay that should be applied to the learning rate. warmup_start_epoch: Epoch at which learning rate warmup starts. max_learning_rate_epoch: Epoch at which learning rate warmup ends and the decay kicks in. decay_end_epoch: Epoch at which learning rate decays ends, at which point learning rate becomes 0. global_step: The global step to use for learning rate computation. steps_per_epoch: Integer which defines the number of steps that are run for every epoch. **decay_type_specific_kwargs: Specific key-word arguments which are unique to a said `decay_type`. Returns: Scalar tensor which stores the learning rate at a given global step. """ if decay_end_epoch == max_learning_rate_epoch: # This stage of training is 0 epochs long, so just return learning_rate and # avoid potential divide by 0 problems. if warmup_start_epoch < max_learning_rate_epoch: raise ValueError( 'Cannot have warmup for a 0-step learning rate schedule.') return learning_rate assert warmup_start_epoch <= max_learning_rate_epoch assert max_learning_rate_epoch < decay_end_epoch max_learning_rate_epoch_tensor = tf.convert_to_tensor(max_learning_rate_epoch) warmup_start_epoch_tensor = tf.convert_to_tensor( warmup_start_epoch, max_learning_rate_epoch_tensor.dtype) decay_end_epoch_tensor = tf.convert_to_tensor( decay_end_epoch, max_learning_rate_epoch_tensor.dtype) steps_per_epoch_tensor = tf.cast(steps_per_epoch, max_learning_rate_epoch_tensor.dtype) # Learning rate decay kicks in starting max_learning_rate_epoch # Before max_learning_rate_epoch either there is a warmup or the learning rate # is set to the constant value of `initial_lr`. learning_rate_step = global_step - tf.cast( max_learning_rate_epoch_tensor * steps_per_epoch_tensor, global_step.dtype) def _no_decay_fn(initial_lr, *args, **kwargs): del args, kwargs return initial_lr decay_type_fn_map = { enums.DecayType.EXPONENTIAL: exponential_decay, enums.DecayType.COSINE: cosine_decay, enums.DecayType.PIECEWISE_LINEAR: piecewise_linear_decay, enums.DecayType.NO_DECAY: _no_decay_fn, } if decay_type not in decay_type_fn_map: raise ValueError(f'Unknown decay type {decay_type}') decayed_learning_rate = decay_type_fn_map[decay_type]( initial_lr=learning_rate, global_step=learning_rate_step, total_epochs=decay_end_epoch_tensor - max_learning_rate_epoch_tensor, steps_per_epoch=steps_per_epoch, **decay_type_specific_kwargs) # The learning rate is set to 0 once global_step is more than total_steps. total_steps = tf.cast( steps_per_epoch_tensor * ( decay_end_epoch_tensor - max_learning_rate_epoch_tensor), global_step.dtype) decayed_learning_rate = tf.cond( learning_rate_step <= total_steps, lambda: decayed_learning_rate, lambda: 0.0) warmup_step_counter = global_step - tf.cast( warmup_start_epoch_tensor * steps_per_epoch_tensor, global_step.dtype) return maybe_add_warmup_to_lr( learning_rate, decayed_learning_rate, warmup_step_counter, max_learning_rate_epoch - warmup_start_epoch_tensor, steps_per_epoch_tensor)
def _buckets(data, bucket_count=None): """Create a TensorFlow op to group data into histogram buckets. Arguments: data: A `Tensor` of any shape. Must be castable to `float64`. bucket_count: Optional positive `int` or scalar `int32` `Tensor`. Returns: A `Tensor` of shape `[k, 3]` and type `float64`. The `i`th row is a triple `[left_edge, right_edge, count]` for a single bucket. The value of `k` is either `bucket_count` or `1` or `0`. """ # TODO(nickfelt): remove on-demand imports once dep situation is fixed. import tensorflow.compat.v1 as tf if bucket_count is None: bucket_count = summary_v2.DEFAULT_BUCKET_COUNT with tf.name_scope("buckets", values=[data, bucket_count]), tf.control_dependencies([ tf.assert_scalar(bucket_count), tf.assert_type(bucket_count, tf.int32) ]): data = tf.reshape(data, shape=[-1]) # flatten data = tf.cast(data, tf.float64) is_empty = tf.equal(tf.size(input=data), 0) def when_empty(): return tf.constant([], shape=(0, 3), dtype=tf.float64) def when_nonempty(): min_ = tf.reduce_min(input_tensor=data) max_ = tf.reduce_max(input_tensor=data) range_ = max_ - min_ is_singular = tf.equal(range_, 0) def when_nonsingular(): bucket_width = range_ / tf.cast(bucket_count, tf.float64) offsets = data - min_ bucket_indices = tf.cast(tf.floor(offsets / bucket_width), dtype=tf.int32) clamped_indices = tf.minimum(bucket_indices, bucket_count - 1) # Use float64 instead of float32 to avoid accumulating floating point error # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values. # See https://github.com/tensorflow/tensorflow/issues/51419 for details. one_hots = tf.one_hot(clamped_indices, depth=bucket_count, dtype=tf.float64) bucket_counts = tf.cast( tf.reduce_sum(input_tensor=one_hots, axis=0), dtype=tf.float64, ) edges = tf.linspace(min_, max_, bucket_count + 1) left_edges = edges[:-1] right_edges = edges[1:] return tf.transpose( a=tf.stack([left_edges, right_edges, bucket_counts])) def when_singular(): center = min_ bucket_starts = tf.stack([center - 0.5]) bucket_ends = tf.stack([center + 0.5]) bucket_counts = tf.stack( [tf.cast(tf.size(input=data), tf.float64)]) return tf.transpose( a=tf.stack([bucket_starts, bucket_ends, bucket_counts])) return tf.cond(is_singular, when_singular, when_nonsingular) return tf.cond(is_empty, when_empty, when_nonempty)
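# A minimal sketch of the nested tf.cond guard used in _buckets() above, reduced
# to a "safe range" computation; assumes TF1-style graph mode and a hypothetical
# fallback value of 1.0 for degenerate (all-equal) input.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

data = tf.placeholder(tf.float64, shape=[None])
is_empty = tf.equal(tf.size(data), 0)

def when_empty():
    return tf.constant(0.0, dtype=tf.float64)

def when_nonempty():
    min_ = tf.reduce_min(data)
    max_ = tf.reduce_max(data)
    range_ = max_ - min_
    # All-equal data would otherwise produce a zero bucket width downstream.
    return tf.cond(tf.equal(range_, 0),
                   lambda: tf.constant(1.0, dtype=tf.float64),
                   lambda: range_)

safe_range = tf.cond(is_empty, when_empty, when_nonempty)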
def build_sample_graph(self, input_pianorolls=None, outer_masks=None, total_gibbs_steps=None): """Builds the tf.while_loop based sampling graph. Args: input_pianorolls: Optional input pianorolls override. If None, uses the pianorolls placeholder. outer_masks: Optional input outer_masks override. If None, uses the outer_masks placeholder. total_gibbs_steps: Optional input total_gibbs_steps override. If None, uses the total_gibbs_steps placeholder. Returns: The output op of the graph. """ if input_pianorolls is None: input_pianorolls = self.inputs["pianorolls"] if outer_masks is None: outer_masks = self.inputs["outer_masks"] tt = tf.shape(input_pianorolls)[1] sample_steps = tf.to_float(self.inputs["sample_steps"]) if total_gibbs_steps is None: total_gibbs_steps = self.inputs["total_gibbs_steps"] temperature = self.inputs["temperature"] input_pianorolls = tf.to_float(input_pianorolls) outer_masks = self.make_outer_masks(outer_masks, input_pianorolls) # Calculate total_gibbs_steps as steps * num_instruments if not given. total_gibbs_steps = tf.cond( tf.equal(total_gibbs_steps, 0), lambda: tf.to_float(tt * self.hparams.num_instruments), lambda: tf.to_float(total_gibbs_steps)) # sample_steps is set to total_gibbs_steps if not given. sample_steps = tf.cond(tf.equal(sample_steps, 0), lambda: total_gibbs_steps, lambda: tf.to_float(sample_steps)) def infer_step(pianorolls, step_count): """Called by tf.while_loop, takes a Gibbs step.""" mask_prob = compute_mask_prob_from_yao_schedule( step_count, total_gibbs_steps) # 1 indicates mask out, 0 is not mask. masks = make_bernoulli_masks(tf.shape(pianorolls), mask_prob, outer_masks) logits = self.predict(pianorolls, masks) samples = sample_with_temperature(logits, temperature=temperature) outputs = pianorolls * (1 - masks) + samples * masks check_completion_op = tf.assert_equal( tf.where(tf.equal(tf.reduce_max(masks, axis=2), 1.), tf.reduce_max(outputs, axis=2), tf.reduce_max(pianorolls, axis=2)), 1.) with tf.control_dependencies([check_completion_op]): outputs = tf.identity(outputs) step_count += 1 return outputs, step_count current_step = tf.to_float(self.inputs["current_step"]) # Initializes pianorolls by evaluating the model once to fill in all gaps. logits = self.predict(tf.to_float(input_pianorolls), outer_masks) samples = sample_with_temperature(logits, temperature=temperature) tf.get_variable_scope().reuse_variables() self.samples, current_step = tf.while_loop( lambda samples, current_step: current_step < sample_steps, infer_step, [samples, current_step], shape_invariants=[ tf.TensorShape([None, None, None, None]), tf.TensorShape(None), ], back_prop=False, parallel_iterations=1, name="coco_while") self.samples.set_shape(input_pianorolls.shape) return self.samples
def parse_fn(filename, output_sequence_length=IMAGES_PER_SEQUENCE): """Read data from single files stored in directories. Args: filename: the filename of the set of files to be loaded. output_sequence_length: Length of the output sequence. If less than IMAGES_PER_SEQUENCE, only the first `output_sequence_length` frames will be kept. Returns: A dictionary that maps strings to tf.Tensors of type float32: 'rgb': an RGB image of shape H, W, 3. Each channel value is between 0.0 and 1.0. 'intrinsics': a list of intrinsics values. """ if output_sequence_length > IMAGES_PER_SEQUENCE or output_sequence_length < 1: raise ValueError( 'Invalid output_sequence_length %d: must be within [1, ' '%d].' % (output_sequence_length, IMAGES_PER_SEQUENCE)) image_file = tf.strings.join([filename, '.png']) intrinsics_file = tf.strings.join([filename, '_cam.txt']) mask_file = tf.strings.join([filename, '-fseg.png']) # Read files. encoded_image = tf.io.read_file(image_file) encoded_mask = tf.io.read_file(mask_file) intrinsics_content = tf.io.read_file(intrinsics_file) content_is_empty = tf.math.equal(intrinsics_content, '') filename_matches = tf.strings.regex_full_match( filename, '.*%s$' % KITTI_CORRUPT_FILE) file_is_corrupt = tf.math.logical_and(content_is_empty, filename_matches) intrinsics_content = tf.cond(file_is_corrupt, lambda: KITTI_CORRUPT_FILE_INTRINSICS, lambda: intrinsics_content) # Parse intrinsics data to a tensor representing a 3x3 matrix. intrinsics = tf.strings.split([intrinsics_content], ',').values intrinsics = tf.strings.to_number(intrinsics) intrinsics.set_shape([9]) fx, _, x0, _, fy, y0, _, _, _ = tf.unstack(intrinsics) intrinsics = tf.stack([IMAGE_WIDTH, IMAGE_HEIGHT, fx, fy, x0, y0]) # Decode and normalize images. decoded_image = tf.image.decode_png(encoded_image, channels=3) decoded_image = tf.to_float(decoded_image) * (1 / 255.0) split_image_sequence = tf.split(decoded_image, IMAGES_PER_SEQUENCE, axis=1) decoded_mask = tf.image.decode_png(encoded_mask, channels=3) mask_r, mask_g, mask_b = tf.unstack(tf.to_int32(decoded_mask), axis=-1) # Since TPU does not support images of type uint8, we encode the 3 RGB uint8 # values into one int32 value. mask = mask_r * (256 * 256) + mask_g * 256 + mask_b # All images in our pipeline have 3 dimensions (height, width, channels), so # we add a third dimension to the mask too. mask = tf.expand_dims(mask, -1) split_mask_sequence = tf.split(mask, IMAGES_PER_SEQUENCE, axis=1) return { 'rgb': tf.stack(split_image_sequence[:output_sequence_length]), 'intrinsics': tf.stack([intrinsics] * output_sequence_length), 'mask': tf.stack(split_mask_sequence[:output_sequence_length]), }
def _parse_train_data(self, data): """Parses data for training. Args: data: the decoded tensor dictionary from TfExampleDecoder. Returns: image: image tensor that is preproessed to have normalized value and dimension [output_size[0], output_size[1], 3] labels: a dictionary of tensors used for training. The following describes {key: value} pairs in the dictionary. image_info: a 2D `Tensor` that encodes the information of the image and the applied preprocessing. It is in the format of [[original_height, original_width], [scaled_height, scaled_width], anchor_boxes: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, 4] representing anchor boxes at each level. rpn_score_targets: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, anchors_per_location]. The height_l and width_l represent the dimension of class logits at l-th level. rpn_box_targets: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, anchors_per_location * 4]. The height_l and width_l represent the dimension of bounding box regression output at l-th level. gt_boxes: Groundtruth bounding box annotations. The box is represented in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled image that is fed to the network. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances, 4]. gt_classes: Groundtruth classes annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. gt_masks: groundtrugh masks cropped by the bounding box and resized to a fixed size determined by mask_crop_size. """ classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] if self._include_mask: masks = data['groundtruth_instance_masks'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( tf.greater(tf.size(is_crowds), 0), lambda: tf.where(tf.logical_not(is_crowds))[:, 0], lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) if self._include_mask: masks = tf.gather(masks, indices) # Gets original image and its size. image = data['image'] image_shape = tf.shape(image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: if self._include_mask: image, boxes, masks = input_utils.random_horizontal_flip( image, boxes, masks) else: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. # Now the coordinates of boxes are w.r.t. the original image. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. # Now the coordinates of boxes are w.r.t the scaled image. 
image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, (image_height, image_width), offset) if self._include_mask: masks = input_utils.resize_and_crop_masks( tf.expand_dims(masks, axis=-1), image_scale, (image_height, image_width), offset) masks = tf.squeeze(masks, axis=-1) # Filters out ground truth boxes that are all zeros. indices = input_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) if self._include_mask: masks = tf.gather(masks, indices) num_masks = tf.shape(masks)[0] masks = tf.image.crop_and_resize( tf.expand_dims(masks, axis=-1), box_utils.normalize_boxes(boxes, tf.shape(image)[0:2]), box_ind=tf.range(num_masks, dtype=tf.int32), crop_size=[self._mask_crop_size, self._mask_crop_size], method='bilinear') masks = tf.squeeze(masks, axis=-1) # Assigns anchor targets. # Note that after the target assignment, box targets are absolute pixel # offsets w.r.t. the scaled image. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.RpnAnchorLabeler(input_anchor, self._rpn_match_threshold, self._rpn_unmatched_threshold, self._rpn_batch_size_per_im, self._rpn_fg_fraction) rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'anchor_boxes': input_anchor.multilevel_boxes, 'image_info': image_info, 'rpn_score_targets': rpn_score_targets, 'rpn_box_targets': rpn_box_targets, } labels['gt_boxes'] = input_utils.pad_to_fixed_size( boxes, self._max_num_instances, -1) labels['gt_classes'] = input_utils.pad_to_fixed_size( classes, self._max_num_instances, -1) if self._include_mask: labels['gt_masks'] = input_utils.pad_to_fixed_size( masks, self._max_num_instances, -1) return image, labels
def proposal(*args):
    return tf.cond(
        pred=no_crop_check(),
        true_fn=no_crop_proposal,
        false_fn=crop_proposal,
    )
def get_train_ops(loss,
                  tf_variables,
                  train_step,
                  clip_mode=None,
                  grad_bound=None,
                  l2_reg=1e-4,
                  lr_warmup_val=None,
                  lr_warmup_steps=100,
                  lr_init=0.1,
                  lr_dec_start=0,
                  lr_dec_every=10000,
                  lr_dec_rate=0.1,
                  lr_dec_min=None,
                  lr_cosine=False,
                  lr_max=None,
                  lr_min=None,
                  lr_T_0=None,
                  lr_T_mul=None,
                  num_train_batches=None,
                  optim_algo=None,
                  sync_replicas=False,
                  num_aggregate=None,
                  num_replicas=None,
                  get_grad_norms=False,
                  moving_average=None):
    """
    Args:
      clip_mode: "global", "norm", or None.
      moving_average: store the moving average of parameters.
    """
    if l2_reg > 0:
        l2_losses = []
        for var in tf_variables:
            l2_losses.append(tf.reduce_sum(var**2))
        l2_loss = tf.add_n(l2_losses)
        loss += l2_reg * l2_loss

    grads = tf.gradients(loss, tf_variables)
    grad_norm = tf.global_norm(grads)

    grad_norms = {}
    for v, g in zip(tf_variables, grads):
        if v is None or g is None:
            continue
        if isinstance(g, tf.IndexedSlices):
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g.values**2))
        else:
            grad_norms[v.name] = tf.sqrt(tf.reduce_sum(g**2))

    if clip_mode is not None:
        assert grad_bound is not None, "Need grad_bound to clip gradients."
        if clip_mode == "global":
            grads, _ = tf.clip_by_global_norm(grads, grad_bound)
        elif clip_mode == "norm":
            clipped = []
            for g in grads:
                if isinstance(g, tf.IndexedSlices):
                    c_g = tf.clip_by_norm(g.values, grad_bound)
                    # IndexedSlices takes (values, indices); keep the original indices.
                    c_g = tf.IndexedSlices(c_g, g.indices)
                else:
                    c_g = tf.clip_by_norm(g, grad_bound)
                # Append the clipped gradient, not the original one.
                clipped.append(c_g)
            grads = clipped
        else:
            raise NotImplementedError("Unknown clip_mode {}".format(clip_mode))

    if lr_cosine:
        assert lr_max is not None, "Need lr_max to use lr_cosine"
        assert lr_min is not None, "Need lr_min to use lr_cosine"
        assert lr_T_0 is not None, "Need lr_T_0 to use lr_cosine"
        assert lr_T_mul is not None, "Need lr_T_mul to use lr_cosine"
        assert num_train_batches is not None, ("Need num_train_batches to use"
                                               " lr_cosine")

        # train_step is incremented once per batch, so this is the current epoch.
        curr_epoch = train_step // num_train_batches

        last_reset = tf.Variable(0, dtype=tf.int32, trainable=False,
                                 name="last_reset")
        T_i = tf.Variable(lr_T_0, dtype=tf.int32, trainable=False, name="T_i")
        T_curr = curr_epoch - last_reset

        def _update():
            update_last_reset = tf.assign(last_reset, curr_epoch, use_locking=True)
            update_T_i = tf.assign(T_i, T_i * lr_T_mul, use_locking=True)
            with tf.control_dependencies([update_last_reset, update_T_i]):
                rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
                lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
                return lr

        def _no_update():
            rate = tf.to_float(T_curr) / tf.to_float(T_i) * 3.1415926
            lr = lr_min + 0.5 * (lr_max - lr_min) * (1.0 + tf.cos(rate))
            return lr

        learning_rate = tf.cond(tf.greater_equal(T_curr, T_i), _update, _no_update)
    else:
        learning_rate = tf.train.exponential_decay(
            lr_init,
            tf.maximum(train_step - lr_dec_start, 0),
            lr_dec_every,
            lr_dec_rate,
            staircase=True)
        if lr_dec_min is not None:
            learning_rate = tf.maximum(learning_rate, lr_dec_min)

    if lr_warmup_val is not None:
        learning_rate = tf.cond(tf.less(train_step, lr_warmup_steps),
                                lambda: lr_warmup_val,
                                lambda: learning_rate)

    if optim_algo == "momentum":
        opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_locking=True,
                                         use_nesterov=True)
    elif optim_algo == "sgd":
        opt = tf.train.GradientDescentOptimizer(learning_rate, use_locking=True)
    elif optim_algo == "adam":
        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.0, epsilon=1e-3,
                                     use_locking=True)
    else:
        raise ValueError("Unknown optim_algo {}".format(optim_algo))

    if sync_replicas:
        assert num_aggregate is not None, "Need num_aggregate to sync."
        assert num_replicas is not None, "Need num_replicas to sync."
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_aggregate,
            total_num_replicas=num_replicas,
            use_locking=True)

    if moving_average is not None:
        opt = tf.contrib.opt.MovingAverageOptimizer(
            opt, average_decay=moving_average)

    train_op = opt.apply_gradients(zip(grads, tf_variables),
                                   global_step=train_step)

    if get_grad_norms:
        return train_op, learning_rate, grad_norm, opt, grad_norms
    return train_op, learning_rate, grad_norm, opt
def body(x):
    # Note: the incoming loop variable x is discarded and replaced with a
    # constant, and z is unused; this is a toy tf.cond example.
    x = tf.constant(7)
    z = tf.constant(20)
    res = tf.cond(tf.less(x, 10),
                  lambda: tf.add(10, 20),
                  lambda: tf.square(10))
    return tf.multiply(res, x)
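# For reference on the semantics this toy function relies on: tf.cond traces
# both branch callables into the graph, but only the selected branch runs at
# session time, and both branches must produce matching dtypes/shapes.
# A minimal, self-contained check (TF1-style graph mode, hypothetical feed values):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

x = tf.placeholder(tf.int32, shape=[])
res = tf.cond(tf.less(x, 10),
              lambda: tf.add(10, 20),   # taken when x < 10
              lambda: tf.square(10))    # taken otherwise

with tf.Session() as sess:
    print(sess.run(res, feed_dict={x: 7}))   # 30
    print(sess.run(res, feed_dict={x: 12}))  # 100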
def D(data_source, x_real, x_fake, dropout_rate, is_training, reuse=True, print_summary=True): # data_source is a string, either "fake" or "real", which determines whether do to the word # embedding lookup to avoid non-differentiability issues. # discriminator (x -> n + 1 class) with tf.variable_scope('Discriminator', reuse=reuse) as scope: # Embedding layer # Input x has shape [batch_size, 63] where 63 is the sequence length W_embed = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="W_embed") embedded_chars = tf.nn.embedding_lookup(W_embed, x_real) # Add a channel dimension: embedded_char_expanded = tf.expand_dims(embedded_chars, -1) # Output size: [batch_size, sequence_length, embedding_size, 1] print('fake shape is!') print(x_fake.get_shape()) print('embed_char_expand shape is!') print(embedded_char_expanded.get_shape()) # conditional pipeline! def f1(): return embedded_char_expanded def f2(): return x_fake real_or_fake = tf.math.equal('real', data_source) input_x = tf.cond(real_or_fake, f1, f2) print('input_x shape is!') # [batch, seq_len, embed_size, 1] print(input_x.get_shape()) pooled_outputs = [ ] # As per the paper, the pooling layer takes the max of each filter's featuremaps # NOTE: We are using multiple filter sizes as per the paper's specs for i, filter_size in enumerate(filter_sizes): #with tf.name_scope("conv-maxpool-filter_size-"+str(filter_size)): # Define W as the filter matrix (NOTE: different namescope from the W above) # Initialized with truncated normal parameters # The W filter has shape: [height, width, input_channels, output_channels] W = tf.Variable( tf.truncated_normal( [filter_size, embedding_size, 1, num_filters], stddev=0.1)) # Conv layer: valid padding yields output of shape: # [none, sequence_length - filter_size + 1, 1, num_filters] # for dimensions: [none, height, width, channel] # TF document: "(conv2d) has the same type as input and the same outer batch shape." conv = tf.nn.conv2d(input_x, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") # Biase vector: 1d vector with length=number of output channels of conv b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b") # Relu h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu') lrelu3 = tf.maximum(0.2 * h, h) # TF document: "ksize: The size of the window for each dimension of the input tensor." pooled = tf.nn.max_pool( lrelu3, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding="VALID", name="pool") # The output now has size: [none, 1, 1, num_filters] pooled_outputs.append(pooled) num_filters_total = num_filters * len(filter_sizes) h_pool = tf.concat(pooled_outputs, 3) # Concatenate on the forth dimension h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # The output now has shape: [none, num_filters_total] #with tf.name_scope("dropout"): h_drop = tf.nn.dropout(h_pool_flat, rate=dropout_rate) #with tf.name_scope("output"): # Fully connected layer # Matrix multiplication: (none, num_filters_total)x(num_filters_total, num_classes) = (none, num_classes) W = tf.Variable(tf.truncated_normal( [num_filters_total, num_classes + 1], stddev=0.1), name="W") # NOTE: b has dimension of the channels (in this case, num_classes) b = tf.Variable(tf.constant(0.1, shape=[num_classes + 1]), name="b") fc = tf.nn.xw_plus_b(h_drop, W, b, name="scores") #Logits output = tf.nn.softmax(fc) return h_pool_flat, fc, output, real_or_fake
def _dataset_parser(value): """Parse data to a fixed dimension input image and learning targets. Args: value: A dictionary contains an image and groundtruth annotations. Returns: image: Image tensor that is preproessed to have normalized value and fixed dimension [image_size, image_size, 3] cls_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, num_anchors]. The height_l and width_l represent the dimension of class logits at l-th level. box_targets_dict: ordered dictionary with keys [min_level, min_level+1, ..., max_level]. The values are tensor with shape [height_l, width_l, num_anchors * 4]. The height_l and width_l represent the dimension of bounding box regression output at l-th level. num_positives: Number of positive anchors in the image. source_id: Source image id. Default value -1 if the source id is empty in the groundtruth annotation. image_scale: Scale of the proccessed image to the original image. image_info: image information that includes the original height and width, the scale of the proccessed image to the original image, and the scaled height and width. boxes: Groundtruth bounding box annotations. The box is represented in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances, 4]. is_crowds: Groundtruth annotations to indicate if an annotation represents a group of instances by value {0, 1}. The tennsor is padded with 0 to the fixed dimension [self._max_num_instances]. areas: Groundtruth areas annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. classes: Groundtruth classes annotations. The tennsor is padded with -1 to the fixed dimension [self._max_num_instances]. """ with tf.name_scope('parser'): data = example_decoder.decode(value) data['groundtruth_is_crowd'] = tf.cond( tf.greater(tf.size(data['groundtruth_is_crowd']), 0), lambda: data['groundtruth_is_crowd'], lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)) source_id = data['source_id'] image = data['image'] boxes = data['groundtruth_boxes'] classes = data['groundtruth_classes'] classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1]) areas = data['groundtruth_area'] is_crowds = data['groundtruth_is_crowd'] classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1]) input_height = tf.shape(image)[0] input_width = tf.shape(image)[1] if params['skip_crowd_during_training'] and self._is_training: indices = tf.where( tf.logical_not(data['groundtruth_is_crowd'])) classes = tf.gather_nd(classes, indices) boxes = tf.gather_nd(boxes, indices) input_processor = DetectionInputProcessor( image, params['image_size'], boxes, classes) input_processor.normalize_image() if self._is_training and params['input_rand_hflip']: input_processor.random_horizontal_flip() if self._is_training: input_processor.set_training_random_scale_factors( params['train_scale_min'], params['train_scale_max']) else: input_processor.set_scale_factors_to_output_size() image = input_processor.resize_and_crop_image() boxes, classes = input_processor.resize_and_crop_boxes() # Assign anchors. (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(boxes, classes) source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id) source_id = tf.string_to_number(source_id) # Pad groundtruth data for evaluation. 
image_scale = input_processor.image_scale_to_original scaled_height = tf.to_float( input_height) * input_processor.image_scale scaled_width = tf.to_float( input_width) * input_processor.image_scale image_info = tf.stack([ tf.cast(scaled_height, dtype=tf.float32), tf.cast(scaled_width, dtype=tf.float32), image_scale, tf.cast(input_height, dtype=tf.float32), tf.cast(input_width, dtype=tf.float32), ]) boxes *= image_scale is_crowds = tf.cast(is_crowds, dtype=tf.float32) boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4]) is_crowds = pad_to_fixed_size(is_crowds, 0, [self._max_num_instances, 1]) areas = pad_to_fixed_size(areas, -1, [self._max_num_instances, 1]) classes = pad_to_fixed_size(classes, -1, [self._max_num_instances, 1]) if params['use_bfloat16']: image = tf.cast(image, dtype=tf.bfloat16) return (image, cls_targets, box_targets, num_positives, source_id, image_scale, image_info, boxes, is_crowds, areas, classes)
def compute_total_loss(self, pd_new, pd_old, value_tensor, return_tensor, batch_advantage_norm, policy_old_neg_logprob_tensor, policy_action_tensor): """Defines the total loss function. Args: pd_new: The current policy distribution (a multivariate normal distribution). This policy distribution gets updated in the course of training. pd_old: The old policy distribution that we use during sampling the trajectory (a multivariate normal distribution). value_tensor: The values associated to the rollout trajectory. return_tensor: The return values computed for the rollout trajectory. batch_advantage_norm: The normalized advantage tensor computed for a batch of data. For advantage calculation, we use generalized advantage estimation (GAE) formula. policy_old_neg_logprob_tensor: The negative log probabilities from the policy rollouts. policy_action_tensor: The actions from the policy rollouts. """ # Policy loss ppo_policy_loss_out = ppo_loss.ppo_policy_loss( neg_logprobs_old=policy_old_neg_logprob_tensor, actions=policy_action_tensor, advantages=batch_advantage_norm, dist_new=pd_new, mcts_sampling=self.mcts_sampling_enable) (self.policy_loss, self.approxkl, self.clipfrac, self.policy_ratio) = ppo_policy_loss_out # Value Loss if self._ppo2_enable: self.value_loss = ppo_loss.ppo2_value_loss( value_old=value_tensor, pred_value=self.value_new, returns=return_tensor) else: self.value_loss = ppo_loss.ppo1_value_loss( pred_value=self.value_new, returns=return_tensor) # MSE loss between mean and standard deviations self.mean_mse_loss, self.logstd_mse_loss = ppo_loss.l2_norm_policy_loss( policy_mean=self.mean_new, policy_logstd=self.logstd_new, mcts_mean=self.mean_old, mcts_logstd=self.logstd_old) mcts_dist = distributions.MultiVariateNormalDiag( mean=self.mean_old, logstd=self.logstd_old) policy_dist = distributions.MultiVariateNormalDiag( mean=self.mean_new, logstd=self.logstd_new) self.imitation_kl_divergence = tf.reduce_mean( policy_dist.kl_divergence(mcts_dist)) # Calculate KL divergence and entropy of new distribution self.kl_divergence = tf.reduce_mean(pd_new.kl_divergence(pd_old)) self.entropy = pd_new.entropy() # Calculate entropy loss self.entropy_loss = tf.reduce_mean(self.entropy) # Calulate total loss total_loss_ppo = (self._policy_coeff * self.policy_loss) + ( self._value_coeff * self.value_loss) - (self._entropy_coeff * self.entropy_loss) total_loss_mcts = (self._value_coeff * self.value_loss) + ( self._mse_loss_coeff * (self.imitation_kl_divergence + self.entropy_loss)) self.total_loss = tf.cond(tf.equal(self.mcts_sampling_enable, True), lambda: total_loss_mcts, lambda: total_loss_ppo)
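# One detail worth noting about the predicate above: tf.cond only needs a scalar
# boolean, so if self.mcts_sampling_enable is already a boolean tensor it can be
# passed directly, without wrapping it in tf.equal(..., True). A minimal sketch
# with stand-in constants for the two loss formulations (TF1-style graph mode):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

mcts_sampling_enable = tf.placeholder(tf.bool, shape=[])
total_loss_ppo = tf.constant(1.0)   # stand-in for the PPO total loss
total_loss_mcts = tf.constant(2.0)  # stand-in for the MCTS total loss

total_loss = tf.cond(mcts_sampling_enable,
                     lambda: total_loss_mcts,
                     lambda: total_loss_ppo)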
def create_train_op(optimizer,
                    grads_and_vars,
                    max_grad=1.0,
                    mixed_precision=False,
                    gradient_accumulation_steps=1):
    global_step = tf.train.get_or_create_global_step()

    if gradient_accumulation_steps > 1:
        local_step = tf.get_variable(name="local_step", shape=[], dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer)
        batch_finite = tf.get_variable(name="batch_finite", shape=[], dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer)
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tf.trainable_variables()
        ]

        reset_step = tf.cast(
            tf.math.equal(local_step % gradient_accumulation_steps, 0),
            dtype=tf.bool)
        local_step = tf.cond(reset_step,
                             lambda: local_step.assign(tf.ones_like(local_step)),
                             lambda: local_step.assign_add(1))

        grads_and_vars_and_accums = [(gv[0], gv[1], accum_vars[i])
                                     for i, gv in enumerate(grads_and_vars)
                                     if gv[0] is not None]
        grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums))

        all_are_finite = tf.reduce_all(
            [tf.reduce_all(tf.is_finite(g)) for g in grads
            ]) if mixed_precision else tf.constant(True, dtype=tf.bool)
        batch_finite = tf.cond(
            reset_step,
            lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number to keep clip_by_global_norm
        # from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        accum_vars = tf.cond(
            reset_step,
            lambda: [accum_vars[i].assign(grad)
                     for i, grad in enumerate(clipped_grads)],
            lambda: [accum_vars[i].assign_add(grad)
                     for i, grad in enumerate(clipped_grads)])

        def update(accum_vars):
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)))

        update_step = tf.identity(
            tf.cast(tf.math.equal(local_step % gradient_accumulation_steps, 0),
                    dtype=tf.bool),
            name="update_step")
        update_op = tf.cond(update_step,
                            lambda: update(accum_vars),
                            lambda: tf.no_op())

        new_global_step = tf.cond(tf.math.logical_and(update_step, batch_finite),
                                  lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op, [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        all_are_finite = tf.reduce_all(
            [tf.reduce_all(tf.is_finite(g)) for g in grads
            ]) if mixed_precision else tf.constant(True, dtype=tf.bool)

        # This is how the model was pre-trained.
        # Ensure the global norm is a finite number to keep clip_by_global_norm
        # from throwing a hissy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)

        # Do not pass global_step here: this Adam variant does not increment the
        # global step internally, unlike TF's built-in Adam and other optimizers;
        # passing it would make the global step advance twice per update.
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))

        new_global_step = tf.cond(all_are_finite,
                                  lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
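# A minimal sketch of the accumulate-then-apply gate used above, assuming
# TF1-style graph mode; accum_var and grad stand in for a single
# (accumulator, gradient) pair and N for gradient_accumulation_steps.
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

N = 4
local_step = tf.get_variable("local_step", shape=[], dtype=tf.int32,
                             trainable=False, initializer=tf.zeros_initializer)
accum_var = tf.get_variable("accum", shape=[10], dtype=tf.float32,
                            trainable=False, initializer=tf.zeros_initializer())
grad = tf.random.normal([10])  # stand-in for a computed gradient

# On the first micro-step of every cycle, restart the accumulator; otherwise add.
reset_step = tf.equal(local_step % N, 0)
local_step = tf.cond(reset_step,
                     lambda: local_step.assign(tf.ones_like(local_step)),
                     lambda: local_step.assign_add(1))
accum_var = tf.cond(reset_step,
                    lambda: accum_var.assign(grad),
                    lambda: accum_var.assign_add(grad))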
def __init__(self, num_unique_documents, vocab_size, num_topics, freqs, embedding_size=128, num_sampled=40, learning_rate=1e-3, lmbda=150.0, alpha=None, power=0.75, batch_size=32, clip_gradients=5.0, **kwargs): device = get_device(**kwargs) _graph = tf.Graph() with _graph.as_default(): with tf.device(device): moving_avgs = tf.train.ExponentialMovingAverage(0.9) self.batch_size = batch_size self.freqs = freqs self.X = tf.placeholder(tf.int32, shape=[None]) self.Y = tf.placeholder(tf.int64, shape=[None]) self.DOC = tf.placeholder(tf.int32, shape=[None]) self.switch_loss = tf.Variable(0, trainable=False) train_labels = tf.reshape(self.Y, [-1, 1]) sampler = tf.nn.fixed_unigram_candidate_sampler( train_labels, num_true=1, num_sampled=num_sampled, unique=True, range_max=vocab_size, distortion=power, unigrams=self.freqs, ) self.word_embedding = tf.Variable( tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0)) self.nce_weights = tf.Variable( tf.truncated_normal( [vocab_size, embedding_size], stddev=tf.sqrt(1 / embedding_size), )) self.nce_biases = tf.Variable(tf.zeros([vocab_size])) scalar = 1 / np.sqrt(num_unique_documents + num_topics) self.doc_embedding = tf.Variable( tf.random_normal( [num_unique_documents, num_topics], mean=0, stddev=50 * scalar, )) self.topic_embedding = tf.get_variable( 'topic_embedding', shape=[num_topics, embedding_size], dtype=tf.float32, initializer=tf.orthogonal_initializer(gain=scalar), ) pivot = tf.nn.embedding_lookup(self.word_embedding, self.X) proportions = tf.nn.embedding_lookup(self.doc_embedding, self.DOC) doc = tf.matmul(proportions, self.topic_embedding) doc_context = doc word_context = pivot context = tf.add(word_context, doc_context) loss_word2vec = tf.reduce_mean( tf.nn.nce_loss( weights=self.nce_weights, biases=self.nce_biases, labels=self.Y, inputs=context, num_sampled=num_sampled, num_classes=vocab_size, num_true=1, sampled_values=sampler, )) self.fraction = tf.Variable(1, trainable=False, dtype=tf.float32) n_topics = self.doc_embedding.get_shape()[1].value log_proportions = tf.nn.log_softmax(self.doc_embedding) if alpha is None: alpha = 1.0 / n_topics loss = (alpha - 1) * log_proportions prior = tf.reduce_sum(loss) loss_lda = lmbda * self.fraction * prior global_step = tf.Variable(0, trainable=False, name='global_step') self.cost = tf.cond( global_step < self.switch_loss, lambda: loss_word2vec, lambda: loss_word2vec + loss_lda, ) loss_avgs_op = moving_avgs.apply( [loss_lda, loss_word2vec, self.cost]) with tf.control_dependencies([loss_avgs_op]): optimizer = tf.train.AdamOptimizer( learning_rate=learning_rate) gvs = optimizer.compute_gradients(self.cost) capped_gvs = [( tf.clip_by_value(grad, -clip_gradients, clip_gradients), var, ) for grad, var in gvs] self.optimizer = optimizer.apply_gradients(capped_gvs) self.sess = generate_session(_graph, **kwargs) self.sess.run(tf.global_variables_initializer())
def decode(self, tf_example_string_tensor): """Decodes serialized tensorflow example and returns a tensor dictionary. Args: tf_example_string_tensor: a string tensor holding a serialized tensorflow example proto. Returns: A dictionary of the following tensors. fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3] containing image. fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of shape [2] containing shape of the image. fields.InputDataFields.source_id - string tensor containing original image id. fields.InputDataFields.key - string tensor with unique sha256 hash key. fields.InputDataFields.filename - string tensor with original dataset filename. fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape [None, 4] containing box corners. fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape [None] containing classes for the boxes. fields.InputDataFields.groundtruth_weights - 1D float32 tensor of shape [None] indicating the weights of groundtruth boxes. fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape [None] containing containing object mask area in pixel squared. fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape [None] indicating if the boxes enclose a crowd. Optional: fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of shape [None] indicating if a class is present in the image (1.0) or a class is not present in the image (0.0). fields.InputDataFields.image_additional_channels - 3D uint8 tensor of shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim is width; 3rd dim is the number of additional channels. fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape [None] indicating if the boxes represent `difficult` instances. fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape [None] indicating if the boxes represent `group_of` instances. fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of shape [None, num_keypoints, 2] containing keypoints, where the coordinates of the keypoints are ordered (y, x). fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool tensor of shape [None, num_keypoints] containing keypoint visibilites. fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of shape [None, None, None] containing instance masks. fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape [None] containing classes for the boxes. fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape [None * num_classes] containing flattened multiclass scores for groundtruth boxes. 
fields.InputDataFields.context_features - 1D float32 tensor of shape [context_feature_length * num_context_features] fields.InputDataFields.context_feature_length - int32 tensor specifying the length of each feature in context_features """ serialized_example = tf.reshape(tf_example_string_tensor, shape=[]) decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features, self.items_to_handlers) keys = decoder.list_items() tensors = decoder.decode(serialized_example, items=keys) tensor_dict = dict(zip(keys, tensors)) is_crowd = fields.InputDataFields.groundtruth_is_crowd tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool) tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3]) tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape( tensor_dict[fields.InputDataFields.image])[:2] if fields.InputDataFields.image_additional_channels in tensor_dict: channels = tensor_dict[fields.InputDataFields.image_additional_channels] channels = tf.squeeze(channels, axis=3) channels = tf.transpose(channels, perm=[1, 2, 0]) tensor_dict[fields.InputDataFields.image_additional_channels] = channels def default_groundtruth_weights(): return tf.ones( [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]], dtype=tf.float32) tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( tf.greater( tf.shape( tensor_dict[fields.InputDataFields.groundtruth_weights])[0], 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], default_groundtruth_weights) if fields.InputDataFields.groundtruth_keypoints in tensor_dict: # Set all keypoints that are not labeled to NaN. gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities visibilities_tiled = tf.tile( tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2]) tensor_dict[gt_kpt_fld] = tf.where( visibilities_tiled, tensor_dict[gt_kpt_fld], np.nan * tf.ones_like(tensor_dict[gt_kpt_fld])) if self._expand_hierarchy_labels: input_fields = fields.InputDataFields image_classes, image_confidences = self._expand_image_label_hierarchy( tensor_dict[input_fields.groundtruth_image_classes], tensor_dict[input_fields.groundtruth_image_confidences]) tensor_dict[input_fields.groundtruth_image_classes] = image_classes tensor_dict[input_fields.groundtruth_image_confidences] = ( image_confidences) box_fields = [ fields.InputDataFields.groundtruth_group_of, fields.InputDataFields.groundtruth_is_crowd, fields.InputDataFields.groundtruth_difficult, fields.InputDataFields.groundtruth_area, fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_weights, ] def expand_field(field_name): return self._expansion_box_field_labels( tensor_dict[input_fields.groundtruth_classes], tensor_dict[field_name]) # pylint: disable=cell-var-from-loop for field in box_fields: if field in tensor_dict: tensor_dict[field] = tf.cond( tf.size(tensor_dict[field]) > 0, lambda: expand_field(field), lambda: tensor_dict[field]) # pylint: enable=cell-var-from-loop tensor_dict[input_fields.groundtruth_classes] = ( self._expansion_box_field_labels( tensor_dict[input_fields.groundtruth_classes], tensor_dict[input_fields.groundtruth_classes], True)) if fields.InputDataFields.groundtruth_group_of in tensor_dict: group_of = fields.InputDataFields.groundtruth_group_of tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool) if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict: 
tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_dp_num_points], dtype=tf.int32) tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids], dtype=tf.int32) if fields.InputDataFields.groundtruth_track_ids in tensor_dict: tensor_dict[fields.InputDataFields.groundtruth_track_ids] = tf.cast( tensor_dict[fields.InputDataFields.groundtruth_track_ids], dtype=tf.int32) return tensor_dict
def call(self, x): input_image, y_pred, y_true, true_boxes = x # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] y_pred = tf.reshape( y_pred, tf.concat([tf.shape(input=y_pred)[:3], tf.constant([3, -1])], axis=0)) # initialize the masks object_mask = tf.expand_dims(y_true[..., 4], 4) # the variable to keep track of number of batches processed batch_seen = tf.Variable(0.) # compute grid factor and net factor grid_h = tf.shape(input=y_true)[1] grid_w = tf.shape(input=y_true)[2] grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32), [1, 1, 1, 1, 2]) net_h = tf.shape(input=input_image)[1] net_w = tf.shape(input=input_image)[2] net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32), [1, 1, 1, 1, 2]) """ Adjust prediction """ pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] + tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy pred_box_wh = y_pred[..., 2:4] # t_wh pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) # adjust confidence pred_box_class = y_pred[..., 5:] # adjust class probabilities """ Adjust ground truth """ true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) true_box_wh = y_true[..., 2:4] # t_wh true_box_conf = tf.expand_dims(y_true[..., 4], 4) true_box_class = tf.argmax(input=y_true[..., 5:], axis=-1) """ Compare each predicted box to all true boxes """ # initially, drag all objectness of all boxes to 0 conf_delta = pred_box_conf - 0 # then, ignore the boxes which have good overlap with some true box true_xy = true_boxes[..., 0:2] / grid_factor true_wh = true_boxes[..., 2:4] / net_factor true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) pred_wh = tf.expand_dims( tf.exp(pred_box_wh) * self.anchors / net_factor, 4) pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) best_ious = tf.reduce_max(input_tensor=iou_scores, axis=4) conf_delta *= tf.expand_dims( tf.cast(best_ious < self.ignore_thresh, dtype=tf.float32), 4) """ Compute some online statistics """ true_xy = true_box_xy / grid_factor true_wh = tf.exp(true_box_wh) * self.anchors / net_factor true_wh_half = true_wh / 2. true_mins = true_xy - true_wh_half true_maxes = true_xy + true_wh_half pred_xy = pred_box_xy / grid_factor pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor pred_wh_half = pred_wh / 2. pred_mins = pred_xy - pred_wh_half pred_maxes = pred_xy + pred_wh_half intersect_mins = tf.maximum(pred_mins, true_mins) intersect_maxes = tf.minimum(pred_maxes, true_maxes) intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] true_areas = true_wh[..., 0] * true_wh[..., 1] pred_areas = pred_wh[..., 0] * pred_wh[..., 1] union_areas = pred_areas + true_areas - intersect_areas iou_scores = tf.truediv(intersect_areas, union_areas) iou_scores = object_mask * tf.expand_dims(iou_scores, 4) count = tf.reduce_sum(input_tensor=object_mask) count_noobj = tf.reduce_sum(input_tensor=1 - object_mask) detect_mask = tf.cast((pred_box_conf * object_mask) >= 0.5, dtype=tf.float32) class_mask = tf.expand_dims( tf.cast(tf.equal(tf.argmax(input=pred_box_class, axis=-1), true_box_class), dtype=tf.float32), 4) recall50 = tf.reduce_sum( input_tensor=tf.cast(iou_scores >= 0.5, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) recall75 = tf.reduce_sum( input_tensor=tf.cast(iou_scores >= 0.75, dtype=tf.float32) * detect_mask * class_mask) / (count + 1e-3) avg_iou = tf.reduce_sum(input_tensor=iou_scores) / (count + 1e-3) avg_obj = tf.reduce_sum(input_tensor=pred_box_conf * object_mask) / (count + 1e-3) avg_noobj = tf.reduce_sum(input_tensor=pred_box_conf * (1 - object_mask)) / (count_noobj + 1e-3) avg_cat = tf.reduce_sum(input_tensor=object_mask * class_mask) / (count + 1e-3) """ Warm-up training """ batch_seen = tf.assign_add(batch_seen, 1.) true_box_xy, true_box_wh, xywh_mask = tf.cond( pred=tf.less(batch_seen, self.warmup_batches + 1), true_fn=lambda: [ true_box_xy + (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) * (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) * (1 - object_mask), tf.ones_like(object_mask) ], false_fn=lambda: [true_box_xy, true_box_wh, object_mask]) """ Compare each true box to all anchor boxes """ wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor wh_scale = tf.expand_dims( 2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) # the smaller the box, the bigger the scale xy_delta = xywh_mask * (pred_box_xy - true_box_xy) * wh_scale * self.xywh_scale wh_delta = xywh_mask * (pred_box_wh - true_box_wh) * wh_scale * self.xywh_scale conf_delta = object_mask * ( pred_box_conf - true_box_conf) * self.obj_scale + ( 1 - object_mask) * conf_delta * self.noobj_scale class_delta = object_mask * \ tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ self.class_scale loss_xy = tf.reduce_sum(input_tensor=tf.square(xy_delta), axis=list(range(1, 5))) loss_wh = tf.reduce_sum(input_tensor=tf.square(wh_delta), axis=list(range(1, 5))) loss_conf = tf.reduce_sum(input_tensor=tf.square(conf_delta), axis=list(range(1, 5))) loss_class = tf.reduce_sum(input_tensor=class_delta, axis=list(range(1, 5))) loss = loss_xy + loss_wh + loss_conf + loss_class loss = tf.Print(loss, [grid_h, avg_obj], message='avg_obj \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_noobj], message='avg_noobj \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_iou], message='avg_iou \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, avg_cat], message='avg_cat \t\t', summarize=1000) loss = tf.Print(loss, [grid_h, recall50], message='recall50 \t', summarize=1000) loss = tf.Print(loss, [grid_h, recall75], message='recall75 \t', summarize=1000) loss = tf.Print(loss, [grid_h, count], message='count \t', summarize=1000) loss = tf.Print(loss, [ grid_h, tf.reduce_sum(input_tensor=loss_xy), tf.reduce_sum(input_tensor=loss_wh), tf.reduce_sum(input_tensor=loss_conf), tf.reduce_sum(input_tensor=loss_class) ], message='loss xy, wh, conf, class: \t', summarize=1000) return loss * self.grid_scale
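# The warm-up switch above also shows that tf.cond branches may return a list of
# tensors, as long as both branches return the same structure and dtypes.
# A minimal sketch with made-up shapes (TF1-style graph mode):
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

warmup_batches = 100.
batch_seen = tf.Variable(0., trainable=False)
batch_seen = tf.assign_add(batch_seen, 1.)

true_xy = tf.zeros([2, 13, 13, 3, 2])
true_wh = tf.zeros([2, 13, 13, 3, 2])
object_mask = tf.zeros([2, 13, 13, 3, 1])

xy, wh, mask = tf.cond(
    pred=tf.less(batch_seen, warmup_batches + 1),
    true_fn=lambda: [true_xy + 0.5 * (1 - object_mask),  # nudge empty cells toward their centers
                     true_wh,
                     tf.ones_like(object_mask)],
    false_fn=lambda: [true_xy, true_wh, object_mask])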
def call(self, y_pred, mask=None): ''' Returns: 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded to always yield `top_k` predictions per batch item. The last axis contains the coordinates for each predicted box in the format `[class_id, confidence, xmin, ymin, xmax, ymax]`. ''' ##################################################################################### # 1. Convert the box coordinates from predicted anchor box offsets to predicted # absolute coordinates ##################################################################################### # Extract the predicted class IDs as the indices of the highest confidence values. class_ids = tf.expand_dims(tf.to_float( tf.argmax(y_pred[..., :-12], axis=-1)), axis=-1) # Extract the confidences of the maximal classes. confidences = tf.reduce_max(y_pred[..., :-12], axis=-1, keep_dims=True) # Convert anchor box offsets to image offsets. cx = y_pred[..., -12] * y_pred[..., -4] * y_pred[..., -6] + y_pred[ ..., -8] # cx = cx_pred * cx_variance * w_anchor + cx_anchor cy = y_pred[..., -11] * y_pred[..., -3] * y_pred[..., -5] + y_pred[ ..., -7] # cy = cy_pred * cy_variance * h_anchor + cy_anchor w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[ ..., -6] # w = exp(w_pred * variance_w) * w_anchor h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[ ..., -5] # h = exp(h_pred * variance_h) * h_anchor # Convert 'centroids' to 'corners'. xmin = cx - 0.5 * w ymin = cy - 0.5 * h xmax = cx + 0.5 * w ymax = cy + 0.5 * h # If the model predicts box coordinates relative to the image dimensions and they are supposed # to be converted back to absolute coordinates, do that. def normalized_coords(): xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) return xmin1, ymin1, xmax1, ymax1 def non_normalized_coords(): return tf.expand_dims(xmin, axis=-1), tf.expand_dims( ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) xmin, ymin, xmax, ymax = tf.cond(self.tf_normalize_coords, normalized_coords, non_normalized_coords) # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. y_pred = tf.concat( values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1) ##################################################################################### # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering. ##################################################################################### batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 n_boxes = tf.shape(y_pred)[1] n_classes = y_pred.shape[2] - 4 class_indices = tf.range(1, n_classes) # Create a function that filters the predictions for the given batch item. Specifically, it performs: # - confidence thresholding # - non-maximum suppression (NMS) # - top-k filtering def filter_predictions(batch_item): # Keep only the non-background boxes. positive_boxes = tf.not_equal(batch_item[..., 0], 0.0) predictions = tf.boolean_mask(tensor=batch_item, mask=positive_boxes) def perform_confidence_thresholding(): # Apply confidence thresholding. 
threshold_met = predictions[:, 1] > self.tf_confidence_thresh return tf.boolean_mask(tensor=predictions, mask=threshold_met) def no_positive_boxes(): return tf.constant(value=0.0, shape=(1, 6)) # If there are any positive predictions, perform confidence thresholding. predictions_conf_thresh = tf.cond( tf.equal(tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding) def perform_nms(): scores = predictions_conf_thresh[..., 1] # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`. xmin = tf.expand_dims(predictions_conf_thresh[..., -4], axis=-1) ymin = tf.expand_dims(predictions_conf_thresh[..., -3], axis=-1) xmax = tf.expand_dims(predictions_conf_thresh[..., -2], axis=-1) ymax = tf.expand_dims(predictions_conf_thresh[..., -1], axis=-1) boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1) maxima_indices = tf.image.non_max_suppression( boxes=boxes, scores=scores, max_output_size=self.tf_nms_max_output_size, iou_threshold=self.iou_threshold, name='non_maximum_suppresion') maxima = tf.gather(params=predictions_conf_thresh, indices=maxima_indices, axis=0) return maxima def no_confident_predictions(): return tf.constant(value=0.0, shape=(1, 6)) # If any boxes made the threshold, perform NMS. predictions_nms = tf.cond( tf.equal(tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms) # Perform top-k filtering for this batch item or pad it in case there are # fewer than `self.top_k` boxes left at this point. Either way, produce a # tensor of length `self.top_k`. By the time we return the final results tensor # for the whole batch, all batch items must have the same number of predicted # boxes so that the tensor dimensions are homogenous. If fewer than `self.top_k` # predictions are left after the filtering process above, we pad the missing # predictions with zeros as dummy entries. def top_k(): return tf.gather(params=predictions_nms, indices=tf.nn.top_k(predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices, axis=0) def pad_and_top_k(): padded_predictions = tf.pad(tensor=predictions_nms, paddings=[[ 0, self.tf_top_k - tf.shape(predictions_nms)[0] ], [0, 0]], mode='CONSTANT', constant_values=0.0) return tf.gather(params=padded_predictions, indices=tf.nn.top_k(padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, axis=0) top_k_boxes = tf.cond( tf.greater_equal(tf.shape(predictions_nms)[0], self.tf_top_k), top_k, pad_and_top_k) return top_k_boxes # Iterate `filter_predictions()` over all batch items. output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), elems=y_pred, dtype=None, parallel_iterations=128, back_prop=False, swap_memory=False, infer_shape=True, name='loop_over_batch') return output_tensor
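# Hedged sketch (not part of the layer) of the anchor-offset decoding used
# above, written out for a single anchor with plain Python floats. All values
# and names are made up; only the arithmetic mirrors the code.
import math

cx_anchor, cy_anchor, w_anchor, h_anchor = 0.5, 0.5, 0.2, 0.1
cx_var, cy_var, w_var, h_var = 0.1, 0.1, 0.2, 0.2
cx_pred, cy_pred, w_pred, h_pred = 1.0, -1.0, 0.5, 0.5

cx = cx_pred * cx_var * w_anchor + cx_anchor   # 0.52
cy = cy_pred * cy_var * h_anchor + cy_anchor   # 0.49
w = math.exp(w_pred * w_var) * w_anchor        # ~0.221
h = math.exp(h_pred * h_var) * h_anchor        # ~0.111
xmin, ymin = cx - 0.5 * w, cy - 0.5 * h
xmax, ymax = cx + 0.5 * w, cy + 0.5 * h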
def patch_image(image, bboxes=None, offset_height=0, offset_width=0,
                target_height=None, target_width=None):
    """Gets a patch using tf.image.crop_to_bounding_box and adjusts bboxes.

    If patching would leave us with zero bboxes, we return the image and
    bboxes unchanged.

    Args:
        image: Float32 Tensor with shape (H, W, 3).
        bboxes: Tensor with the ground-truth boxes. Shaped (total_boxes, 5).
            The last element in each box is the category label.
        offset_height: Height of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        offset_width: Width of the upper-left corner of the patch with
            respect to the original image. Non-negative.
        target_height: Height of the patch. If set to None, it will be the
            maximum (tf.shape(image)[0] - offset_height - 1). Positive.
        target_width: Width of the patch. If set to None, it will be the
            maximum (tf.shape(image)[1] - offset_width - 1). Positive.

    Returns:
        image: Patch of the original image.
        bboxes: Adjusted bboxes (only those whose centers are inside the
            patch). The key isn't set if bboxes is None.
    """
    # TODO: make this function safe with respect to senseless inputs (i.e.
    # having an offset_height that's larger than tf.shape(image)[0], etc.)
    # As of now we only use it inside random_patch, which already makes sure
    # the arguments are legal.
    im_shape = tf.shape(image)
    if target_height is None:
        target_height = im_shape[0] - offset_height - 1
    if target_width is None:
        target_width = im_shape[1] - offset_width - 1

    new_image = tf.image.crop_to_bounding_box(
        image,
        offset_height=offset_height,
        offset_width=offset_width,
        target_height=target_height,
        target_width=target_width,
    )
    patch_shape = tf.shape(new_image)

    # Return if we didn't have bboxes.
    if bboxes is None:
        # Resize the patch to the original image's size. This is to make sure
        # we respect restrictions in image size in the models.
        new_image_resized = tf.image.resize_images(
            new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)
        return_dict = {"image": new_image_resized}
        return return_dict

    # Now we will remove all bboxes whose centers are not inside the cropped
    # image.

    # First get the x and y coordinates of the center of each of the bboxes.
    bboxes_center_x = tf.reduce_mean(
        tf.concat(
            [
                # bboxes[:, 0] gets a Tensor with shape (20,).
                # We do this to get a Tensor with shape (20, 1).
                bboxes[:, 0:1],
                bboxes[:, 2:3],
            ],
            axis=1,
        ),
        axis=1,  # Reduce per box, not over the whole tensor.
    )
    bboxes_center_y = tf.reduce_mean(
        tf.concat([bboxes[:, 1:2], bboxes[:, 3:4]], axis=1), axis=1)

    # Now we get a boolean tensor holding for each of the bboxes' centers
    # whether they are inside the patch.
    center_x_is_inside = tf.logical_and(
        tf.greater(bboxes_center_x, offset_width),
        tf.less(bboxes_center_x, tf.add(target_width, offset_width)))
    center_y_is_inside = tf.logical_and(
        tf.greater(bboxes_center_y, offset_height),
        tf.less(bboxes_center_y, tf.add(target_height, offset_height)))
    center_is_inside = tf.logical_and(center_x_is_inside, center_y_is_inside)

    # Now we mask the bboxes, removing all those whose centers are outside
    # the patch.
    masked_bboxes = tf.boolean_mask(bboxes, center_is_inside)

    # We move the bboxes to the right place, clipping them if necessary.
    new_bboxes_unclipped = tf.concat(
        [
            tf.subtract(masked_bboxes[:, 0:1], offset_width),
            tf.subtract(masked_bboxes[:, 1:2], offset_height),
            tf.subtract(masked_bboxes[:, 2:3], offset_width),
            tf.subtract(masked_bboxes[:, 3:4], offset_height),
        ],
        axis=1,
    )

    # Finally, we clip the boxes and add back the labels.
    new_bboxes = tf.concat(
        [
            tf.to_int32(
                clip_boxes(new_bboxes_unclipped, imshape=patch_shape[:2]),
            ),
            masked_bboxes[:, 4:],
        ],
        axis=1,
    )

    # Now resize the image to the original size and adjust bboxes accordingly.
    new_image_resized = tf.image.resize_images(
        new_image, im_shape[:2], method=tf.image.ResizeMethod.BILINEAR)

    # adjust_bboxes requires height and width values with dtype=float32.
    new_bboxes_resized = adjust_bboxes(
        new_bboxes,
        old_height=tf.to_float(patch_shape[0]),
        old_width=tf.to_float(patch_shape[1]),
        new_height=tf.to_float(im_shape[0]),
        new_width=tf.to_float(im_shape[1]),
    )

    # Finally, set up the return dict, but only update the image and bboxes
    # if our patch has at least one bbox in it.
    update_condition = tf.greater_equal(tf.shape(new_bboxes_resized)[0], 1)
    return_dict = {}
    return_dict["image"] = tf.cond(
        update_condition, lambda: new_image_resized, lambda: image)
    return_dict["bboxes"] = tf.cond(
        update_condition, lambda: new_bboxes_resized, lambda: bboxes)

    return return_dict
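# A minimal, self-contained sketch (assuming TF 1.x) of the "keep a box only
# if its center lies inside the patch" rule used above. The boxes, offsets,
# and names are made up for illustration.
boxes_toy = tf.constant([[0., 0., 40., 40.],       # center (20, 20) -> outside
                         [60., 60., 120., 120.]])  # center (90, 90) -> inside
offset_w, offset_h, target_w, target_h = 50., 50., 100., 100.
centers_x = (boxes_toy[:, 0] + boxes_toy[:, 2]) / 2.
centers_y = (boxes_toy[:, 1] + boxes_toy[:, 3]) / 2.
center_inside = tf.logical_and(
    tf.logical_and(centers_x > offset_w, centers_x < offset_w + target_w),
    tf.logical_and(centers_y > offset_h, centers_y < offset_h + target_h))
kept_boxes = tf.boolean_mask(boxes_toy, center_inside)  # only the second box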
def main(unused_argv=None): tf.logging.set_verbosity(FLAGS.log) if FLAGS.config is None: raise RuntimeError("No config name specified.") config = utils.get_module("wavenet." + FLAGS.config).Config( FLAGS.train_path) logdir = FLAGS.logdir tf.logging.info("Saving to %s" % logdir) with tf.Graph().as_default(): total_batch_size = FLAGS.total_batch_size assert total_batch_size % FLAGS.worker_replicas == 0 worker_batch_size = total_batch_size / FLAGS.worker_replicas # Run the Reader on the CPU cpu_device = "/job:localhost/replica:0/task:0/cpu:0" if FLAGS.ps_tasks: cpu_device = "/job:worker/cpu:0" with tf.device(cpu_device): inputs_dict = config.get_batch(worker_batch_size) with tf.device( tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks, merge_devices=True)): global_step = tf.get_variable( "global_step", [], tf.int32, initializer=tf.constant_initializer(0), trainable=False) # pylint: disable=cell-var-from-loop lr = tf.constant(config.learning_rate_schedule[0]) for key, value in config.learning_rate_schedule.items(): lr = tf.cond(tf.less(global_step, key), lambda: lr, lambda: tf.constant(value)) # pylint: enable=cell-var-from-loop tf.summary.scalar("learning_rate", lr) # build the model graph outputs_dict = config.build(inputs_dict, is_training=True) loss = outputs_dict["loss"] tf.summary.scalar("train_loss", loss) worker_replicas = FLAGS.worker_replicas ema = tf.train.ExponentialMovingAverage(decay=0.9999, num_updates=global_step) opt = tf.train.SyncReplicasOptimizer( tf.train.AdamOptimizer(lr, epsilon=1e-8), worker_replicas, total_num_replicas=worker_replicas, variable_averages=ema, variables_to_average=tf.trainable_variables()) train_op = opt.minimize(loss, global_step=global_step, name="train", colocate_gradients_with_ops=True) session_config = tf.ConfigProto(allow_soft_placement=True) is_chief = (FLAGS.task == 0) local_init_op = opt.chief_init_op if is_chief else opt.local_step_init_op slim.learning.train( train_op=train_op, logdir=logdir, is_chief=is_chief, master=FLAGS.master, number_of_steps=config.num_iters, global_step=global_step, log_every_n_steps=250, local_init_op=local_init_op, save_interval_secs=300, sync_optimizer=opt, session_config=session_config, )
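# Hedged, standalone sketch (TF 1.x) of the piecewise-constant learning-rate
# schedule built above with chained `tf.cond`s; the toy schedule and step
# value are made up. Because `tf.cond` builds both branches at graph
# construction time, capturing `lr` and `value` inside the loop is safe here.
toy_schedule = {0: 1e-3, 10000: 5e-4, 20000: 1e-4}
toy_step = tf.constant(12000, dtype=tf.int64)
lr = tf.constant(toy_schedule[0])
for boundary, value in sorted(toy_schedule.items()):
    # pylint: disable=cell-var-from-loop
    lr = tf.cond(tf.less(toy_step, boundary),
                 lambda: lr, lambda: tf.constant(value))
    # pylint: enable=cell-var-from-loop
# At step 12000 this evaluates to 5e-4.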
def train(flags): """Training entry point.""" log_dir = flags.log_dir flags.pretrained_model_dir = log_dir log_dir = os.path.join(log_dir, 'train') flags.eval_interval_secs = 0 with tf.Graph().as_default(): global_step = tf.Variable( 0, trainable=False, name='global_step', dtype=tf.int64) global_step_confidence = tf.Variable( 0, trainable=False, name='global_step_confidence', dtype=tf.int64) model = build_model(flags) images_query_pl, labels_query_pl, \ images_support_pl, labels_support_pl = \ build_episode_placeholder(flags) # Augments the input. if flags.dataset == 'cifar10' or flags.dataset == 'cifar100': images_query_pl_aug = data_loader.augment_cifar( images_query_pl, is_training=True) images_support_pl_aug = data_loader.augment_cifar( images_support_pl, is_training=True) elif flags.dataset == 'tinyimagenet': images_query_pl_aug = data_loader.augment_tinyimagenet( images_query_pl, is_training=True) images_support_pl_aug = data_loader.augment_tinyimagenet( images_support_pl, is_training=True) logits, logits_z = build_proto_train_graph( images_query=images_query_pl_aug, images_support=images_support_pl_aug, flags=flags, is_training=True, model=model) # Losses and optimizer ## Classification loss loss_classification = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=tf.one_hot(labels_query_pl, flags.num_classes_train))) # Confidence loss _, top_k_indices = tf.nn.top_k(logits, k=1) pred = tf.squeeze(top_k_indices) incorrect_mask = tf.math.logical_not(tf.math.equal(pred, labels_query_pl)) incorrect_logits_z = tf.boolean_mask(logits_z, incorrect_mask) incorrect_labels_z = tf.boolean_mask(labels_query_pl, incorrect_mask) signal_variance = tf.math.reduce_sum(tf.cast(incorrect_mask, tf.int32)) loss_variance_incorrect = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=incorrect_logits_z, labels=tf.one_hot(incorrect_labels_z, flags.num_classes_train))) loss_variance_zero = 0.0 loss_confidence = tf.cond( tf.greater(signal_variance, 0), lambda: loss_variance_incorrect, lambda: loss_variance_zero) regu_losses = tf.losses.get_regularization_losses() loss = tf.add_n([loss_classification] + regu_losses) # Learning rate if flags.lr_anneal == 'const': learning_rate = flags.init_learning_rate elif flags.lr_anneal == 'pwc': learning_rate = get_pwc_learning_rate(global_step, flags) elif flags.lr_anneal == 'exp': lr_decay_step = flags.number_of_steps // flags.n_lr_decay learning_rate = tf.train.exponential_decay( flags.init_learning_rate, global_step, lr_decay_step, 1.0 / flags.lr_decay_rate, staircase=True) else: raise Exception('Not implemented') # Optimizer optimizer = tf.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9) optimizer_confidence = tf.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9) train_op = contrib_slim.learning.create_train_op( total_loss=loss, optimizer=optimizer, global_step=global_step, clip_gradient_norm=flags.clip_gradient_norm) variable_variance = [] for v in tf.trainable_variables(): if 'fc_variance' in v.name: variable_variance.append(v) train_op_confidence = contrib_slim.learning.create_train_op( total_loss=loss_confidence, optimizer=optimizer_confidence, global_step=global_step_confidence, clip_gradient_norm=flags.clip_gradient_norm, variables_to_train=variable_variance) tf.summary.scalar('loss', loss) tf.summary.scalar('loss_classification', loss_classification) tf.summary.scalar('loss_variance', loss_confidence) tf.summary.scalar('regu_loss', tf.add_n(regu_losses)) 
tf.summary.scalar('learning_rate', learning_rate) # Merges all summaries except for pretrain summary = tf.summary.merge( tf.get_collection('summaries', scope='(?!pretrain).*')) # Gets datasets few_shot_data_train, test_dataset, train_dataset = get_train_datasets(flags) # Defines session and logging summary_writer_train = tf.summary.FileWriter(log_dir, flush_secs=1) saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True) print(saver.saver_def.filename_tensor_name) print(saver.saver_def.restore_op_name) # pylint: disable=unused-variable run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() supervisor = tf.train.Supervisor( logdir=log_dir, init_feed_dict=None, summary_op=None, init_op=tf.global_variables_initializer(), summary_writer=summary_writer_train, saver=saver, global_step=global_step, save_summaries_secs=flags.save_summaries_secs, save_model_secs=0) with supervisor.managed_session() as sess: checkpoint_step = sess.run(global_step) if checkpoint_step > 0: checkpoint_step += 1 eval_interval_steps = flags.eval_interval_steps for step in range(checkpoint_step, flags.number_of_steps): # Computes the classification loss using a batch of data. images_query, labels_query,\ images_support, labels_support = \ few_shot_data_train.next_few_shot_batch( query_batch_size_per_task=flags.train_batch_size, num_classes_per_task=flags.num_classes_train, num_supports_per_class=flags.num_shots_train, num_tasks=flags.num_tasks_per_batch) feed_dict = { images_query_pl: images_query.astype(dtype=np.float32), labels_query_pl: labels_query, images_support_pl: images_support.astype(dtype=np.float32), labels_support_pl: labels_support } t_batch = time.time() dt_batch = time.time() - t_batch t_train = time.time() loss, loss_confidence = sess.run([train_op, train_op_confidence], feed_dict=feed_dict) dt_train = time.time() - t_train if step % 100 == 0: summary_str = sess.run(summary, feed_dict=feed_dict) summary_writer_train.add_summary(summary_str, step) summary_writer_train.flush() logging.info('step %d, loss : %.4g, dt: %.3gs, dt_batch: %.3gs', step, loss, dt_train, dt_batch) if float(step) / flags.number_of_steps > 0.5: eval_interval_steps = flags.eval_interval_fine_steps if eval_interval_steps > 0 and step % eval_interval_steps == 0: saver.save(sess, os.path.join(log_dir, 'model'), global_step=step) eval( flags=flags, train_dataset=train_dataset, test_dataset=test_dataset) if float( step ) > 0.5 * flags.number_of_steps + flags.number_of_steps_to_early_stop: break
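# Hedged, self-contained sketch (TF 1.x) of the pattern used above for the
# confidence loss: average only over incorrectly-classified examples and fall
# back to 0.0 when there are none. The toy logits/labels and names are
# illustrative, not taken from the training code.
logits_toy = tf.constant([[2.0, 0.1], [0.3, 1.5]])
labels_toy = tf.constant([0, 0])
pred_toy = tf.argmax(logits_toy, axis=1, output_type=tf.int32)
incorrect = tf.logical_not(tf.equal(pred_toy, labels_toy))
num_incorrect = tf.reduce_sum(tf.cast(incorrect, tf.int32))
incorrect_logits = tf.boolean_mask(logits_toy, incorrect)
incorrect_labels = tf.boolean_mask(labels_toy, incorrect)
masked_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=incorrect_labels, logits=incorrect_logits))
safe_loss = tf.cond(tf.greater(num_incorrect, 0),
                    lambda: masked_loss, lambda: 0.0)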
def _parse_train_data(self, data): """Parses data for training and evaluation.""" classes = data['groundtruth_classes'] boxes = data['groundtruth_boxes'] is_crowds = data['groundtruth_is_crowd'] # Skips annotations with `is_crowd` = True. if self._skip_crowd_during_training and self._is_training: num_groundtrtuhs = tf.shape(classes)[0] with tf.control_dependencies([num_groundtrtuhs, is_crowds]): indices = tf.cond( tf.greater(tf.size(is_crowds), 0), lambda: tf.where(tf.logical_not(is_crowds))[:, 0], lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64)) classes = tf.gather(classes, indices) boxes = tf.gather(boxes, indices) # Gets original image and its size. image = data['image'] if self._aug_policy: if AUTOAUG_IMPORTED: image, boxes = autoaugment_utils.distort_image_with_autoaugment( image, boxes, self._aug_policy) else: raise ImportError( 'Unable to get autoaugment_utils, likely due ' 'to imcompatability with TF 2.X.') image_shape = tf.shape(image)[0:2] # Normalizes image with mean and std pixel values. image = input_utils.normalize_image(image) # Flips image randomly during training. if self._aug_rand_hflip: image, boxes = input_utils.random_horizontal_flip(image, boxes) # Converts boxes from normalized coordinates to pixel coordinates. # Now the coordinates of boxes are w.r.t. the original image. boxes = box_utils.denormalize_boxes(boxes, image_shape) # Resizes and crops image. image, image_info = input_utils.resize_and_crop_image( image, self._output_size, padded_size=input_utils.compute_padded_size( self._output_size, 2**self._max_level), aug_scale_min=self._aug_scale_min, aug_scale_max=self._aug_scale_max) image_height, image_width, _ = image.get_shape().as_list() # Resizes and crops boxes. # Now the coordinates of boxes are w.r.t the scaled image. image_scale = image_info[2, :] offset = image_info[3, :] boxes = input_utils.resize_and_crop_boxes(boxes, image_scale, image_info[1, :], offset) # Filters out ground truth boxes that are all zeros. indices = box_utils.get_non_empty_box_indices(boxes) boxes = tf.gather(boxes, indices) classes = tf.gather(classes, indices) # Assigns anchor targets. # Note that after the target assignment, box targets are absolute pixel # offsets w.r.t. the scaled image. input_anchor = anchor.Anchor(self._min_level, self._max_level, self._num_scales, self._aspect_ratios, self._anchor_size, (image_height, image_width)) anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold, self._unmatched_threshold) (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32)) # If bfloat16 is used, casts input image to tf.bfloat16. if self._use_bfloat16: image = tf.cast(image, dtype=tf.bfloat16) # Packs labels for model_fn outputs. labels = { 'cls_targets': cls_targets, 'box_targets': box_targets, 'anchor_boxes': input_anchor.multilevel_boxes, 'num_positives': num_positives, 'image_info': image_info, } return image, labels
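# Hedged sketch of what "denormalize boxes" amounts to. The real
# `box_utils.denormalize_boxes` helper is not shown in this file, so this is
# only the implied arithmetic for [ymin, xmin, ymax, xmax] boxes in [0, 1];
# the function name is illustrative.
def denormalize_boxes_sketch(boxes, image_shape):
    """Scales normalized boxes to pixel coordinates of `image_shape` (h, w)."""
    height = tf.cast(image_shape[0], boxes.dtype)
    width = tf.cast(image_shape[1], boxes.dtype)
    ymin, xmin, ymax, xmax = tf.unstack(boxes, num=4, axis=-1)
    return tf.stack(
        [ymin * height, xmin * width, ymax * height, xmax * width], axis=-1)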
# Change 11: clip gradients. The original pre-training code clipped with:
#   (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
grads, tvars = list(zip(*grads_and_vars))
all_are_finite = tf.reduce_all(
    [tf.reduce_all(tf.is_finite(g)) for g in grads]
) if use_fp16 or manual_fp16 else tf.constant(True, dtype=tf.bool)

# This is how the model was pre-trained.
# Ensure the global norm is a finite number to prevent clip_by_global_norm
# from having a hissy fit.
(clipped_grads, _) = tf.clip_by_global_norm(
    grads, clip_norm=1.0,
    use_norm=tf.cond(all_are_finite,
                     lambda: tf.global_norm(grads),
                     lambda: tf.constant(1.0)))

# Change 12: apply gradients using the clipped grads. The original was:
#   train_op = optimizer.apply_gradients(
#       list(zip(grads, tvars)), global_step=global_step)
train_op = optimizer.apply_gradients(
    list(zip(clipped_grads, tvars)), global_step=global_step)

# Normally the global step update is done inside of `apply_gradients`.
# However, neither `AdamWeightDecayOptimizer` nor `LAMBOptimizer` do this.
# But if you use a different optimizer, you should probably take this line
# out.
new_global_step = global_step + 1
train_op = tf.group(train_op, [global_step.assign(new_global_step)])
return train_op
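# Hedged toy sketch of the finite-norm guard above: when any gradient is
# non-finite, `use_norm` is forced to 1.0 so `clip_by_global_norm` does not
# spread NaN/Inf from one gradient into all of the clipped gradients. The
# values are made up.
toy_grads = [tf.constant([3.0, 4.0]), tf.constant([float('nan')])]
finite = tf.reduce_all([tf.reduce_all(tf.is_finite(g)) for g in toy_grads])
clipped_toy, _ = tf.clip_by_global_norm(
    toy_grads, clip_norm=1.0,
    use_norm=tf.cond(finite,
                     lambda: tf.global_norm(toy_grads),
                     lambda: tf.constant(1.0)))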
def _generate_detections_tf(cls_outputs, box_outputs, anchor_boxes, indices, classes, image_id, image_scale, num_classes, min_score_thresh=0.2, max_boxes_to_draw=50, soft_nms_sigma=0.0, iou_threshold=0.5, use_native_nms=True): """Generates detections with model outputs and anchors. Args: cls_outputs: a numpy array with shape [N, 1], which has the highest class scores on all feature levels. The N is the number of selected top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) box_outputs: a numpy array with shape [N, 4], which stacks box regression outputs on all feature levels. The N is the number of selected top-k total anchors on all levels. (k being MAX_DETECTION_POINTS) anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all feature levels. The N is the number of selected top-k total anchors on all levels. indices: a numpy array with shape [N], which is the indices from top-k selection. classes: a numpy array with shape [N], which represents the class prediction on all selected anchors from top-k selection. image_id: an integer number to specify the image id. image_scale: a float tensor representing the scale between original image and input image for the detector. It is used to rescale detections for evaluating with the original groundtruth annotations. num_classes: a integer that indicates the number of classes. min_score_thresh: A float representing the threshold for deciding when to remove boxes based on score. max_boxes_to_draw: Max number of boxes to draw. soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter; See Bodla et al, https://arxiv.org/abs/1704.04503). When `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard) NMS. iou_threshold: A float representing the threshold for deciding whether boxes overlap too much with respect to IOU. use_native_nms: a bool that indicates whether to use native nms. Returns: detections: detection results in a tensor with each row representing [image_id, y, x, height, width, score, class] """ anchor_boxes = tf.gather(anchor_boxes, indices) scores = tf.math.sigmoid(cls_outputs) # apply bounding box regression to anchors boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]), tf.transpose(anchor_boxes, [1, 0])) def _else(detections, class_id, indices): """Else branch for generating detections.""" boxes_cls = tf.gather(boxes, indices) scores_cls = tf.gather(scores, indices) # Select top-scoring boxes in each class and apply non-maximum suppression # (nms) for boxes in the same class. The selected boxes from each class are # then concatenated for the final detection outputs. 
if use_native_nms: top_detection_idx, scores_cls = tf.image.non_max_suppression_with_scores( boxes_cls, scores_cls, max_boxes_to_draw, iou_threshold=iou_threshold, score_threshold=min_score_thresh, soft_nms_sigma=soft_nms_sigma) scores_cls = tf.expand_dims(scores_cls, axis=1) boxes_cls = tf.gather(boxes_cls, top_detection_idx) top_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1) else: scores_cls = tf.expand_dims(scores_cls, axis=1) all_detections_cls = tf.concat([boxes_cls, scores_cls], axis=1) top_detection_idx = nms_tf(all_detections_cls, iou_threshold) top_detections_cls = tf.gather(all_detections_cls, top_detection_idx) height = top_detections_cls[:, 2] - top_detections_cls[:, 0] width = top_detections_cls[:, 3] - top_detections_cls[:, 1] top_detections_cls = tf.stack([ top_detections_cls[:, 0] * image_scale, top_detections_cls[:, 1] * image_scale, height * image_scale, width * image_scale, top_detections_cls[:, 4] ], axis=-1) top_detections_cls = tf.stack([ tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)), tf.float32), *tf.unstack(top_detections_cls, 5, axis=1), tf.repeat(class_id + 1.0, tf.size(top_detection_idx)) ], axis=1) detections = tf.concat([detections, top_detections_cls], axis=0) return detections detections = tf.constant([], tf.float32, [0, 7]) for c in range(num_classes): indices_cls = tf.squeeze(tf.where_v2(tf.equal(classes, c)), axis=-1) detections = tf.cond( tf.equal(tf.size(indices), 0), lambda: detections, lambda id=c, id_cls=indices_cls: _else(detections, id, id_cls)) indices_final = tf.argsort(detections[:, -2], direction='DESCENDING') detections = tf.gather(detections, indices_final[:max_boxes_to_draw], name='detection') return detections
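# Hedged, standalone sketch of the soft-NMS call used in the native-NMS
# branch above (the API requires a TF version that provides
# `tf.image.non_max_suppression_with_scores`). Boxes are made up and given
# in [ymin, xmin, ymax, xmax] order; with `soft_nms_sigma=0.0` this reduces
# to hard NMS.
toy_boxes = tf.constant([[0., 0., 10., 10.],
                         [1., 1., 11., 11.],
                         [50., 50., 60., 60.]])
toy_scores = tf.constant([0.9, 0.8, 0.7])
selected_idx, selected_scores = tf.image.non_max_suppression_with_scores(
    toy_boxes, toy_scores, max_output_size=10,
    iou_threshold=0.5, score_threshold=0.2, soft_nms_sigma=0.0)
kept_detections = tf.gather(toy_boxes, selected_idx)
# The second box overlaps the first with IoU ~0.68 and is dropped by hard NMS.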
def _generate_detections_tf(cls_outputs, box_outputs, anchor_boxes, indices, classes, image_id, image_scale, num_classes, use_native_nms=False): """Generates detections with model outputs and anchors. Args: cls_outputs: a numpy array with shape [N, 1], which has the highest class scores on all feature levels. The N is the number of selected top-K total anchors on all levels. (k being MAX_DETECTION_POINTS) box_outputs: a numpy array with shape [N, 4], which stacks box regression outputs on all feature levels. The N is the number of selected top-k total anchors on all levels. (k being MAX_DETECTION_POINTS) anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all feature levels. The N is the number of selected top-k total anchors on all levels. indices: a numpy array with shape [N], which is the indices from top-k selection. classes: a numpy array with shape [N], which represents the class prediction on all selected anchors from top-k selection. image_id: an integer number to specify the image id. image_scale: a float tensor representing the scale between original image and input image for the detector. It is used to rescale detections for evaluating with the original groundtruth annotations. num_classes: a integer that indicates the number of classes. use_native_nms: a bool that indicates whether to use native nms. Returns: detections: detection results in a tensor with each row representing [image_id, y, x, height, width, score, class] """ anchor_boxes = tf.gather(anchor_boxes, indices) scores = tf.math.sigmoid(cls_outputs) # apply bounding box regression to anchors boxes = decode_box_outputs_tf(tf.transpose(box_outputs, [1, 0]), tf.transpose(anchor_boxes, [1, 0])) def _else(detections, class_id): """Else branch forr generating detections.""" boxes_cls = tf.gather(boxes, indices) scores_cls = tf.gather(scores, indices) # Select top-scoring boxes in each class and apply non-maximum suppression # (nms) for boxes in the same class. The selected boxes from each class are # then concatenated for the final detection outputs. all_detections_cls = tf.concat( [tf.reshape(boxes_cls, [-1, 4]), scores_cls], axis=1) if use_native_nms: top_detection_idx = tf.image.non_max_suppression( all_detections_cls[:, :4], all_detections_cls[:, 4], MAX_DETECTIONS_PER_IMAGE, iou_threshold=0.5) else: top_detection_idx = nms_tf(all_detections_cls, 0.5) top_detections_cls = tf.gather(all_detections_cls, top_detection_idx) height = top_detections_cls[:, 2] - top_detections_cls[:, 0] width = top_detections_cls[:, 3] - top_detections_cls[:, 1] top_detections_cls = tf.stack([ top_detections_cls[:, 0] * image_scale, top_detections_cls[:, 1] * image_scale, height * image_scale, width * image_scale, top_detections_cls[:, 4] ], axis=-1) top_detections_cls = tf.stack([ tf.cast(tf.repeat(image_id, tf.size(top_detection_idx)), tf.float32), *tf.unstack(top_detections_cls, 5, axis=1), tf.repeat(class_id + 1.0, tf.size(top_detection_idx)) ], axis=1) detections = tf.concat([detections, top_detections_cls], axis=0) return detections detections = tf.constant([], tf.float32, [0, 7]) for c in range(num_classes): indices = tf.where(tf.equal(classes, c)) detections = tf.cond(tf.equal(tf.shape(indices)[0], 0), lambda: detections, lambda class_id=c: _else(detections, class_id)) return tf.identity(detections, name='detection')
def last_value_quantize(self, inputs, per_channel=False, init_min=-6.0, init_max=6.0, name_prefix='FixedValueQuant', reuse=None, is_training=False, num_bits=8, narrow_range=False, relative_quantile=0, freeze=False, quant_delay=False): """Adds a layer that collects quantization ranges as last input ranges. LastValueQuantize creates variables called 'min' and 'max', representing the interval used for quantization and clamping. Args: inputs: a tensor containing values to be quantized. per_channel: (Optional) a boolean specifying whether to use different quantization ranges per output channel. init_min: a float scalar, the initial value for variable min. init_max: a float scalar, the initial value for variable max. name_prefix: name_prefix for created nodes. reuse: whether or not the layer and its variables should be reused. To be able to reuse the layer scope must be given. is_training: Whether the op is applied to a training or eval graph. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. relative_quantile: Specify the location of quantization min and max parameters. relative_quantile = 0 is equivalent to using min and max of input; relative_quantile = 1 set min and max the optimal location assuming the input distribution is uniform. In reality, a good value should be in the range [0 1]. freeze: If True, the min and max variables are calculated once at the begining of training and then freeze. This is used for quantized fine-tuning of a pretrained checkpoint. If False, the min and max are calculated and updated every cycle. quant_delay: The number of global steps after which the fake quantization are turned on. Used for performing fine-tuning experiment without starting from a pre-trained checkpoint. Returns: a tensor containing quantized values. """ with tf.variable_scope( None, default_name=name_prefix, values=[inputs], reuse=reuse) as scope: scope.set_partitioner(None) input_shape = inputs.get_shape() input_dim = len(input_shape) if per_channel: # Only support quantizing 1-, 2- and 4-dimensional tensors. 
assert input_dim in [1, 2, 4] min_max_shape = [input_shape[-1]] else: min_max_shape = [] min_var = tf.get_variable('min', min_max_shape, tf.float32, initializer=tf.constant_initializer(init_min), trainable=False) max_var = tf.get_variable('max', min_max_shape, tf.float32, initializer=tf.constant_initializer(init_max), trainable=False) if not is_training: return self.delayed_quant( inputs, min_var, max_var, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range, quant_delay=None) if per_channel: if input_dim == 2: reduce_dims = [0] elif input_dim == 4: reduce_dims = [0, 1, 2] if num_bits >= 4: quantile = 0 else: quantile = (1.0 / 2.0**(num_bits + 1.0)) * relative_quantile * 100 if per_channel: if input_dim >= 2: batch_min = tfp.stats.percentile( inputs, q=quantile, axis=reduce_dims, name='BatchMin') else: batch_min = inputs else: batch_min = tfp.stats.percentile( inputs, q=quantile, name='BatchMin') if per_channel: if input_dim >= 2: batch_max = tfp.stats.percentile( inputs, q=100 - quantile, axis=reduce_dims, name='BatchMax') else: batch_max = inputs else: batch_max = tfp.stats.percentile( inputs, q=100 - quantile, name='BatchMax') if narrow_range: multiplier = 1.0 else: multiplier = 1.0 + 1.0 / (2.0**(num_bits-1.0) - 1.0) batch_abs_max = tf.maximum(tf.abs(batch_min), tf.abs(batch_max)) if narrow_range: batch_adjusted_min = 0 - batch_abs_max else: multiplier = 1.0 + 1.0 / (2.0**(num_bits-1.0) - 1.0) batch_adjusted_min = 0 - tf.scalar_mul(multiplier, batch_abs_max) batch_abs_max = tf.cast(batch_abs_max, tf.float32) batch_adjusted_min = tf.cast(batch_adjusted_min, tf.float32) if freeze: def make_var_op(var): def f(): return var return f quant_step = common.CreateOrGetQuantizationStep() min_max_assign = tf.less_equal( quant_step, 1, name='MinMaxAssign') min_value = tf.cond(min_max_assign, make_var_op(batch_adjusted_min), make_var_op(min_var), name='AssignMinCond') max_value = tf.cond(min_max_assign, make_var_op(batch_abs_max), make_var_op(max_var), name='AssignMaxCond') else: min_value = batch_adjusted_min max_value = batch_abs_max assign_min = tf.assign(min_var, min_value) assign_max = tf.assign(max_var, max_value) return self.delayed_quant( inputs, assign_min, assign_max, per_channel=per_channel, num_bits=num_bits, narrow_range=narrow_range, quant_delay=quant_delay)
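# Hedged sketch (assuming `tensorflow_probability` is available, as implied by
# the `tfp.stats.percentile` calls above) of how the clipped quantization
# range is derived: take symmetric percentiles, then widen the negative side
# by the two's-complement multiplier. All numbers and names are illustrative.
import tensorflow_probability as tfp

toy_num_bits = 3
toy_relative_quantile = 0.5
toy_quantile = (1.0 / 2.0 ** (toy_num_bits + 1.0)) * toy_relative_quantile * 100  # 3.125
activations = tf.random_normal([1024])
toy_min = tfp.stats.percentile(activations, q=toy_quantile)
toy_max = tfp.stats.percentile(activations, q=100 - toy_quantile)
toy_abs_max = tf.maximum(tf.abs(toy_min), tf.abs(toy_max))
toy_multiplier = 1.0 + 1.0 / (2.0 ** (toy_num_bits - 1.0) - 1.0)  # 4/3 for 3 bits
toy_adjusted_min = -tf.scalar_mul(toy_multiplier, toy_abs_max)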
def _match(self, similarity_matrix): """Tries to match each column of the similarity matrix to a row. Args: similarity_matrix: tensor of shape [N, M] representing any similarity metric. Returns: Match object with corresponding matches for each of M columns. """ def _match_when_rows_are_empty(): """Performs matching when the rows of similarity matrix are empty. When the rows are empty, all detections are false positives. So we return a tensor of -1's to indicate that the columns do not match to any rows. Returns: matches: int32 tensor indicating the row each column matches to. """ similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( similarity_matrix) return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32) def _match_when_rows_are_non_empty(): """Performs matching when the rows of similarity matrix are non empty. Returns: matches: int32 tensor indicating the row each column matches to. """ # Matches for each column matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32) # Deal with matched and unmatched threshold if self._matched_threshold is not None: # Get logical indices of ignored and unmatched columns as tf.int64 matched_vals = tf.reduce_max(similarity_matrix, 0) below_unmatched_threshold = tf.greater( self._unmatched_threshold, matched_vals) between_thresholds = tf.logical_and( tf.greater_equal(matched_vals, self._unmatched_threshold), tf.greater(self._matched_threshold, matched_vals)) if self._negatives_lower_than_unmatched: matches = self._set_values_using_indicator( matches, below_unmatched_threshold, -1) matches = self._set_values_using_indicator( matches, between_thresholds, -2) else: matches = self._set_values_using_indicator( matches, below_unmatched_threshold, -2) matches = self._set_values_using_indicator( matches, between_thresholds, -1) if self._force_match_for_each_row: similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape( similarity_matrix) force_match_column_ids = tf.argmax(similarity_matrix, 1, output_type=tf.int32) force_match_column_indicators = tf.one_hot( force_match_column_ids, depth=similarity_matrix_shape[1]) force_match_row_ids = tf.argmax(force_match_column_indicators, 0, output_type=tf.int32) force_match_column_mask = tf.cast( tf.reduce_max(force_match_column_indicators, 0), tf.bool) final_matches = tf.where(force_match_column_mask, force_match_row_ids, matches) return final_matches else: return matches if similarity_matrix.shape.is_fully_defined(): if similarity_matrix.shape[0].value == 0: return _match_when_rows_are_empty() else: return _match_when_rows_are_non_empty() else: return tf.cond(tf.greater(tf.shape(similarity_matrix)[0], 0), _match_when_rows_are_non_empty, _match_when_rows_are_empty)
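# Hedged toy sketch (TF 1.x) of the threshold logic above, using the
# "negatives lower than unmatched" convention: each column takes its best row
# via argmax, then matches below the unmatched threshold become -1 and those
# between the thresholds become -2. The similarity values are made up.
similarity_toy = tf.constant([[0.8, 0.3, 0.1],
                              [0.2, 0.45, 0.05]])
matches_toy = tf.argmax(similarity_toy, 0, output_type=tf.int32)  # [0, 1, 0]
matched_vals = tf.reduce_max(similarity_toy, 0)                   # [0.8, 0.45, 0.1]
matched_thresh, unmatched_thresh = 0.5, 0.4
below_unmatched = matched_vals < unmatched_thresh
between = tf.logical_and(matched_vals >= unmatched_thresh,
                         matched_vals < matched_thresh)
matches_toy = tf.where(below_unmatched,
                       -1 * tf.ones_like(matches_toy), matches_toy)
matches_toy = tf.where(between,
                       -2 * tf.ones_like(matches_toy), matches_toy)
# -> [0, -2, -1]: column 0 matches row 0, column 1 is "between" (ignored),
#    column 2 is unmatched.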
tensor_dict[fields.InputDataFields.image])[:2] if fields.InputDataFields.image_additional_channels in tensor_dict: channels = tensor_dict[fields.InputDataFields.image_additional_channels] channels = tf.squeeze(channels, axis=3) channels = tf.transpose(channels, perm=[1, 2, 0]) tensor_dict[fields.InputDataFields.image_additional_channels] = channels def default_groundtruth_weights(): return tf.ones( [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]], dtype=tf.float32) tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond( tf.greater( tf.shape( tensor_dict[fields.InputDataFields.groundtruth_weights])[0], 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights], default_groundtruth_weights) if fields.InputDataFields.groundtruth_keypoints in tensor_dict: # Set all keypoints that are not labeled to NaN. gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities visibilities_tiled = tf.tile( tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2]) tensor_dict[gt_kpt_fld] = tf.where( visibilities_tiled, tensor_dict[gt_kpt_fld], np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))
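# Hedged, self-contained sketch (TF 1.x) of the "fall back to unit weights"
# pattern above, with made-up tensors standing in for the `tensor_dict`
# fields.
gt_boxes_toy = tf.constant([[0.1, 0.1, 0.5, 0.5],
                            [0.2, 0.2, 0.6, 0.6]])
gt_weights_toy = tf.constant([], dtype=tf.float32)  # nothing provided
weights_or_default = tf.cond(
    tf.greater(tf.shape(gt_weights_toy)[0], 0),
    lambda: gt_weights_toy,
    lambda: tf.ones([tf.shape(gt_boxes_toy)[0]], dtype=tf.float32))
# -> [1.0, 1.0] because the provided weights tensor was empty.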
def random_crop_image(image, boxes, labels, masks=None, keypoints=None, min_object_covered=1.0, aspect_ratio_range=(0.75, 1.33), area_range=(0.1, 1.0), overlap_thresh=0.3, clip_boxes=True, random_coef=0.0, seed=None): """Randomly crops the image. Given the input image and its bounding boxes, this op randomly crops a subimage. Given a user-provided set of input constraints, the crop window is resampled until it satisfies these constraints. If within 100 trials it is unable to find a valid crop, the original image is returned. See the Args section for a description of the input constraints. Both input boxes and returned Boxes are in normalized form (e.g., lie in the unit square [0, 1]). This function will return the original image with probability random_coef. Note: Keypoint coordinates that are outside the crop will be set to NaN, which is consistent with the original keypoint encoding for non-existing keypoints. Args: image: rank 3 float32 tensor contains 1 image -> [height, width, channels] with pixel values varying between [0, 1]. boxes: rank 2 float32 tensor containing the bounding boxes with shape [num_instances, 4]. Boxes are in normalized form meaning their coordinates vary between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax]. labels: rank 1 int32 tensor containing the object classes. masks: (optional) rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. The masks are of the same height, width as the input `image`. keypoints: (optional) rank 3 float32 tensor with shape [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized coordinates. min_object_covered: the cropped image must cover at least this fraction of at least one of the input bounding boxes. aspect_ratio_range: allowed range for aspect ratio of cropped image. area_range: allowed range for area ratio between cropped image and the original image. overlap_thresh: minimum overlap thresh with new cropped image to keep the box. clip_boxes: whether to clip the boxes to the cropped image. random_coef: a random coefficient that defines the chance of getting the original image. If random_coef is 0, we will always get the cropped image, and if it is 1.0, we will always get the original image. seed: random seed. Returns: image: Image shape will be [new_height, new_width, channels]. boxes: boxes which is the same rank as input boxes. Boxes are in normalized form. labels: new labels. If label_weights, multiclass_scores, masks, or keypoints is not None, the function also returns: masks: rank 3 float32 tensor with shape [num_instances, height, width] containing instance masks. keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints, 2] """ def strict_random_crop_image_fn(): return _strict_random_crop_image( image, boxes, labels, masks=masks, keypoints=keypoints, min_object_covered=min_object_covered, aspect_ratio_range=aspect_ratio_range, area_range=area_range, overlap_thresh=overlap_thresh, clip_boxes=clip_boxes) # avoids tf.cond to make faster RCNN training on borg. See b/140057645. if random_coef < sys.float_info.min: result = strict_random_crop_image_fn() else: do_a_crop_random = tf.greater(tf.random_uniform([], seed=seed), random_coef) outputs = [image, boxes, labels] if masks is not None: outputs.append(masks) if keypoints is not None: outputs.append(keypoints) result = tf.cond(do_a_crop_random, strict_random_crop_image_fn, lambda: tuple(outputs)) return result
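# Hedged sketch (TF 1.x) of the `random_coef` gate used above: with
# probability `random_coef` the original input is kept, otherwise the crop
# function runs. `toy_crop` stands in for `_strict_random_crop_image`, which
# is defined elsewhere; all values and names here are made up.
random_coef_toy = 0.3
image_toy = tf.zeros([20, 20, 3])

def toy_crop():
    # Any deterministic stand-in crop; the real code crops image, boxes,
    # labels (and optionally masks / keypoints) together.
    return image_toy[:10, :10, :]

do_a_crop = tf.greater(tf.random_uniform([], seed=0), random_coef_toy)
maybe_cropped = tf.cond(do_a_crop, toy_crop, lambda: image_toy)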