Example #1
def safe_log(tensor, eps=1e-16):
    is_zero = tf.less(tensor, eps)
    tensor = tf.where(is_zero, tf.ones_like(tensor), tensor)
    tensor = tf.where(is_zero, tf.zeros_like(tensor) - 1e8, tf.log(tensor))
    return tensor
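A quick numeric check of the masking logic, sketched in NumPy (np.where mirrors tf.where here; the -1e8 constant stands in for -inf):

import numpy as np

def safe_log_np(x, eps=1e-16):
    # NumPy mirror of safe_log: entries below eps are first replaced by 1
    # so the log never sees 0, then mapped to a large negative constant.
    is_zero = x < eps
    x = np.where(is_zero, np.ones_like(x), x)
    return np.where(is_zero, np.full_like(x, -1e8), np.log(x))

print(safe_log_np(np.array([0.0, 1e-20, 1.0, np.e])))
# [-1.e+08 -1.e+08  0.e+00  1.e+00]

The two-step masking matters in the TF version: taking tf.log first and masking afterwards would still backpropagate NaN gradients through the unselected tf.where branch.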
Example #2
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        if global_step is None:
            global_step = tf.train.get_or_create_global_step()
        new_global_step = global_step + 1

        assignments = []
        for (grad, param) in grads_and_vars:
            if grad is None or param is None:
                continue

            param_name = param.op.name

            v = tf.get_variable(name=param_name + "/Momentum",
                                shape=param.shape.as_list(),
                                dtype=tf.float32,
                                trainable=False,
                                initializer=tf.zeros_initializer())

            if self._use_weight_decay(param_name):
                grad += self.weight_decay * param

            if self.classic_momentum:
                trust_ratio = 1.0
                if self._do_layer_adaptation(param_name):
                    w_norm = tf.norm(param, ord=2)
                    g_norm = tf.norm(grad, ord=2)
                    trust_ratio = tf.where(
                        tf.greater(w_norm, 0),
                        tf.where(tf.greater(g_norm, 0),
                                 (self.eeta * w_norm / g_norm), 1.0), 1.0)
                scaled_lr = self.learning_rate * trust_ratio

                next_v = tf.multiply(self.momentum, v) + scaled_lr * grad
                if self.use_nesterov:
                    update = tf.multiply(self.momentum,
                                         next_v) + scaled_lr * grad
                else:
                    update = next_v
                next_param = param - update
            else:
                next_v = tf.multiply(self.momentum, v) + grad
                if self.use_nesterov:
                    update = tf.multiply(self.momentum, next_v) + grad
                else:
                    update = next_v

                trust_ratio = 1.0
                if self._do_layer_adaptation(param_name):
                    w_norm = tf.norm(param, ord=2)
                    v_norm = tf.norm(update, ord=2)
                    trust_ratio = tf.where(
                        tf.greater(w_norm, 0),
                        tf.where(tf.greater(v_norm, 0),
                                 (self.eeta * w_norm / v_norm), 1.0), 1.0)
                scaled_lr = trust_ratio * self.learning_rate
                next_param = param - scaled_lr * update

            assignments.extend([
                param.assign(next_param),
                v.assign(next_v),
                global_step.assign(new_global_step)
            ])
        return tf.group(*assignments, name=name)
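The layer adaptation above is a LARS-style trust ratio per parameter tensor; a minimal NumPy sketch of that factor (the eeta value is an illustrative assumption):

import numpy as np

def trust_ratio(param, grad, eeta=0.001):
    # eeta * ||w|| / ||g|| when both norms are positive, else 1.0,
    # matching the nested tf.where in apply_gradients.
    w_norm = np.linalg.norm(param)
    g_norm = np.linalg.norm(grad)
    if w_norm > 0 and g_norm > 0:
        return eeta * w_norm / g_norm
    return 1.0

w, g = np.ones(10), 0.01 * np.ones(10)
print(trust_ratio(w, g))  # 0.001 * ||w|| / ||g|| = 0.1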
Example #3
    def body(self,
             features,
             decode_step=None,
             cache=None,
             decoding_stats=None,
             add_summary=True):
        encoder_output = None
        extra_losses = []
        padding_bias = None
        if not self.hparams.fast_decode:
            decode_step = None
        if "inputs" in features:
            inputs = features["inputs"]
            # remove the last two dimensions that are always 1.
            inputs = tf.reshape(
                inputs,
                utils.shape_list(inputs)[:2] + [self.hidden_size])
            # Padding bias only used for seq2seq models.
            padding_bias = utils.embedding_to_padding(inputs)
            # Mask random positions
            shape = utils.shape_list(inputs)
            if self.hparams.input_dropout:
                inputs = tf.where(
                    tf.random.uniform(shape) < self.hparams.input_dropout,
                    tf.zeros_like(inputs), inputs)
            if self.hparams.add_timing_signal:
                inputs += utils.get_timing_signal_1d(self.hparams.max_length,
                                                     self.hidden_size)
            if cache is not None and -1 in cache:
                encoder_output = cache[-1]
            else:
                encoder_output = utils.transformer_encoder_layers(
                    inputs=inputs,
                    num_layers=self.num_encoder_layers,
                    hparams=self.hparams,
                    losses=extra_losses,
                    name="encoder",
                    token_bias=features.get("token_bias_inputs"),
                    padding_bias=padding_bias)
            if cache is not None and -1 not in cache:
                cache[-1] = encoder_output
        targets = tf.to_int32(features["targets"])
        # remove the last two dimensions that are always 1.
        targets = tf.reshape(targets, utils.shape_list(targets)[:2])
        # Clamp targets to max_target_length
        targets = targets[:, :self.hparams.max_target_length]
        if self.is_decode:
            targets = self.process_partial_targets_decoding(targets)
        decoder_input = self.prepare_decoder(targets)

        decoder_output = utils.transformer_decoder_layers(
            inputs=decoder_input,
            num_layers=self.num_decoder_layers,
            hparams=self.hparams,
            encoder_output=encoder_output,
            decode_step=decode_step,
            losses=extra_losses,
            cache=cache,
            name="decoder",
            decoding_stats=decoding_stats,
            token_bias_inputs=features.get("token_bias_inputs"),
            token_bias_targets=features.get("token_bias_targets"),
            padding_bias=padding_bias)
        logits = self.produce_output(decoder_output)

        # Return logits as-is in decoding mode
        if self.is_decode:
            return logits

        # Add cross entropy loss
        one_hot_targets = tf.one_hot(tf.cast(targets, dtype=tf.int32),
                                     self.vocab_size)
        x_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=one_hot_targets, logits=logits)
        weights = tf.to_float(tf.not_equal(targets, 0))
        loss = tf.reduce_sum(x_entropy * weights) / tf.reduce_sum(weights)
        if add_summary:
            tf.summary.scalar("losses/weight", tf.reduce_sum(weights))
            tf.summary.scalar("losses/x_entropy",
                              tf.reduce_sum(x_entropy * weights))

        loss_dict = {"training": loss}
        if extra_losses:
            loss_dict["extra_loss"] = tf.add_n(extra_losses)
        # hack for T2T metrics
        logits = tf.reshape(
            logits,
            utils.shape_list(logits)[:2] + [1, 1] +
            utils.shape_list(logits)[-1:])
        return logits, loss_dict
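The loss at the end is a padding-weighted cross entropy (target id 0 is treated as padding); a NumPy sketch of the same reduction:

import numpy as np

def masked_xent(logits, targets):
    # logits: (batch, length, vocab); targets: (batch, length) int ids.
    # Positions whose target id is 0 get zero weight, as above.
    probs = np.exp(logits - logits.max(-1, keepdims=True))
    probs /= probs.sum(-1, keepdims=True)
    xent = -np.log(np.take_along_axis(probs, targets[..., None], axis=-1))[..., 0]
    weights = (targets != 0).astype(np.float64)
    return (xent * weights).sum() / weights.sum()

logits = np.zeros((1, 3, 5))         # uniform predictions over 5 tokens
targets = np.array([[2, 4, 0]])      # last position is padding
print(masked_xent(logits, targets))  # log(5) ~ 1.609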
Example #4
def logmarglike_threetransfergaussians(
    ells,  # (..., )
    y,  # (..., dy)
    yinvvar,  # (..., dy)
    M_T,  #  (..., dt, dy),
    z,  #  (..., dz),
    zinvvar,  #  (..., dz),
    R_T,  #  (..., dt, dz),
    mu,  #  (..., dt),
    muinvvar,  #  (..., dt),
):
    """
    Fit linear model to three Gaussian data sets

    Parameters
    ----------
    ells : ndarray (nobj, )
        scaling between the data: y = ell * z
    y, yinvvar : ndarray (nobj, ..., n_pix_y)
        data and data inverse variances
    M_T : ndarray (..., n_components, n_pix_y)
        design matrix of linear model
    z, zinvvar : ndarray (nobj, ..., n_pix_z)
        data and data inverse variances for z
    R_T : ndarray (..., n_components, n_pix_z)
        design matrix of linear model for z
    mu, muinvvar : ndarray ( ..., n_components)
        Gaussian prior mean and inverse variances on the model parameters

    Returns
    -------
    logfml : ndarray (nobj, )
        log marginal likelihood values, with the linear parameters marginalised
    theta_map : ndarray (nobj, ndim)
        Best fit MAP parameters
    theta_cov : ndarray (nobj, ndim, ndim)
        Parameter covariance

    """
    log2pi = tf.cast(tf.math.log(2.0 * np.pi), T)  # T: TF dtype defined at module level
    nt = tf.cast(tf.shape(M_T)[-2], T)
    nobj = tf.cast(tf.shape(y)[0], T)
    ny = tf.cast(
        tf.math.count_nonzero(tf.where(yinvvar > 0)), T
    )  # tf.cast(tf.shape(y)[-1], T)
    nz = tf.cast(
        tf.math.count_nonzero(tf.where(zinvvar > 0)), T
    )  # tf.cast(tf.shape(z)[-1], T)
    nm = tf.cast(
        tf.math.count_nonzero(tf.where(muinvvar > 0)), T
    )  # tf.cast(tf.shape(mu)[-1], T)
    M = tf.transpose(M_T, [0, 2, 1])  # tf.einsum("...ij->...ji", M_T)
    R = tf.transpose(R_T, [0, 2, 1])  # tf.einsum("...ij->...ji", R_T)
    Hbar = (
        ells[:, None, None] ** 2 * tf.matmul(R_T, R * zinvvar[..., :, None])
        + tf.matmul(M_T, M * yinvvar[..., :, None])
        + tf.eye(nt, dtype=T)[None, :, :]
        * tf.ones((nobj, 1, 1), dtype=T)
        * muinvvar[..., :, None]
    )  #  (..., dt, dt)
    etabar = (
        ells[:, None] * tf.reduce_sum(R_T * (z * zinvvar)[..., None, :], axis=-1)
        + tf.reduce_sum(M_T * (y * yinvvar)[..., None, :], axis=-1)
        + mu * muinvvar  # prior term enters elementwise, shape (..., dt)
    )  # (..., dt)
    theta_map = tf.linalg.solve(Hbar, etabar[..., None])[..., 0]  # (..., dt)
    theta_cov = tf.linalg.inv(Hbar)
    logdetH = (
        tf.reduce_sum(tf.where(zinvvar > 0, tf.math.log(zinvvar), zinvvar * 0), axis=-1)
        + tf.reduce_sum(
            tf.where(yinvvar > 0, tf.math.log(yinvvar), yinvvar * 0), axis=-1
        )
        + tf.reduce_sum(
            tf.where(muinvvar > 0, tf.math.log(muinvvar), muinvvar * 0), axis=-1
        )
    )
    xi1 = -0.5 * (
        (ny + nz + nm) * log2pi
        - logdetH
        + tf.reduce_sum(y * y * yinvvar, axis=-1)
        + tf.reduce_sum(z * z * zinvvar, axis=-1)
        + tf.reduce_sum(mu * mu * muinvvar, axis=-1)
    )
    logdetHbar = tf.linalg.logdet(Hbar)
    xi2 = -0.5 * (nt * log2pi - logdetHbar + tf.reduce_sum(etabar * theta_map, axis=-1))
    logfml = xi1 - xi2
    return logfml, theta_map, theta_cov
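The expression follows the standard Gaussian linear-model marginalisation; a single-dataset NumPy analogue of the same xi1/xi2 algebra (flat prior, diagonal noise) may make the structure easier to follow:

import numpy as np

def logmarglike_onegaussian(y, yinvvar, M_T):
    # Marginal likelihood of y ~ N(M theta, diag(1/yinvvar)), flat prior.
    M = M_T.T
    Hbar = M_T @ (M * yinvvar[:, None])   # (dt, dt)
    etabar = M_T @ (y * yinvvar)          # (dt,)
    theta_map = np.linalg.solve(Hbar, etabar)
    ny = np.count_nonzero(yinvvar > 0)
    log2pi = np.log(2.0 * np.pi)
    xi1 = -0.5 * (ny * log2pi
                  - np.sum(np.log(yinvvar[yinvvar > 0]))
                  + np.sum(y * y * yinvvar))
    xi2 = -0.5 * (len(etabar) * log2pi
                  - np.linalg.slogdet(Hbar)[1]
                  + np.sum(etabar * theta_map))
    return xi1 - xi2, theta_map, np.linalg.inv(Hbar)

logfml, theta, cov = logmarglike_onegaussian(
    np.array([1.0, 2.0, 3.0]), np.ones(3), np.ones((1, 3)))
print(theta)  # [2.] -- the MAP of a constant model is the weighted mean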
Example #5
def solarize(image, threshold=128):
    # For each pixel in the image, keep the pixel
    # if its value is less than the threshold.
    # Otherwise, invert it by subtracting it from 255.
    return tf.where(image < threshold, image, 255 - image)
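A toy check in NumPy (uint8-range values assumed):

import numpy as np

img = np.array([0, 100, 127, 128, 200, 255])
print(np.where(img < 128, img, 255 - img))
# [  0 100 127 127  55   0] -- values at or above the threshold are inverted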
Example #6
    def compute_loss(self, y_true, y_pred):
        """Compute multibox loss.

        # Arguments
            y_true: Ground truth targets,
                tensor of shape (?, num_boxes, 4 + num_classes + 8),
                priors in ground truth are fictitious,
                y_true[:, :, -8] has 1 if prior should be penalized
                    or in other words is assigned to some ground truth box,
                y_true[:, :, -7:] are all 0.
            y_pred: Predicted logits,
                tensor of shape (?, num_boxes, 4 + num_classes + 8).

        # Returns
            loss: Loss for prediction, tensor of shape (?,).
        """
        batch_size = tf.shape(y_true)[0]
        num_boxes = tf.to_float(tf.shape(y_true)[1])

        # loss for all priors
        conf_loss = self._softmax_loss(y_true[:, :, 4:-8], y_pred[:, :, 4:-8])
        loc_loss = self._l1_smooth_loss(y_true[:, :, :4], y_pred[:, :, :4])

        # get positives loss
        num_pos = tf.reduce_sum(y_true[:, :, -8], axis=-1)
        pos_loc_loss = tf.reduce_sum(loc_loss * y_true[:, :, -8], axis=1)
        pos_conf_loss = tf.reduce_sum(conf_loss * y_true[:, :, -8], axis=1)

        # get negatives loss, we penalize only confidence here
        num_neg = tf.minimum(self.neg_pos_ratio * num_pos, num_boxes - num_pos)
        pos_num_neg_mask = tf.greater(num_neg, 0)
        has_min = tf.to_float(tf.reduce_any(pos_num_neg_mask))
        num_neg = tf.concat(
            axis=0,
            values=[num_neg, [(1 - has_min) * self.negatives_for_hard]])
        num_neg_batch = tf.reduce_min(
            tf.boolean_mask(num_neg, tf.greater(num_neg, 0)))
        num_neg_batch = tf.to_int32(num_neg_batch)
        confs_start = 4 + self.background_label_id + 1
        confs_end = confs_start + self.num_classes - 1
        max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end], axis=2)
        _, indices = tf.nn.top_k(max_confs * (1 - y_true[:, :, -8]),
                                 k=num_neg_batch)
        batch_idx = tf.expand_dims(tf.range(0, batch_size), 1)
        batch_idx = tf.tile(batch_idx, (1, num_neg_batch))
        full_indices = (tf.reshape(batch_idx, [-1]) * tf.to_int32(num_boxes) +
                        tf.reshape(indices, [-1]))
        # full_indices = tf.concat(2, [tf.expand_dims(batch_idx, 2),
        #                              tf.expand_dims(indices, 2)])
        # neg_conf_loss = tf.gather_nd(conf_loss, full_indices)
        neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]), full_indices)
        neg_conf_loss = tf.reshape(neg_conf_loss, [batch_size, num_neg_batch])
        neg_conf_loss = tf.reduce_sum(neg_conf_loss, axis=1)

        # loss is sum of positives and negatives
        total_loss = pos_conf_loss + neg_conf_loss
        total_loss /= (num_pos + tf.to_float(num_neg_batch))
        num_pos = tf.where(tf.not_equal(num_pos, 0), num_pos,
                           tf.ones_like(num_pos))
        total_loss += (self.alpha * pos_loc_loss) / num_pos
        return total_loss
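The negative mining step keeps only the hardest background priors. A toy NumPy version of that selection (here scored by max non-background confidence, as with max_confs above):

import numpy as np

max_confs = np.array([0.9, 0.8, 0.3, 0.7, 0.1])  # max non-bg confidence
is_positive = np.array([1., 0., 0., 0., 0.])     # prior 0 matches a gt box
k = 2                                            # plays num_neg_batch

# Zero out positives so only negatives compete, then take the top-k.
neg_scores = max_confs * (1 - is_positive)
hard_negatives = np.argsort(-neg_scores)[:k]
print(hard_negatives)  # [1 3] -> the two most confident false positives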
Example #7
    def decode(self, serialized_example):
        """Decode the serialized example.

        Args:
          serialized_example: a single serialized tf.Example string.

        Returns:
          decoded_tensors: a dictionary of tensors with the following fields:
            - image: a uint8 tensor of shape [None, None, 3].
            - source_id: a string scalar tensor.
            - height: an integer scalar tensor.
            - width: an integer scalar tensor.
            - groundtruth_classes: a int64 tensor of shape [None].
            - groundtruth_is_crowd: a bool tensor of shape [None].
            - groundtruth_area: a float32 tensor of shape [None].
            - groundtruth_boxes: a float32 tensor of shape [None, 4].
            - groundtruth_instance_masks: a float32 tensor of shape
                [None, None, None].
            - groundtruth_instance_masks_png: a string tensor of shape [None].
        """
        parsed_tensors = tf.io.parse_single_example(serialized_example,
                                                    self._keys_to_features)
        for k in parsed_tensors:
            if isinstance(parsed_tensors[k], tf.SparseTensor):
                if parsed_tensors[k].dtype == tf.string:
                    parsed_tensors[k] = tf.sparse_tensor_to_dense(
                        parsed_tensors[k], default_value='')
                else:
                    parsed_tensors[k] = tf.sparse_tensor_to_dense(
                        parsed_tensors[k], default_value=0)

        image = self._decode_image(parsed_tensors)
        boxes = self._decode_boxes(parsed_tensors)
        areas = self._decode_areas(parsed_tensors)

        decode_image_shape = tf.logical_or(
            tf.equal(parsed_tensors['image/height'], -1),
            tf.equal(parsed_tensors['image/width'], -1))
        image_shape = tf.cast(tf.shape(image), dtype=tf.int64)

        parsed_tensors['image/height'] = tf.where(
            decode_image_shape, image_shape[0], parsed_tensors['image/height'])
        parsed_tensors['image/width'] = tf.where(decode_image_shape,
                                                 image_shape[1],
                                                 parsed_tensors['image/width'])

        is_crowds = tf.cond(
            tf.greater(tf.shape(parsed_tensors['image/object/is_crowd'])[0], 0),
            lambda: tf.cast(parsed_tensors['image/object/is_crowd'], dtype=tf.bool),
            lambda: tf.zeros_like(parsed_tensors['image/object/class/label'],
                                  dtype=tf.bool))  # pylint: disable=line-too-long
        if self._regenerate_source_id:
            source_id = _get_source_id_from_encoded_image(parsed_tensors)
        else:
            source_id = tf.cond(
                tf.greater(
                    tf.strings.length(parsed_tensors['image/source_id']),
                    0), lambda: parsed_tensors['image/source_id'],
                lambda: _get_source_id_from_encoded_image(parsed_tensors))
        if self._include_mask:
            masks = self._decode_masks(parsed_tensors)

        decoded_tensors = {
            'image': image,
            'source_id': source_id,
            'height': parsed_tensors['image/height'],
            'width': parsed_tensors['image/width'],
            'groundtruth_classes': parsed_tensors['image/object/class/label'],
            'groundtruth_is_crowd': is_crowds,
            'groundtruth_area': areas,
            'groundtruth_boxes': boxes,
        }
        if self._include_mask:
            decoded_tensors.update({
                'groundtruth_instance_masks':
                masks,
                'groundtruth_instance_masks_png':
                parsed_tensors['image/object/mask'],
            })
        return decoded_tensors
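A minimal round-trip of one field through tf.train.Example, the format decode expects (hypothetical single-key schema; the real _keys_to_features covers many more fields):

import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': tf.train.Feature(int64_list=tf.train.Int64List(value=[480])),
}))
parsed = tf.io.parse_single_example(
    example.SerializeToString(),
    {'image/height': tf.io.FixedLenFeature([], tf.int64)})
print(parsed['image/height'])  # 480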
Example #8
def inception_model_fn(features, labels, mode, params):
    """Inception v4 model using Estimator API."""
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['model_transpose_dims'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v4(
                    features, num_classes, is_training=is_training)
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v4(
                features, num_classes, is_training=is_training)
        return logits, end_points

    if FLAGS.clear_update_collections:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    weight_decay=0.0,
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(
                inception.inception_v4_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=tf.cast(end_points['AuxLogits'],
                                                       tf.float32),
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    l2_loss = []
    for v in tf.trainable_variables():
        tf.logging.info(v.name)
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
        tf.logging.info(len(l2_loss))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    # Adjust the initial learning rate for warmup
    initial_learning_rate /= (
        FLAGS.learning_rate_decay**((FLAGS.warmup_epochs + FLAGS.cold_epochs) /
                                    FLAGS.learning_rate_decay_epochs))
    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        clr = FLAGS.cold_learning_rate
        wlr = initial_learning_rate / (FLAGS.warmup_epochs + FLAGS.cold_epochs)
        learning_rate = tf.where(
            tf.greater_equal(current_epoch, FLAGS.cold_epochs), (tf.where(
                tf.greater_equal(current_epoch,
                                 FLAGS.warmup_epochs + FLAGS.cold_epochs),
                tf.train.exponential_decay(
                    learning_rate=initial_learning_rate,
                    global_step=global_step,
                    decay_steps=int(
                        FLAGS.learning_rate_decay_epochs * batches_per_epoch),
                    decay_rate=FLAGS.learning_rate_decay,
                    staircase=True),
                tf.multiply(tf.cast(current_epoch, tf.float32), wlr))), clr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer:', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar('learning_rate',
                                       tf.reduce_mean(lr),
                                       step=gs)
                        summary.scalar('current_epoch',
                                       tf.reduce_mean(ce),
                                       step=gs)

                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
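The nested tf.where above implements a three-phase schedule: a constant cold learning rate, a linear warmup, then staircase exponential decay with a floor. A NumPy sketch (all hyperparameter values here are illustrative, not the FLAGS defaults):

import numpy as np

def lr_at_epoch(epoch, cold_epochs=2, warmup_epochs=5, cold_lr=0.01,
                init_lr=0.4, decay=0.94, decay_epochs=2.5, floor_frac=1e-4):
    wlr = init_lr / (warmup_epochs + cold_epochs)
    if epoch < cold_epochs:                      # cold phase
        lr = cold_lr
    elif epoch < cold_epochs + warmup_epochs:    # linear warmup
        lr = epoch * wlr
    else:                                        # staircase exponential decay
        lr = init_lr * decay ** np.floor(epoch / decay_epochs)
    return max(lr, floor_frac * init_lr)         # final_learning_rate floor

print([round(lr_at_epoch(e), 3) for e in range(10)])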
Example #9
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image: image tensor that is preprocessed to have normalized value and
          dimension [output_size[0], output_size[1], 3]
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        num_groundtrtuhs: number of objects.
        boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The coordinates are w.r.t. the scaled
          image that is fed to the network. The tensor is padded with -1 to
          the fixed dimension [self._max_num_instances, 4].
        classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
        pasted_objects_mask: a binary tensor with the same size as image which
          is computed as the union of all the object masks.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtrtuhs = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtrtuhs, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtrtuhs), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = input_utils.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = input_utils.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes = input_utils.random_horizontal_flip(image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_utils.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=input_utils.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
                                                  image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_utils.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            uncropped_masks = tf.cast(masks, tf.int8)
            uncropped_masks = tf.expand_dims(uncropped_masks, axis=3)
            uncropped_masks = input_utils.resize_and_crop_masks(
                uncropped_masks, image_scale, self._output_size, offset)
            # Transfer boxes to the original image space and do normalization.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_utils.normalize_boxes(cropped_boxes,
                                                      image_shape)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)
        indices = tf.range(start=0, limit=tf.shape(classes)[0], dtype=tf.int32)

        # Samples the numbers of masks for pasting.
        m = tf.random.uniform(shape=[],
                              maxval=tf.shape(classes)[0] + 1,
                              dtype=tf.int32)
        m = tf.math.minimum(m, tf.shape(classes)[0])

        # Shuffles the indices of objects and keep the first m objects for pasting.
        shuffled_indices = tf.random.shuffle(indices)
        shuffled_indices = tf.slice(shuffled_indices, [0], [m])

        boxes = tf.gather(boxes, shuffled_indices)
        masks = tf.gather(masks, shuffled_indices)
        classes = tf.gather(classes, shuffled_indices)
        uncropped_masks = tf.gather(uncropped_masks, shuffled_indices)
        pasted_objects_mask = tf.reduce_max(uncropped_masks, 0)
        pasted_objects_mask = tf.cast(pasted_objects_mask, tf.bool)

        labels = {
            'image': image,
            'image_info': image_info,
            'num_groundtrtuhs': tf.shape(classes)[0],
            'boxes': boxes,
            'masks': masks,
            'classes': classes,
            'pasted_objects_mask': pasted_objects_mask,
        }
        return labels
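The denormalisation used before resizing is a plain scale by image height and width; a NumPy stand-in for box_utils.denormalize_boxes (semantics assumed from the [y1, x1, y2, x2] convention in the docstring):

import numpy as np

def denormalize_boxes(boxes, image_shape):
    # boxes: (N, 4) normalized [y1, x1, y2, x2]; image_shape: (h, w).
    h, w = image_shape
    return boxes * np.array([h, w, h, w], dtype=boxes.dtype)

boxes = np.array([[0.1, 0.2, 0.5, 0.8]], dtype=np.float32)
print(denormalize_boxes(boxes, (100, 200)))  # [[ 10.  40.  50. 160.]]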
Example #10
    def _build(self, probs, all_anchors, gt_boxes):
        """
        Args:
            all_anchors: A Tensor with anchors for all of SSD's features.
                The shape of the Tensor is (num_anchors, 4).
            gt_boxes: A Tensor with the ground truth boxes for the image.
                The shape of the Tensor is (num_gt, 5), having the truth label
                as the last value for each box.
        Returns:
            class_targets: Either a truth value of the anchor (a value
                between 0 and num_classes, with 0 being background), or -1 when
                the anchor is to be ignored in the minibatch.
                The shape of the Tensor is (num_anchors, 1).
            bbox_offsets_targets: A bounding box regression target for each of
                the anchors that have a greater than zero label. For every
                other anchors we return zeros.
                The shape of the Tensor is (num_anchors, 4).
        """

        all_anchors = tf.cast(all_anchors, tf.float32)
        gt_boxes = tf.cast(gt_boxes, tf.float32)

        # We are going to label each anchor based on the IoU with
        # `gt_boxes`. Start by filling the labels with -1, marking them as
        # unknown.
        anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])
        anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

        overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
        max_overlaps = tf.reduce_max(overlaps, axis=1)

        # Get the index of the best gt_box for each anchor.
        best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

        # Having the index of the gt bbox with the best label we need to get
        # the label for each gt box and sum 1 to it because 0 is used for
        # background.
        best_fg_labels_for_anchors = tf.add(
            tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.)
        iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)
        # We update anchors_label with the value in
        # best_fg_labels_for_anchors only when the box is foreground.
        # TODO: Replace with a sparse_to_dense with -1 default_value
        anchors_label = tf.where(condition=iou_is_fg,
                                 x=best_fg_labels_for_anchors,
                                 y=anchors_label)

        best_anchor_idxs = tf.argmax(overlaps, axis=0)
        is_best_box = tf.sparse_to_dense(sparse_indices=best_anchor_idxs,
                                         sparse_values=True,
                                         default_value=False,
                                         output_shape=tf.cast(
                                             anchors_label_shape, tf.int64),
                                         validate_indices=False)

        # Now we need to find the anchors that are the best for each of the
        # gt_boxes. We overwrite the previous anchors_label with this
        # because setting the best anchor for each gt_box has priority.
        best_anchors_gt_labels = tf.sparse_to_dense(
            sparse_indices=best_anchor_idxs,
            sparse_values=gt_boxes[:, 4] + 1,
            default_value=-1,
            output_shape=tf.cast(anchors_label_shape, tf.int64),
            validate_indices=False,
            name="get_right_labels_for_bestboxes")
        anchors_label = tf.where(condition=is_best_box,
                                 x=best_anchors_gt_labels,
                                 y=anchors_label,
                                 name="update_labels_for_bestbox_anchors")

        # Use the worst backgrounds (the bgs whose probability of being fg is
        # the greatest).
        cls_probs = probs[:, 1:]
        max_cls_probs = tf.reduce_max(cls_probs, axis=1)

        # Exclude boxes with IOU > `background_threshold_high` with any GT.
        iou_less_than_bg_tresh_high_filter = tf.less_equal(
            max_overlaps, self._background_threshold_high)
        bg_anchors = tf.less_equal(anchors_label, 0)
        bg_overlaps_filter = tf.logical_and(iou_less_than_bg_tresh_high_filter,
                                            bg_anchors)

        max_cls_probs = tf.where(
            condition=bg_overlaps_filter,
            x=max_cls_probs,
            y=tf.fill(dims=anchors_label_shape, value=-1.),
        )

        # We calculate up to how many backgrounds we desire based on the
        # final number of foregrounds and the hard negative mining ratio.
        num_fg_mask = tf.greater(anchors_label, 0.0)
        num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)

        num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)
        top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)

        set_bg = tf.sparse_to_dense(sparse_indices=top_k_bg.indices,
                                    sparse_values=True,
                                    default_value=False,
                                    output_shape=anchors_label_shape,
                                    validate_indices=False)

        anchors_label = tf.where(condition=set_bg,
                                 x=tf.fill(dims=anchors_label_shape, value=0.),
                                 y=anchors_label)

        # Next step is to calculate the proper bbox targets for the labeled
        # anchors based on the values of the ground-truth boxes.
        # We have to use only the anchors labeled >= 1, each matching with
        # the proper gt_boxes

        # Get the ids of the anchors that matter for bbox_target comparison.
        is_anchor_with_target = tf.greater(anchors_label, 0)
        anchors_with_target_idx = tf.where(condition=is_anchor_with_target)
        # Get the corresponding ground truth box only for the anchors with
        # target.
        gt_boxes_idxs = tf.gather(best_gtbox_for_anchors_idx,
                                  anchors_with_target_idx)
        # Get the values of the ground truth boxes.
        anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
        # We create the same array but with the anchors
        anchors_with_target = tf.gather_nd(all_anchors,
                                           anchors_with_target_idx)
        # We create our targets with bbox_transform
        bbox_targets = encode(anchors_with_target,
                              anchors_gt_boxes,
                              variances=self._variances)

        # We unmap targets to anchor_labels (containing the length of
        # anchors)
        bbox_targets = tf.scatter_nd(indices=anchors_with_target_idx,
                                     updates=bbox_targets,
                                     shape=tf.cast(tf.shape(all_anchors),
                                                   tf.int64))

        return anchors_label, bbox_targets
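Everything above hinges on the pairwise IoU matrix; a NumPy stand-in for bbox_overlap_tf, assuming corner-encoded boxes:

import numpy as np

def pairwise_iou(a, b):
    # a: (N, 4), b: (M, 4) as [x1, y1, x2, y2] -> (N, M) IoU matrix.
    x1 = np.maximum(a[:, None, 0], b[None, :, 0])
    y1 = np.maximum(a[:, None, 1], b[None, :, 1])
    x2 = np.minimum(a[:, None, 2], b[None, :, 2])
    y2 = np.minimum(a[:, None, 3], b[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)

a = np.array([[0., 0., 2., 2.]])
b = np.array([[1., 1., 3., 3.], [10., 10., 11., 11.]])
print(pairwise_iou(a, b))  # [[0.14285714 0.        ]]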
Example #11
def focal_loss(logits, targets, alpha, gamma, normalizer):
  """Compute the focal loss between `logits` and the golden `target` values.

  Focal loss = -(1-pt)^gamma * log(pt)
  where pt is the probability of being classified to the true class.

  Args:
    logits: A float32 tensor of size
      [batch, height_in, width_in, num_predictions].
    targets: A float32 tensor of size
      [batch, height_in, width_in, num_predictions].
    alpha: A float32 scalar multiplying alpha to the loss from positive examples
      and (1-alpha) to the loss from negative examples.
    gamma: A float32 scalar modulating loss from hard and easy examples.
    normalizer: A float32 scalar normalizes the total loss from all examples.

  Returns:
    loss: A float32 Tensor of size [batch, height_in, width_in, num_predictions]
      representing normalized loss on the prediction map.
  """
  with tf.name_scope('focal_loss'):
    positive_label_mask = tf.equal(targets, 1.0)
    cross_entropy = (
        tf.nn.sigmoid_cross_entropy_with_logits(labels=targets, logits=logits))
    # Below are comments/derivations for computing modulator.
    # For brevity, let x = logits,  z = targets, r = gamma, and p_t = sigmoid(x)
    # for positive samples and 1 - sigmoid(x) for negative examples.
    #
    # The modulator, defined as (1 - P_t)^r, is a critical part in focal loss
    # computation. For r > 0, it puts more weights on hard examples, and less
    # weights on easier ones. However if it is directly computed as (1 - P_t)^r,
    # its back-propagation is not stable when r < 1. The implementation here
    # resolves the issue.
    #
    # For positive samples (labels being 1),
    #    (1 - p_t)^r
    #  = (1 - sigmoid(x))^r
    #  = (1 - (1 / (1 + exp(-x))))^r
    #  = (exp(-x) / (1 + exp(-x)))^r
    #  = exp(log((exp(-x) / (1 + exp(-x)))^r))
    #  = exp(r * log(exp(-x)) - r * log(1 + exp(-x)))
    #  = exp(- r * x - r * log(1 + exp(-x)))
    #
    # For negative samples (labels being 0),
    #    (1 - p_t)^r
    #  = (sigmoid(x))^r
    #  = (1 / (1 + exp(-x)))^r
    #  = exp(log((1 / (1 + exp(-x)))^r))
    #  = exp(-r * log(1 + exp(-x)))
    #
    # Therefore one unified form for positive (z = 1) and negative (z = 0)
    # samples is:
    #      (1 - p_t)^r = exp(-r * z * x - r * log(1 + exp(-x))).
    neg_logits = -1.0 * logits
    modulator = tf.exp(gamma * targets * neg_logits - gamma * tf.log1p(
        tf.exp(neg_logits)))
    loss = modulator * cross_entropy
    weighted_loss = tf.where(positive_label_mask, alpha * loss,
                             (1.0 - alpha) * loss)
    weighted_loss /= normalizer
  return weighted_loss
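The derivation in the comments checks out numerically; both forms of the modulator agree to machine precision:

import numpy as np

x = np.linspace(-4.0, 4.0, 9)   # logits
gamma = 2.0
for z in (0.0, 1.0):            # negative / positive labels
    p = 1.0 / (1.0 + np.exp(-x))
    p_t = p if z == 1.0 else 1.0 - p
    direct = (1.0 - p_t) ** gamma
    stable = np.exp(-gamma * z * x - gamma * np.log1p(np.exp(-x)))
    assert np.allclose(direct, stable)
print("modulator identity holds")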
Example #12
def box_matching(boxes, gt_boxes, gt_classes):
    """Match boxes to groundtruth boxes.

  Given the proposal boxes and the groundtruth boxes and classes, perform the
  groundtruth matching by taking the argmax of the IoU between boxes and
  groundtruth boxes.

  Args:
    boxes: a tensor of shape of [batch_size, N, 4] representing the box
      coordinates to be matched to groundtruth boxes.
    gt_boxes: a tensor of shape of [batch_size, MAX_INSTANCES, 4] representing
      the groundtruth box coordinates. It is padded with -1s to indicate the
      invalid boxes.
    gt_classes: [batch_size, MAX_INSTANCES] representing the groundtruth box
      classes. It is padded with -1s to indicate the invalid classes.

  Returns:
    matched_gt_boxes: a tensor of shape of [batch_size, N, 4], representing
      the matched groundtruth box coordinates for each input box. If the box
      does not overlap with any groundtruth boxes, the matched boxes of it
      will be set to all 0s.
    matched_gt_classes: a tensor of shape of [batch_size, N], representing
      the matched groundtruth classes for each input box. If the box does not
      overlap with any groundtruth boxes, the matched box classes of it will
      be set to 0, which corresponds to the background class.
    matched_gt_indices: a tensor of shape of [batch_size, N], representing
      the indices of the matched groundtruth boxes in the original gt_boxes
      tensor. If the box does not overlap with any groundtruth boxes, the
      index of the matched groundtruth will be set to -1.
    matched_iou: a tensor of shape of [batch_size, N], representing the IoU
      between the box and its matched groundtruth box. The matched IoU is the
      maximum IoU of the box and all the groundtruth boxes.
    iou: a tensor of shape of [batch_size, N, K], representing the IoU matrix
      between boxes and the groundtruth boxes. The IoU between a box and the
      invalid groundtruth boxes whose coordinates are [-1, -1, -1, -1] is -1.
  """
    # Compute IoU between boxes and gt_boxes.
    # iou <- [batch_size, N, K]
    iou = box_utils.bbox_overlap(boxes, gt_boxes)

    # max_iou <- [batch_size, N]
    # 0.0 -> no match to gt, or -1.0 match to no gt
    matched_iou = tf.reduce_max(iou, axis=-1)

    # background_box_mask <- bool, [batch_size, N]
    background_box_mask = tf.less_equal(matched_iou, 0.0)

    argmax_iou_indices = tf.argmax(iou, axis=-1, output_type=tf.int32)

    argmax_iou_indices_shape = tf.shape(argmax_iou_indices)
    batch_indices = (
        tf.expand_dims(tf.range(argmax_iou_indices_shape[0]), axis=-1) *
        tf.ones([1, argmax_iou_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, argmax_iou_indices], axis=-1)

    matched_gt_boxes = tf.gather_nd(gt_boxes, gather_nd_indices)
    matched_gt_boxes = tf.where(
        tf.tile(tf.expand_dims(background_box_mask, axis=-1), [1, 1, 4]),
        tf.zeros_like(matched_gt_boxes, dtype=tf.float32), matched_gt_boxes)

    matched_gt_classes = tf.gather_nd(gt_classes, gather_nd_indices)
    matched_gt_classes = tf.where(background_box_mask,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)

    matched_gt_indices = tf.where(background_box_mask,
                                  -tf.ones_like(argmax_iou_indices),
                                  argmax_iou_indices)

    return (matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            matched_iou, iou)
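The batch_indices/gather_nd pairing simply picks each row's argmax column; in NumPy the same lookup is a take_along_axis:

import numpy as np

iou = np.array([[[0.1, 0.7],
                 [0.0, 0.0]]])    # (batch=1, N=2, K=2)
gt_classes = np.array([[5, 7]])   # (batch=1, K=2)

argmax = iou.argmax(axis=-1)      # like argmax_iou_indices: [[1 0]]
matched = np.take_along_axis(gt_classes, argmax, axis=1)
print(matched)  # [[7 5]] -- box 0 matched gt 1, box 1 matched gt 0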
Example #13
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Assigns the proposals with groundtruth classes and performs subsampling.

  Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
  following algorithm to generate the final `num_samples_per_image` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposed box with a groundtruth class and box by choosing
       the largest IoU overlap.
    3. Samples `num_samples_per_image` boxes from all proposed boxes, and
       returns box_targets, class_targets, and RoIs.

  Args:
    proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
      of proposals before groundtruth assignment. The last dimension is the
      box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
      format.
    gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4].
      The coordinates of gt_boxes are in the pixel coordinates of the scaled
      image. This tensor might have padding of values -1 indicating the invalid
      box coordinates.
    gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
      tensor might have paddings with values of -1 indicating the invalid
      classes.
    num_samples_per_image: an integer representing the RoI minibatch size per
      image.
    mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
      sampling proposals.
    fg_fraction: a float representing the target fraction of the RoI minibatch
      that is labeled foreground (i.e., class > 0).
    fg_iou_thresh: a float representing the IoU overlap threshold for an RoI to
      be considered foreground (if >= fg_iou_thresh).
    bg_iou_thresh_hi: a float representing the IoU overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).
    bg_iou_thresh_lo: a float representing the IoU overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).

  Returns:
    sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
      coordinates of the sampled RoIs, where K is the number of the sampled
      RoIs, i.e. K = num_samples_per_image.
    sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
      box coordinates of the matched groundtruth boxes of the samples RoIs.
    sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
      classes of the matched groundtruth boxes of the sampled RoIs.
    sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
      indices of the sampled groundtruth boxes in the original `gt_boxes`
      tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
  """

    with tf.name_scope('sample_proposals'):
        if mix_gt_boxes:
            boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
        else:
            boxes = proposed_boxes

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
         _) = box_matching(boxes, gt_boxes, gt_classes)

        positive_match = tf.greater(matched_iou, fg_iou_thresh)
        negative_match = tf.logical_and(
            tf.greater_equal(matched_iou, bg_iou_thresh_lo),
            tf.less(matched_iou, bg_iou_thresh_hi))
        ignored_match = tf.less(matched_iou, 0.0)

        # re-assign negatively matched boxes to the background class.
        matched_gt_classes = tf.where(negative_match,
                                      tf.zeros_like(matched_gt_classes),
                                      matched_gt_classes)
        matched_gt_indices = tf.where(negative_match,
                                      tf.zeros_like(matched_gt_indices),
                                      matched_gt_indices)

        sample_candidates = tf.logical_and(
            tf.logical_or(positive_match, negative_match),
            tf.logical_not(ignored_match))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        batch_size, _ = sample_candidates.get_shape().as_list()
        sampled_indicators = []
        for i in range(batch_size):
            sampled_indicator = sampler.subsample(sample_candidates[i],
                                                  num_samples_per_image,
                                                  positive_match[i])
            sampled_indicators.append(sampled_indicator)
        sampled_indicators = tf.stack(sampled_indicators)
        _, sampled_indices = tf.nn.top_k(tf.cast(sampled_indicators,
                                                 dtype=tf.int32),
                                         k=num_samples_per_image,
                                         sorted=True)

        sampled_indices_shape = tf.shape(sampled_indices)
        batch_indices = (
            tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
            tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
        gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

        sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
        sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
        sampled_gt_classes = tf.gather_nd(matched_gt_classes,
                                          gather_nd_indices)
        sampled_gt_indices = tf.gather_nd(matched_gt_indices,
                                          gather_nd_indices)

        return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
                sampled_gt_indices)
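With the default thresholds, the match partition above behaves like this toy NumPy version (note that an IoU exactly at 0.5 is neither foreground nor background, so it drops out of the candidates):

import numpy as np

matched_iou = np.array([0.9, 0.5, 0.2, -1.0])
fg_iou_thresh, bg_lo, bg_hi = 0.5, 0.0, 0.5

positive = matched_iou > fg_iou_thresh                     # [ True False False False]
negative = (matched_iou >= bg_lo) & (matched_iou < bg_hi)  # [False False  True False]
ignored = matched_iou < 0.0                                # padded gt: [False False False  True]
candidates = (positive | negative) & ~ignored
print(candidates)  # [ True False  True False]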
Example #14
  def build_train_graph(self,
                        inputs,
                        min_depth,
                        max_depth,
                        num_mpi_planes,
                        learning_rate=0.0002,
                        beta1=0.9,
                        vgg_model_file=None,
                        global_step=0):
    """Construct the training computation graph.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for training
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      num_mpi_planes: number of MPI planes to infer
      learning_rate: learning rate
      beta1: hyperparameter for Adam
      vgg_model_file: path to vgg weights (needed when vgg loss is used)
      global_step: current optimization step
    Returns:
      A train_op to be used for training.
    """
    print("starting to build graph")
    with tf.name_scope("input_size_randomization"):
      dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128],
                                 [8, 32], [8, 64], [8, 128]],
                                dtype=tf.int32)
      rand_dim = tf.random_shuffle(dim_choices)[0, :]
      height_div = rand_dim[0]
      width_div = rand_dim[0]
      num_mpi_planes = rand_dim[1]
      tf.summary.scalar("num_mpi_planes", num_mpi_planes)

    with tf.name_scope("setup"):
      mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope("input_data"):
      raw_tgt_image = inputs["tgt_image"]
      raw_ref_image = inputs["ref_image"]
      raw_src_images = inputs["src_images"]

      _, img_height, img_width, _ = raw_src_images.get_shape().as_list()
      img_height = img_height // height_div
      img_width = img_width // width_div

      raw_tgt_image = tf.image.convert_image_dtype(
          raw_tgt_image, dtype=tf.float32)
      raw_ref_image = tf.image.convert_image_dtype(
          raw_ref_image, dtype=tf.float32)
      raw_src_images = tf.image.convert_image_dtype(
          raw_src_images, dtype=tf.float32)
      raw_tgt_image = tf.image.resize_area(raw_tgt_image,
                                           [img_height, img_width])
      raw_ref_image = tf.image.resize_area(raw_ref_image,
                                           [img_height, img_width])
      raw_src_images = tf.image.resize_area(raw_src_images,
                                            [img_height, img_width])

      tgt_pose = inputs["tgt_pose"]
      ref_pose = inputs["ref_pose"]
      src_poses = inputs["src_poses"]
      intrinsics = inputs["intrinsics"]

      # Scale intrinsics based on size randomization
      intrinsics = tf.concat([
          intrinsics[:, 0:1, :] / tf.to_float(width_div),
          intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]
      ],
                             axis=1)
      inputs["intrinsics"] = intrinsics

      _, num_source, _, _ = src_poses.get_shape().as_list()

    with tf.name_scope("inference"):
      print("setting up MPI inference")
      num_mpi_planes = tf.shape(mpi_planes)[0]
      pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses,
                            intrinsics, num_mpi_planes,
                            mpi_planes)
      rgba_layers = pred["rgba_layers"]
      rgba_layers_refine = pred["rgba_layers_refine"]
      stuff_behind = pred["stuff_behind"]
      refine_input_mpi = pred["refine_input_mpi"]
      psv = pred["psv"]

    with tf.name_scope("synthesis"):
      print("setting up rendering")
      rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
      output_image, output_layers = self.mpi_render_view(
          rgba_layers, rel_pose, mpi_planes, intrinsics)
      output_alpha = output_layers[Ellipsis, -1]
      output_image_refine, _ = self.mpi_render_view(
          rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

    with tf.name_scope("loss"):
      print("computing losses")
      # Mask loss for pixels outside reference frustum
      loss_mask = tf.where(
          tf.equal(
              tf.reduce_min(
                  tf.abs(tf.reduce_sum(output_layers, axis=-1)),
                  axis=3,
                  keep_dims=True), 0.0),
          tf.zeros_like(output_alpha[:, :, :, 0:1]),
          tf.ones_like(output_alpha[:, :, :, 0:1]))
      loss_mask = tf.stop_gradient(loss_mask)
      tf.summary.image("loss_mask", loss_mask)

      # Helper functions for loss
      def compute_error(real, fake, mask):
        return tf.reduce_mean(mask * tf.abs(fake - real))

      # Normalized VGG loss (from
      # https://github.com/CQFIO/PhotographicImageSynthesis)

      downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1],
                                                     [1, ds, ds, 1], "SAME")

      def vgg_loss(raw_tgt_image, output_image, loss_mask):
        """Compute VGG loss."""

        vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
        rescaled_output_image = (output_image + 1.)/2. * 255.0
        vgg_fake = build_vgg19(
            rescaled_output_image, vgg_model_file, reuse=True)
        p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
        p1 = compute_error(vgg_real["conv1_2"],
                           vgg_fake["conv1_2"],
                           loss_mask)/2.6
        p2 = compute_error(vgg_real["conv2_2"],
                           vgg_fake["conv2_2"],
                           downsample(loss_mask, 2))/4.8
        p3 = compute_error(vgg_real["conv3_2"],
                           vgg_fake["conv3_2"],
                           downsample(loss_mask, 4))/3.7
        p4 = compute_error(vgg_real["conv4_2"],
                           vgg_fake["conv4_2"],
                           downsample(loss_mask, 8))/5.6
        p5 = compute_error(vgg_real["conv5_2"],
                           vgg_fake["conv5_2"],
                           downsample(loss_mask, 16))*10/1.5
        total_loss = p0+p1+p2+p3+p4+p5
        return total_loss, vgg_real, vgg_fake

      vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
      tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
      total_loss = vgg_loss_initial

      vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine,
                                       loss_mask)
      tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
      total_loss += vgg_loss_refine

    with tf.name_scope("train_op"):
      print("setting up train op")
      train_vars = tf.trainable_variables()
      optim = tf.train.AdamOptimizer(learning_rate, beta1)
      grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
      train_op = [optim.apply_gradients(grads_and_vars)]

    # Summaries
    tf.summary.scalar("total_loss", total_loss)
    # Source images
    for i in range(num_source):
      src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
      tf.summary.image("src_image_%d" % i, src_image)
    # Output image
    tf.summary.image("output_image", self.deprocess_image(output_image))
    # Refined output image
    tf.summary.image("output_image_refine",
                     self.deprocess_image(output_image_refine))
    # Target image
    tf.summary.image("tgt_image", raw_tgt_image)
    # Ref image
    tf.summary.image("ref_image", raw_ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 16  # Number of plane summaries to show in tensorboard
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers[:, :, :, ind, :3]
      alpha = rgba_layers[:, :, :, ind, -1:]
      ref_plane = psv[:, :, :, ind, 3:6]
      source_plane = psv[:, :, :, ind, :3]
      output_rgb = output_layers[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_%d" % i, alpha)
      tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
      tf.summary.image("psv_avg_%d" % i,
                       (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
      tf.summary.image("output_rgb_%d" % i,
                       self.deprocess_image(output_rgb))
      tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
      tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))

    # Cumulative rendered images and refined MPI
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers_refine[:, :, :, ind, :3]
      alpha = rgba_layers_refine[:, :, :, ind, 3:]
      render = stuff_behind[:, :, :, ind, :3]
      input_colors = refine_input_mpi[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_refine_%d" % i, alpha)
      tf.summary.image("rgba_layer_refine_%d" % i,
                       self.deprocess_image(rgb * alpha))
      tf.summary.image("cumulative_render_%d" % i, self.deprocess_image(render))
      tf.summary.image("input_colors_refine_%d" % i,
                       self.deprocess_image(input_colors))

    return train_op
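A minimal usage sketch for the function above; the wrapper object, the
data-loading helper, and the VGG19 weight path are all assumptions, not part
of the original code:

# Hypothetical driver for build_train_graph; names are placeholders.
model = MPIModel()  # assumed class defining build_train_graph
inputs = load_training_batch()  # must yield the tensors read in input_data
train_op = model.build_train_graph(
    inputs,
    min_depth=1.0,
    max_depth=100.0,
    num_mpi_planes=32,
    vgg_model_file="imagenet-vgg-verydeep-19.mat")  # assumed weight file
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for step in range(100000):
    sess.run(train_op)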
Example no. 15
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator."""

        del labels  # Unused.

        tf.logging.info("*** Features ***")
        for name in sorted(features):
            tf.logging.info("  name = %s, shape = %s", name,
                            features[name].shape)

        label_ids = features["label_ids"]
        input_mask = features["input_mask"]
        row_ids = features["row_ids"]
        column_ids = features["column_ids"]
        # Table cells only, without question tokens and table headers.
        table_mask = tf.where(row_ids > 0, tf.ones_like(row_ids),
                              tf.zeros_like(row_ids))
        do_model_aggregation = config.num_aggregation_labels > 0
        aggregation_function_id = (tf.squeeze(
            features["aggregation_function_id"], axis=[1])
                                   if do_model_aggregation else None)

        do_model_classification = config.num_classification_labels > 0
        classification_class_index = (tf.squeeze(
            features["classification_class_index"], axis=[1])
                                      if do_model_classification else None)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = table_bert.create_model(
            features=features,
            mode=mode,
            bert_config=config.bert_config,
            disabled_features=config.disabled_features,
            disable_position_embeddings=config.disable_position_embeddings,
            reset_position_index_per_cell=config.reset_position_index_per_cell,
            proj_value_length=config.proj_value_length,
        )

        answer, numeric_values, numeric_values_scale = (
            utils.extract_answer_from_features(
                features=features,
                use_answer_as_supervision=config.use_answer_as_supervision))
        outputs = _get_classification_outputs(
            config=config,
            output_layer=model.get_sequence_output(),
            output_layer_aggregation=model.get_pooled_output(),
            label_ids=label_ids,
            input_mask=input_mask,
            table_mask=table_mask,
            aggregation_function_id=aggregation_function_id,
            answer=answer,
            numeric_values=numeric_values,
            numeric_values_scale=numeric_values_scale,
            is_training=is_training,
            row_ids=row_ids,
            column_ids=column_ids,
            classification_class_index=classification_class_index)
        total_loss = outputs.total_loss

        tvars = tf.trainable_variables()
        if config.reset_output_cls:
            tvars = [
                tvar for tvar in tvars
                if ("output_weights_cls" not in tvar.name
                    and "output_bias_cls" not in tvar.name)
            ]
        initialized_variable_names = set()
        scaffold_fn = None
        init_from_checkpoints = []

        def add_init_checkpoint(init_checkpoint, scope=None):
            if not init_checkpoint:
                return
            (assignment_map, initialized_variables
             ) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                             init_checkpoint,
                                                             scope=scope)
            initialized_variable_names.update(initialized_variables.keys())
            init_from_checkpoints.append((init_checkpoint, assignment_map))

        add_init_checkpoint(config.init_checkpoint)

        if init_from_checkpoints:
            if config.use_tpu:

                def tpu_scaffold():
                    for init_checkpoint, assignment_map in init_from_checkpoints:
                        tf.train.init_from_checkpoint(init_checkpoint,
                                                      assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                for init_checkpoint, assignment_map in init_from_checkpoints:
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)

        fail_if_missing = init_from_checkpoints and params.get(
            "fail_if_missing_variables_in_checkpoint", False)
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            elif fail_if_missing:
                if "layer_norm" not in var.name and "LayerNorm" not in var.name:
                    tf.logging.fatal("Variable not found in checkpoint: %s",
                                     var.name)
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss,
                config.learning_rate,
                config.num_train_steps,
                config.num_warmup_steps,
                config.use_tpu,
                gradient_accumulation_steps=params.get(
                    "gradient_accumulation_steps", 1),
                grad_clipping=config.grad_clipping)

            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (_calculate_eval_metrics_fn, [
                total_loss,
                label_ids,
                outputs.logits,
                input_mask,
                aggregation_function_id,
                outputs.logits_aggregation,
                classification_class_index,
                outputs.logits_cls,
            ])
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            predictions = {
                "probabilities": outputs.probs,
                "input_ids": features["input_ids"],
                "column_ids": features["column_ids"],
                "row_ids": features["row_ids"],
                "segment_ids": features["segment_ids"],
                "question_id_ints": features["question_id_ints"],
            }
            if "question_id" in features:
                # Only available when predicting on GPU.
                predictions["question_id"] = features["question_id"]
                del predictions["question_id_ints"]
            if do_model_aggregation:
                predictions.update({
                    "gold_aggr":
                    features["aggregation_function_id"],
                    "pred_aggr":
                    tf.argmax(
                        outputs.logits_aggregation,
                        axis=-1,
                        output_type=tf.int32,
                    )
                })
            if do_model_classification:
                predictions.update({
                    "gold_cls":
                    features["classification_class_index"],
                    "pred_cls":
                    tf.argmax(
                        outputs.logits_cls,
                        axis=-1,
                        output_type=tf.int32,
                    )
                })
                if config.num_classification_labels == 2:
                    predictions.update({
                        "logits_cls":
                        outputs.logits_cls[:, 1] - outputs.logits_cls[:, 0]
                    })
                else:
                    predictions.update({"logits_cls": outputs.logits_cls})
            if outputs.span_indexes is not None and outputs.span_logits is not None:
                predictions.update({"span_indexes": outputs.span_indexes})
                predictions.update({"span_logits": outputs.span_logits})

            if custom_prediction_keys:
                predictions = {
                    key: predictions[key]
                    for key in custom_prediction_keys
                }
            output_spec = tf.estimator.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        return output_spec
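A hedged sketch of wiring a model_fn like this into the TPU estimator;
run_config, the input function and the batch sizes are illustrative stand-ins,
not values from the original setup:

estimator = tf.estimator.tpu.TPUEstimator(
    use_tpu=config.use_tpu,
    model_fn=model_fn,
    config=run_config,  # a tf.estimator.tpu.RunConfig built elsewhere
    train_batch_size=32,
    eval_batch_size=32,
    predict_batch_size=32)
estimator.train(input_fn=train_input_fn, max_steps=config.num_train_steps)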
Example no. 16
    def meta_optimize(self):
        """Meta optimization step."""

        probe_images, probe_labels = self.probe_images, self.probe_labels
        labels = self.labels
        net = self.net
        logits = self.logits
        gate_gradients = 1

        batch_size = int(self.batch_size / self.strategy.num_replicas_in_sync)
        init_eps_val = float(1) / batch_size

        meta_net = networks.MetaImage(self.net, name='meta_model')

        if FLAGS.meta_momentum and not self.optimizer.variables():
            # Initialize the optimizer's momentum state for the meta momentum
            # update. This is a hacky implementation.
            logging.info('Pre-initialize optimizer momentum states.')
            idle_net_cost = tf.losses.sparse_softmax_cross_entropy(
                self.labels, logits)
            tmp_var_grads = self.optimizer.compute_gradients(
                tf.reduce_mean(idle_net_cost), net.trainable_variables)
            self.optimizer.apply_gradients(tmp_var_grads)

        with tf.name_scope('coefficient'):
            # Data weight coefficient
            target = tf.constant([init_eps_val] * batch_size,
                                 shape=(batch_size, ),
                                 dtype=tf.float32,
                                 name='weight')
            # Data re-labeling coefficient
            eps = tf.constant([FLAGS.grad_eps_init] * batch_size,
                              shape=(batch_size, ),
                              dtype=tf.float32,
                              name='eps')

        onehot_labels = tf.one_hot(labels, self.dataset.num_classes)
        onehot_labels = tf.cast(onehot_labels, tf.float32)
        eps_k = tf.reshape(eps, [batch_size, 1])

        mixed_labels = eps_k * onehot_labels + (1 - eps_k) * self.guessed_label
        # raw softmax loss
        log_softmax = tf.nn.log_softmax(logits)
        net_cost = -tf.reduce_sum(mixed_labels * log_softmax, 1)

        lookahead_loss = tf.reduce_sum(tf.multiply(target, net_cost))
        lookahead_loss = lookahead_loss + net.regularization_loss

        with tf.control_dependencies([lookahead_loss]):
            train_vars = net.trainable_variables
            var_grads = tf.gradients(lookahead_loss,
                                     train_vars,
                                     gate_gradients=gate_gradients)

            static_vars = []
            for i in range(len(train_vars)):
                if FLAGS.meta_momentum > 0:
                    actual_grad = self.meta_momentum_update(
                        var_grads[i], train_vars[i].name, self.optimizer)
                    static_vars.append(
                        tf.math.subtract(train_vars[i],
                                         FLAGS.meta_stepsize * actual_grad))
                else:
                    static_vars.append(
                        tf.math.subtract(train_vars[i],
                                         FLAGS.meta_stepsize * var_grads[i]))
                # new style
                meta_net.add_variable_alias(static_vars[-1],
                                            var_name=train_vars[i].name)

            for uv in net.updates_variables:
                meta_net.add_variable_alias(uv,
                                            var_name=uv.name,
                                            var_type='updates_variables')
            meta_net.verbose()

        with tf.control_dependencies(static_vars):
            g_logits = meta_net(probe_images,
                                name='meta_model',
                                reuse=True,
                                training=True)

            desired_y = tf.one_hot(probe_labels, self.dataset.num_classes)
            meta_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                desired_y, g_logits)
            meta_loss = tf.reduce_mean(meta_loss, name='meta_loss')
            meta_loss = meta_loss + meta_net.get_regularization_loss(net.wd)
            meta_acc, meta_acc_op = tf.metrics.accuracy(
                probe_labels, tf.argmax(g_logits, axis=1))

        with tf.control_dependencies([meta_loss] + [meta_acc_op]):
            meta_train_vars = meta_net.trainable_variables
            grad_meta_vars = tf.gradients(meta_loss,
                                          meta_train_vars,
                                          gate_gradients=gate_gradients)
            grad_target, grad_eps = tf.gradients(static_vars, [target, eps],
                                                 grad_ys=grad_meta_vars,
                                                 gate_gradients=gate_gradients)
        # updates weight
        raw_weight = target - grad_target
        raw_weight = raw_weight - init_eps_val
        unorm_weight = tf.clip_by_value(raw_weight,
                                        clip_value_min=0,
                                        clip_value_max=float('inf'))
        norm_c = tf.reduce_sum(unorm_weight)
        weight = tf.divide(unorm_weight, norm_c + 0.00001)

        # Gets the new eps by the sign of its gradient.
        new_eps = tf.where(grad_eps < 0,
                           x=tf.ones_like(eps),
                           y=tf.zeros_like(eps))

        return tf.stop_gradient(weight), tf.stop_gradient(
            new_eps), meta_loss, meta_acc
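In symbols, a sketch of the update implemented above (notation mine): with
mixed labels \tilde{y}_i = \varepsilon_i y_i + (1 - \varepsilon_i) \hat{y}_i,
per-example weights w, lookahead loss
\mathcal{L}(\theta) = \sum_i w_i \, \ell_i(\theta; \tilde{y}_i) + R(\theta),
and virtual step \theta' = \theta - \alpha \nabla_\theta \mathcal{L}, the
coefficients are updated from the probe-set loss \mathcal{L}_{probe}(\theta')
as

    w \leftarrow \frac{\max(w - \nabla_w \mathcal{L}_{probe} - w_0,\, 0)}
                      {\sum_j \max(\cdot,\, 0) + 10^{-5}}, \qquad
    \varepsilon_i \leftarrow \mathbb{1}[\nabla_{\varepsilon_i} \mathcal{L}_{probe} < 0],

where w_0 = 1 / batch_size is the uniform initial weight and \alpha is
FLAGS.meta_stepsize.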
Example no. 17
def too_close_condition(trip, depth_threshold=0.1):
    depths = trip.depth[:3, :, :, 0]
    depthmax = tf.reduce_max(depths)
    depths = tf.where(tf.equal(depths, 0.0), depthmax * tf.ones_like(depths),
                      depths)
    return tf.greater(tf.reduce_min(depths), depth_threshold)
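A hedged usage note: the predicate reduces to a scalar boolean, so it can gate
a tf.data pipeline directly (a dataset of `trip` structures carrying a `depth`
tensor of shape [N, H, W, 1] is assumed):

# Keep only triplets whose closest valid depth exceeds the threshold.
dataset = dataset.filter(
    lambda trip: too_close_condition(trip, depth_threshold=0.1))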
Example no. 18
    def train_step(self):
        def step_fn(inputs):
            """Step functon.

      Args:
        inputs: inputs from data iterator

      Returns:
        a set of variables want to observe in Tensorboard
      """
            net = self.net
            (all_images, labels), (self.probe_images,
                                   self.probe_labels) = inputs
            assert len(all_images.shape) == 5
            images, self.aug_images = all_images[:, 0], all_images[:, 1]

            self.images, self.labels = images, labels
            batch_size = int(self.batch_size /
                             self.strategy.num_replicas_in_sync)

            logits = net(images,
                         name='model',
                         reuse=tf.AUTO_REUSE,
                         training=True)
            self.logits = logits

            # other losses
            # initialized first to use self.guessed_label for meta step
            xe_loss, cs_loss = self.unsupervised_loss()

            # meta optimization
            weight, eps, meta_loss, meta_acc = self.meta_optimize()

            ## Losses w.r.t. the new weight and eps.
            onehot_labels = tf.one_hot(labels, self.dataset.num_classes)
            onehot_labels = tf.cast(onehot_labels, tf.float32)
            eps_k = tf.reshape(eps, [batch_size, 1])

            mixed_labels = tf.math.add(eps_k * onehot_labels,
                                       (1 - eps_k) * self.guessed_label,
                                       name='mixed_labels')
            net_cost = tf.losses.softmax_cross_entropy(
                mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
            # loss with initial weight
            net_loss1 = tf.reduce_mean(net_cost)

            # loss with initial eps
            init_eps = tf.constant([FLAGS.grad_eps_init] * batch_size,
                                   dtype=tf.float32)
            init_eps = tf.reshape(init_eps, (-1, 1))
            init_mixed_labels = tf.math.add(
                init_eps * onehot_labels, (1 - init_eps) * self.guessed_label,
                name='init_mixed_labels')

            net_cost2 = tf.losses.softmax_cross_entropy(
                init_mixed_labels, logits, reduction=tf.losses.Reduction.NONE)
            net_loss2 = tf.reduce_sum(tf.math.multiply(net_cost2, weight))

            net_loss = (net_loss1 + net_loss2) / 2

            net_loss = net_loss + tf.add_n([xe_loss, cs_loss])
            net_loss += net.regularization_loss
            # Rescale by the number of replicas (GPUs).
            net_loss /= self.strategy.num_replicas_in_sync

            with tf.control_dependencies(net.updates):
                net_grads = tf.gradients(net_loss, net.trainable_variables)
                minimizer_op = self.optimizer.apply_gradients(
                    zip(net_grads, net.trainable_variables),
                    global_step=self.global_step)

            with tf.control_dependencies([minimizer_op]):
                train_op = self.ema.apply(net.trainable_variables)

            acc_op, acc_update_op = self.acc_func(labels,
                                                  tf.argmax(logits, axis=1))

            with tf.control_dependencies([train_op, acc_update_op]):
                return (tf.identity(net_loss), tf.identity(xe_loss),
                        tf.identity(cs_loss), tf.identity(meta_loss),
                        tf.identity(meta_acc), tf.identity(acc_op),
                        tf.identity(weight), tf.identity(labels))

        # end of parallel
        (pr_net_loss, pr_xe_loss, pr_cs_loss, pr_metaloss, pr_metaacc, pr_acc,
         pr_weight, pr_labels) = self.strategy.run(
             step_fn,
             args=((next(self.train_input_iterator),
                    next(self.probe_input_iterator)), ))
        # collect device variables
        weights = self.strategy.unwrap(pr_weight)
        weights = tf.concat(weights, axis=0)
        labels = self.strategy.unwrap(pr_labels)
        labels = tf.concat(labels, axis=0)

        mean_acc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_acc)
        mean_metaacc = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                            pr_metaacc)
        net_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                        pr_net_loss)
        xe_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_xe_loss)
        cs_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN, pr_cs_loss)
        meta_loss = self.strategy.reduce(tf.distribute.ReduceOp.MEAN,
                                         pr_metaloss)

        # The following adds summaries for TensorBoard visualization.
        merges = []
        merges.append(tf.summary.scalar('acc/train', mean_acc))
        merges.append(tf.summary.scalar('loss/xemin', xe_loss))
        merges.append(tf.summary.scalar('loss/consistency', cs_loss))
        merges.append(tf.summary.scalar('loss/net', net_loss))
        merges.append(tf.summary.scalar('loss/meta', meta_loss))
        merges.append(tf.summary.scalar('acc/meta', mean_metaacc))
        if hasattr(self, 'eval_acc_on_train'):
            merges.append(
                tf.summary.scalar('acc/eval_on_train',
                                  self.eval_acc_on_train[0]))
            merges.append(
                tf.summary.scalar('acc/eval_on_train_top5',
                                  self.eval_acc_on_train[1]))
            merges.append(
                tf.summary.scalar('acc/num_eval', self.eval_acc_on_train[2]))

        zw_inds = tf.squeeze(
            tf.where(tf.less_equal(weights, 0), name='zero_weight_index'))
        merges.append(
            tf.summary.scalar(
                'weights/zeroratio',
                tf.math.divide(tf.cast(tf.size(zw_inds), tf.float32),
                               tf.cast(tf.size(weights), tf.float32))))

        self.epoch_var = tf.cast(self.global_step / self.iter_epoch,
                                 tf.float32)
        merges.append(tf.summary.scalar('epoch', self.epoch_var))
        merges.append(tf.summary.scalar('learningrate', self.learning_rate))
        summary = tf.summary.merge(merges)

        return [
            net_loss, meta_loss, xe_loss, cs_loss, mean_acc, mean_metaacc,
            summary, weights
        ]
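A hedged sketch of driving this TF1-style step in a session; the `trainer`
object owning the strategy, the iterators and this method is assumed from
context:

fetches = trainer.train_step()  # builds the distributed graph once
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    (net_loss, meta_loss, xe_loss, cs_loss,
     acc, meta_acc, summary_str, weights) = sess.run(fetches)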
Example no. 19
    def decode(self, tf_example_string_tensor):
        """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask area in squared pixels.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
        shape [None] indicating if a class is present in the image (1.0) or
        a class is not present in the image (0.0).
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilities.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features]
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features
    """
        serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))
        is_crowd = fields.InputDataFields.groundtruth_is_crowd
        tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        def default_groundtruth_weights():
            return tf.ones([
                tf.shape(
                    tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
            ],
                           dtype=tf.float32)

        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.shape(tensor_dict[
                    fields.InputDataFields.groundtruth_weights])[0], 0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
            # Set all keypoints that are not labeled to NaN.
            gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
            gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
            visibilities_tiled = tf.tile(
                tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
            tensor_dict[gt_kpt_fld] = tf.where(
                visibilities_tiled, tensor_dict[gt_kpt_fld],
                np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

        if self._expand_hierarchy_labels:
            input_fields = fields.InputDataFields
            image_classes, image_confidences = self._expand_image_label_hierarchy(
                tensor_dict[input_fields.groundtruth_image_classes],
                tensor_dict[input_fields.groundtruth_image_confidences])
            tensor_dict[input_fields.groundtruth_image_classes] = image_classes
            tensor_dict[input_fields.groundtruth_image_confidences] = (
                image_confidences)

            box_fields = [
                fields.InputDataFields.groundtruth_group_of,
                fields.InputDataFields.groundtruth_is_crowd,
                fields.InputDataFields.groundtruth_difficult,
                fields.InputDataFields.groundtruth_area,
                fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_weights,
            ]

            def expand_field(field_name):
                return self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[field_name])

            # pylint: disable=cell-var-from-loop
            for field in box_fields:
                if field in tensor_dict:
                    tensor_dict[field] = tf.cond(
                        tf.size(tensor_dict[field]) > 0,
                        lambda: expand_field(field),
                        lambda: tensor_dict[field])
            # pylint: enable=cell-var-from-loop

            tensor_dict[input_fields.groundtruth_classes] = (
                self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[input_fields.groundtruth_classes], True))

        if fields.InputDataFields.groundtruth_group_of in tensor_dict:
            group_of = fields.InputDataFields.groundtruth_group_of
            tensor_dict[group_of] = tf.cast(tensor_dict[group_of],
                                            dtype=tf.bool)

        if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_num_points],
                    dtype=tf.int32)
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_part_ids],
                    dtype=tf.int32)

        return tensor_dict
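A hedged usage sketch; `TfExampleDecoder` stands in for whatever class defines
this `decode` method, and the constructor arguments are assumed:

decoder = TfExampleDecoder()
serialized = tf.placeholder(tf.string, shape=[])
tensor_dict = decoder.decode(serialized)
image = tensor_dict[fields.InputDataFields.image]  # uint8, [H, W, 3]
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]  # float32, [N, 4]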
Example no. 20
def resize_and_crop_image_v2(image,
                             short_side,
                             long_side,
                             padded_size,
                             aug_scale_min=1.0,
                             aug_scale_max=1.0,
                             seed=1,
                             method=tf.image.ResizeMethod.BILINEAR):
    """Resizes the input image to output size (Faster R-CNN style).

  Resize and pad images given the specified short / long side length and the
  stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and first try to rescale the short
     side of the original image to `short_side`.
  2. If the image scaled in step 1 has a long side exceeding `long_side`, keep
     the aspect ratio and instead rescale the long side of the image to
     `long_side`.
  3. Pad the rescaled image to `padded_size`.

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    short_side: a scalar `Tensor` or `int` representing the desired short side
      to be rescaled to.
    long_side: a scalar `Tensor` or `int` representing the desired long side to
      be rescaled to.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding will be applied
      after scaling the image to the desired_size.
    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
      random scale applied to desired_size for training scale jittering.
    aug_scale_max: a `float` with range between [1.0, inf] representing maximum
      random scale applied to desired_size for training scale jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals `padded_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
    with tf.name_scope('resize_and_crop_image_v2'):
        image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

        scale_using_short_side = (short_side /
                                  tf.minimum(image_size[0], image_size[1]))
        scale_using_long_side = (long_side /
                                 tf.maximum(image_size[0], image_size[1]))

        scaled_size = tf.round(image_size * scale_using_short_side)
        scaled_size = tf.where(
            tf.greater(tf.maximum(scaled_size[0], scaled_size[1]), long_side),
            tf.round(image_size * scale_using_long_side), scaled_size)
        desired_size = scaled_size

        random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

        if random_jittering:
            random_scale = tf.random_uniform([],
                                             aug_scale_min,
                                             aug_scale_max,
                                             seed=seed)
            scaled_size = tf.round(random_scale * scaled_size)

        # Computes 2D image_scale.
        image_scale = scaled_size / image_size

        # Selects non-zero random offset (x, y) if scaled image is larger than
        # desired_size.
        if random_jittering:
            max_offset = scaled_size - desired_size
            max_offset = tf.where(tf.less(max_offset, 0),
                                  tf.zeros_like(max_offset), max_offset)
            offset = max_offset * tf.random_uniform([
                2,
            ], 0, 1, seed=seed)
            offset = tf.cast(offset, tf.int32)
        else:
            offset = tf.zeros((2, ), tf.int32)

        scaled_image = tf.image.resize_images(image,
                                              tf.cast(scaled_size, tf.int32),
                                              method=method)

        if random_jittering:
            # Crop back to the pre-jitter desired size; the slice bounds must
            # be integers, so cast the (float) desired_size first.
            desired_size_int = tf.cast(desired_size, tf.int32)
            scaled_image = scaled_image[
                offset[0]:offset[0] + desired_size_int[0],
                offset[1]:offset[1] + desired_size_int[1], :]

        output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                                    padded_size[0],
                                                    padded_size[1])

        image_info = tf.stack([
            image_size,
            tf.cast(desired_size, dtype=tf.float32), image_scale,
            tf.cast(offset, tf.float32)
        ])
        return output_image, image_info
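A hedged numeric walkthrough of the resize logic above (no jittering): for a
500 x 2000 image with short_side=800 and long_side=1333, the short-side scale
800 / 500 = 1.6 would give (800, 3200); the long side exceeds 1333, so the
long-side scale 1333 / 2000 = 0.6665 is used instead, giving roughly
(333, 1333), which is then padded. The call below uses illustrative values:

# `image` is an [H, W, 3] float tensor assumed to come from elsewhere.
output_image, image_info = resize_and_crop_image_v2(
    image,
    short_side=800,
    long_side=1333,
    padded_size=[1344, 1344])  # e.g. long_side rounded up to a multiple of 64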
Example no. 21
def logmarglike_twotransfergaussians(
    ells,
    y,  # (..., dy)
    yinvvar,  # (..., dy)
    M_T,  #  (..., dt, dy),
    z,  #  (..., dz)
    zinvvar,  #  (..., dz)
    R_T,  #  (..., dt, dz),
    perm=(0, 2, 1),  # tuple to avoid a mutable default argument
):
    """
    Fit linear model to two Gaussian data sets

    Parameters
    ----------
    ells : ndarray (nobj, )
        scaling between the data: y = ell * z
    y, yinvvar : ndarray (nobj, ..., n_pix_y)
        data and data inverse variances
    M_T : ndarray (..., n_components, n_pix_y)
        design matrix of linear model
    z, zinvvar : ndarray (nobj, ..., n_pix_z)
        data and data inverse variances for z
    R_T : ndarray (..., n_components, n_pix_z)
        design matrix of linear model for z
    perm : list
        permutation to get M and R from R_T and M_T

    Returns
    -------
    logfml : ndarray (nobj, )
        log marginal likelihood values, with the linear parameters marginalised
    theta_map : ndarray (nobj, ndim)
        Best fit MAP parameters
    theta_cov : ndarray (nobj, ndim, ndim)
        Parameter covariance

    """
    log2pi = tf.cast(tf.math.log(2.0 * np.pi), T)
    nt = tf.cast(tf.shape(M_T)[-2], T)
    ny = tf.cast(
        tf.math.count_nonzero(yinvvar > 0), T
    )  # number of y pixels with positive inverse variance
    nz = tf.cast(
        tf.math.count_nonzero(zinvvar > 0), T
    )  # number of z pixels with positive inverse variance
    M = tf.transpose(M_T, perm)  # tf.einsum("...ij->...ji", M_T)
    R = tf.transpose(R_T, perm)  # tf.einsum("...ij->...ji", R_T)
    Hbar = ells[..., None, None] ** 2 * tf.matmul(
        R_T, R * zinvvar[..., :, None]
    ) + tf.matmul(
        M_T, M * yinvvar[..., :, None]
    )  #  (..., dt, dt)
    etabar = ells[..., None] * tf.reduce_sum(
        R_T * (z * zinvvar)[..., None, :], axis=-1
    ) + tf.reduce_sum(
        M_T * (y * yinvvar)[..., None, :], axis=-1
    )  # (..., dt)
    theta_map = tf.linalg.solve(Hbar, etabar[..., None])[..., 0]  # (..., dt)
    theta_cov = tf.linalg.inv(Hbar)
    logdetH = tf.reduce_sum(
        tf.where(zinvvar > 0, tf.math.log(zinvvar), zinvvar * 0), axis=-1
    ) + tf.reduce_sum(tf.where(yinvvar > 0, tf.math.log(yinvvar), yinvvar * 0), axis=-1)
    xi1 = -0.5 * (
        (ny + nz) * log2pi
        - logdetH
        + tf.reduce_sum(y * y * yinvvar, axis=-1)
        + tf.reduce_sum(z * z * zinvvar, axis=-1)
    )
    logdetHbar = tf.linalg.logdet(Hbar)
    xi2 = -0.5 * (nt * log2pi - logdetHbar + tf.reduce_sum(etabar * theta_map, axis=-1))
    logfml = xi1 - xi2
    return logfml, theta_map, theta_cov
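Read directly off the code above (a sketch in matrix notation, writing
C_y^{-1} = diag(yinvvar) and C_z^{-1} = diag(zinvvar)):

    \bar{H} = \ell^2 R^\top C_z^{-1} R + M^\top C_y^{-1} M, \qquad
    \bar{\eta} = \ell \, R^\top C_z^{-1} z + M^\top C_y^{-1} y,

    \theta_{\mathrm{MAP}} = \bar{H}^{-1} \bar{\eta}, \qquad
    \Sigma_\theta = \bar{H}^{-1},

    \log \mathcal{Z} = \xi_1 - \xi_2, \qquad
    \xi_1 = -\tfrac{1}{2}\big[(n_y + n_z)\log 2\pi - {\textstyle\sum}\log(\mathrm{invvar})
            + y^\top C_y^{-1} y + z^\top C_z^{-1} z\big],

    \xi_2 = -\tfrac{1}{2}\big[n_t \log 2\pi - \log\det\bar{H}
            + \bar{\eta}^\top \theta_{\mathrm{MAP}}\big],

where the invvar sum runs over the positive inverse variances of both data
sets and n_y, n_z count only those pixels.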
Example no. 22
def _calculate_expected_result(dist_per_cell, numeric_values,
                               numeric_values_scale, input_mask_float,
                               logits_aggregation, config):
    """Calculate the expected result given cell and aggregation probabilities."""
    if config.use_gumbel_for_cells:
        gumbel_dist = tfp.distributions.RelaxedBernoulli(
            # The token logits were already divided by the temperature and used
            # for computing cell selection errors, so we multiply it back here.
            config.temperature,
            logits=dist_per_cell.logits_parameter() * config.temperature)
        scaled_probability_per_cell = gumbel_dist.sample()
    else:
        scaled_probability_per_cell = _get_probs(dist_per_cell)

    # <float32>[batch_size, seq_length]
    scaled_probability_per_cell = (scaled_probability_per_cell /
                                   numeric_values_scale) * input_mask_float
    count_result = tf.reduce_sum(scaled_probability_per_cell, axis=1)
    numeric_values_masked = tf.where(
        tf.is_nan(numeric_values), tf.zeros_like(numeric_values),
        numeric_values)  # Mask non-numeric table values to zero.
    sum_result = tf.reduce_sum(scaled_probability_per_cell *
                               numeric_values_masked,
                               axis=1)
    avg_approximation = config.average_approximation_function
    if avg_approximation == AverageApproximationFunction.RATIO:
        average_result = sum_result / (count_result + _EPSILON_ZERO_DIVISION)
    elif avg_approximation == AverageApproximationFunction.FIRST_ORDER:
        # The sum of all probabilities except those that correspond to other
        # cells.
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        average_result = tf.reduce_sum(numeric_values_masked *
                                       scaled_probability_per_cell / ex,
                                       axis=1)
    elif avg_approximation == AverageApproximationFunction.SECOND_ORDER:
        # The sum of all probabilities except those that correspond to other
        # cells.
        ex = (
            tf.reduce_sum(scaled_probability_per_cell, axis=1, keepdims=True) -
            scaled_probability_per_cell + 1)
        pointwise_var = (scaled_probability_per_cell *
                         (1 - scaled_probability_per_cell))
        var = tf.reduce_sum(pointwise_var, axis=1,
                            keepdims=True) - pointwise_var
        multiplier = (var / tf.math.square(ex) + 1) / ex
        average_result = tf.reduce_sum(
            numeric_values_masked * scaled_probability_per_cell * multiplier,
            axis=1)
    else:
        tf.logging.error("Invalid average_approximation_function: %s",
                         config.average_approximation_function)

    if config.use_gumbel_for_agg:
        gumbel_dist = tfp.distributions.RelaxedOneHotCategorical(
            config.agg_temperature, logits=logits_aggregation[:, 1:])
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = gumbel_dist.sample()
    else:
        # <float32>[batch_size, num_aggregation_labels - 1]
        aggregation_op_only_probs = tf.nn.softmax(logits_aggregation[:, 1:] /
                                                  config.agg_temperature,
                                                  axis=-1)
    all_results = tf.concat([
        tf.expand_dims(sum_result, axis=1),
        tf.expand_dims(average_result, axis=1),
        tf.expand_dims(count_result, axis=1)
    ],
                            axis=1)
    expected_result = tf.reduce_sum(all_results * aggregation_op_only_probs,
                                    axis=1)
    return expected_result
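With cell values v_i and scaled selection probabilities p_i, the three average
approximations above compute, as a sketch:

    RATIO:        avg ≈ (Σ_i p_i v_i) / (Σ_i p_i + ε)
    FIRST_ORDER:  avg ≈ Σ_i p_i v_i / E_i,  with  E_i = Σ_j p_j - p_i + 1
    SECOND_ORDER: avg ≈ Σ_i p_i v_i (σ_i² / E_i² + 1) / E_i,
                  with  σ_i² = Σ_j p_j (1 - p_j) - p_i (1 - p_i)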
Example no. 23
def huber_loss(x, delta=1.0):
    """Reference: https://en.wikipedia.org/wiki/Huber_loss"""
    return tf.where(
        tf.abs(x) < delta,
        tf.square(x) * 0.5, delta * (tf.abs(x) - 0.5 * delta))
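A quick hedged sanity check of the piecewise form (quadratic below delta,
linear above): for delta=1, huber(0.5) = 0.5 * 0.25 = 0.125 and
huber(2.0) = 1 * (2.0 - 0.5) = 1.5.

with tf.Session() as sess:
    print(sess.run(huber_loss(tf.constant([0.5, 2.0]))))  # ~[0.125, 1.5]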
Example no. 24
def _single_column_cell_selection_loss(token_logits, column_logits, label_ids,
                                       cell_index, col_index, cell_mask):
    """Computes the loss for cell selection constrained to a single column.

  The loss is a hierarchical log-likelihood. The model first predicts a column
  and then selects cells within that column (conditioned on the column). Cells
  outside the selected column are never selected.

  Args:
    token_logits: <float>[batch_size, seq_length] Logits per token.
    column_logits: <float>[batch_size, max_num_cols] Logits per column.
    label_ids: <int32>[batch_size, seq_length] Labels per token.
    cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
      groups tokens into cells.
    col_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
      groups tokens into columns.
    cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask per
      cell, 1 for cells that exist in the example and 0 for padding.

  Returns:
    selection_loss_per_example: <float>[batch_size] Loss for each example.
    logits: <float>[batch_size, seq_length] New logits which are only allowed
      to select cells in a single column. Logits outside of the most likely
      column according to `column_logits` will be set to a very low value
      (such that the probabilities are 0).
  """
    # First find the column we should select. We use the column with maximum
    # number of selected cells.
    labels_per_column, _ = segmented_tensor.reduce_sum(
        tf.cast(label_ids, tf.float32), col_index)
    column_label = tf.argmax(labels_per_column, axis=-1, output_type=tf.int32)
    # Check if there are no selected cells in the column. In that case the model
    # should predict the special column id 0, which means "select nothing".
    no_cell_selected = tf.equal(tf.reduce_max(labels_per_column, axis=-1), 0)
    column_label = tf.where(no_cell_selected, tf.zeros_like(column_label),
                            column_label)

    column_dist = tfp.distributions.Categorical(logits=column_logits)
    column_loss_per_example = -column_dist.log_prob(column_label)

    # Reduce the labels and logits to per-cell from per-token.
    logits_per_cell, _ = segmented_tensor.reduce_mean(token_logits, cell_index)
    labels_per_cell, labels_index = segmented_tensor.reduce_max(
        tf.cast(label_ids, tf.int32), cell_index)

    # Mask for the selected column.
    column_id_for_cells = cell_index.project_inner(labels_index).indices
    column_mask = tf.cast(
        tf.equal(column_id_for_cells, tf.expand_dims(column_label, axis=1)),
        tf.float32)

    # Compute the log-likelihood for cells, but only for the selected column.
    cell_dist = tfp.distributions.Bernoulli(logits=logits_per_cell)
    cell_log_prob = cell_dist.log_prob(labels_per_cell)
    cell_loss = -tf.reduce_sum(cell_log_prob * column_mask * cell_mask, axis=1)
    # We need to normalize the loss by the number of cells in the column.
    cell_loss /= tf.reduce_sum(column_mask * cell_mask,
                               axis=1) + _EPSILON_ZERO_DIVISION

    selection_loss_per_example = column_loss_per_example
    selection_loss_per_example += tf.where(
        no_cell_selected, tf.zeros_like(selection_loss_per_example), cell_loss)

    # Set the probs outside the selected column (selected by the *model*)
    # to 0. This ensures backwards compatibility with models that select
    # cells from multiple columns.
    selected_column_id = tf.argmax(column_logits,
                                   axis=-1,
                                   output_type=tf.int32)
    selected_column_mask = tf.cast(
        tf.equal(column_id_for_cells,
                 tf.expand_dims(selected_column_id, axis=-1)), tf.float32)
    # Never select cells with the special column id 0.
    selected_column_mask = tf.where(tf.equal(column_id_for_cells, 0),
                                    tf.zeros_like(selected_column_mask),
                                    selected_column_mask)
    logits_per_cell += _CLOSE_ENOUGH_TO_LOG_ZERO * (
        1.0 - cell_mask * selected_column_mask)
    logits = segmented_tensor.gather(logits_per_cell, cell_index)

    return selection_loss_per_example, logits
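As a sketch, the loss above factorises hierarchically as

    -log p(selection) = -log p(col*) + cell term,
    cell term = -(1 / |col*|) Σ_{j ∈ col*} log p(cell_j | col*),

where col* is the column containing the most labelled cells (or the special
column 0 when nothing is selected, in which case the cell term is dropped) and
|col*| is the number of cells in that column.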
Example no. 25
def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
                                  max_number_of_boxes):
    """Extracts groundtruth data from detection_model and prepares it for eval.

  Args:
    detection_model: A `DetectionModel` object.
    class_agnostic: Whether the detections are class_agnostic.
    max_number_of_boxes: Max number of groundtruth boxes.

  Returns:
    A tuple of:
    groundtruth: Dictionary with the following fields:
      'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes,
        in normalized coordinates.
      'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
        classes.
      'groundtruth_masks': 4D float32 tensor of instance masks (if provided in
        groundtruth)
      'groundtruth_is_crowd': [batch_size, num_boxes] bool tensor indicating
        is_crowd annotations (if provided in groundtruth).
      'groundtruth_area': [batch_size, num_boxes] float32 tensor indicating
        the area (in the original absolute coordinates) of annotations (if
        provided in groundtruth).
      'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
        of groundtruth boxes per image.
      'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
        tensor of keypoints (if provided in groundtruth).
      'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
        with the number of DensePose points for each instance (if provided in
        groundtruth).
      'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
        max_sampled_points] int32 tensor with the part ids for each DensePose
        sampled point (if provided in groundtruth).
      'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
        max_sampled_points, 4] containing the DensePose surface coordinates for
        each sampled point (if provided in groundtruth).
      'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
        group_of annotations (if provided in groundtruth).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
        tensor of 1-indexed classes.
    class_agnostic: Boolean indicating whether detections are class agnostic.
  """
    input_data_fields = fields.InputDataFields()
    groundtruth_boxes = tf.stack(
        detection_model.groundtruth_lists(fields.BoxListFields.boxes))
    groundtruth_boxes_shape = tf.shape(groundtruth_boxes)
    # For class-agnostic models, groundtruth one-hot encodings collapse to all
    # ones.
    if class_agnostic:
        groundtruth_classes_one_hot = tf.ones(
            [groundtruth_boxes_shape[0], groundtruth_boxes_shape[1], 1])
    else:
        groundtruth_classes_one_hot = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.classes))
    label_id_offset = 1  # Applying label id offset (b/63711816)
    groundtruth_classes = (tf.argmax(groundtruth_classes_one_hot, axis=2) +
                           label_id_offset)
    groundtruth = {
        input_data_fields.groundtruth_boxes: groundtruth_boxes,
        input_data_fields.groundtruth_classes: groundtruth_classes
    }

    if detection_model.groundtruth_has_field(
            additional_fields.InputDataFields.y_rotation_angle):
        groundtruth[
            additional_fields.GroundtruthResultFields.
            y_rotation_angles] = tf.stack(
                detection_model.groundtruth_lists(
                    additional_fields.InputDataFields.y_rotation_angle))

    if detection_model.groundtruth_has_field(fields.BoxListFields.masks):
        groundtruth[input_data_fields.groundtruth_instance_masks] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.masks))

    if detection_model.groundtruth_has_field(fields.BoxListFields.is_crowd):
        groundtruth[input_data_fields.groundtruth_is_crowd] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.is_crowd))

    if detection_model.groundtruth_has_field(
            input_data_fields.groundtruth_area):
        groundtruth[input_data_fields.groundtruth_area] = tf.stack(
            detection_model.groundtruth_lists(
                input_data_fields.groundtruth_area))

    if detection_model.groundtruth_has_field(fields.BoxListFields.keypoints):
        groundtruth[input_data_fields.groundtruth_keypoints] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.keypoints))

    if detection_model.groundtruth_has_field(
            fields.BoxListFields.keypoint_visibilities):
        groundtruth[
            input_data_fields.groundtruth_keypoint_visibilities] = tf.stack(
                detection_model.groundtruth_lists(
                    fields.BoxListFields.keypoint_visibilities))

    if detection_model.groundtruth_has_field(fields.BoxListFields.group_of):
        groundtruth[input_data_fields.groundtruth_group_of] = tf.stack(
            detection_model.groundtruth_lists(fields.BoxListFields.group_of))

    if detection_model.groundtruth_has_field(
            fields.InputDataFields.groundtruth_labeled_classes):
        labeled_classes_list = detection_model.groundtruth_lists(
            fields.InputDataFields.groundtruth_labeled_classes)
        labeled_classes = [
            tf.where(x)[:, 0] + label_id_offset for x in labeled_classes_list
        ]
        if len(labeled_classes) > 1:
            num_classes = labeled_classes_list[0].shape[0]
            padded_labeled_classes = []
            for x in labeled_classes:
                padding = num_classes - tf.shape(x)[0]
                padded_labeled_classes.append(tf.pad(x, [[0, padding]]))
            groundtruth[
                input_data_fields.groundtruth_labeled_classes] = tf.stack(
                    padded_labeled_classes)
        else:
            groundtruth[
                input_data_fields.groundtruth_labeled_classes] = tf.stack(
                    labeled_classes)

    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_num_points):
        groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
            detection_model.groundtruth_lists(
                fields.BoxListFields.densepose_num_points))
    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_part_ids):
        groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
            detection_model.groundtruth_lists(
                fields.BoxListFields.densepose_part_ids))
    if detection_model.groundtruth_has_field(
            fields.BoxListFields.densepose_surface_coords):
        groundtruth[
            input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
                detection_model.groundtruth_lists(
                    fields.BoxListFields.densepose_surface_coords))
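    # Record the (padded) number of boxes, max_number_of_boxes, for every
    # image in the batch.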
    groundtruth[input_data_fields.num_groundtruth_boxes] = (tf.tile(
        [max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
    return groundtruth
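
A minimal standalone sketch of the labeled-classes handling above: per-image
k-hot vectors become padded lists of 1-based class ids (made-up tensors; a
tf.greater is added to give tf.where an explicit boolean condition):

import tensorflow as tf

label_id_offset = 1  # class ids are 1-based in the groundtruth dict
# Two images, three classes; 1.0 marks a class as labeled for that image.
labeled_classes_list = [tf.constant([1.0, 0.0, 1.0]),
                        tf.constant([0.0, 1.0, 0.0])]
labeled_classes = [
    tf.where(tf.greater(x, 0))[:, 0] + label_id_offset
    for x in labeled_classes_list
]
# Pad each ragged row to num_classes so the lists stack into one tensor.
num_classes = labeled_classes_list[0].shape[0]  # 3
padded = [tf.pad(x, [[0, num_classes - tf.shape(x)[0]]])
          for x in labeled_classes]
stacked = tf.stack(padded)  # -> [[1, 3, 0], [2, 0, 0]]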
Example n. 26
def _get_classification_outputs(
    config,
    is_training,
    output_layer,
    output_layer_aggregation,
    label_ids,
    input_mask,
    table_mask,
    aggregation_function_id,
    answer,
    numeric_values,
    numeric_values_scale,
    row_ids,
    column_ids,
    classification_class_index,
):
    """Creates a classification model.

  Args:
    config: Configuration for Tapas model.
    is_training: Whether the model is training.
    output_layer: <float32>[batch_size, seq_length, hidden_size]
    output_layer_aggregation: <float32>[batch_size, hidden_size]
    label_ids: <int32>[batch_size, seq_length]
    input_mask: <int32>[batch_size, seq_length]
    table_mask: <int32>[batch_size, seq_length]
    aggregation_function_id: <int32>[batch_size]
    answer: <float32>[batch_size]
    numeric_values: <float32>[batch_size, seq_length]
    numeric_values_scale: <float32>[batch_size, seq_length]
    row_ids: <int32>[batch_size, seq_length]
    column_ids: <int32>[batch_size, seq_length]
    classification_class_index: <int32>[batch_size]

  Returns:
    Outputs
  """
    if is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Construct indices for the table.
    row_index = segmented_tensor.IndexMap(indices=tf.minimum(
        row_ids, config.max_num_rows - 1),
                                          num_segments=config.max_num_rows,
                                          batch_dims=1)
    col_index = segmented_tensor.IndexMap(indices=tf.minimum(
        column_ids, config.max_num_columns - 1),
                                          num_segments=config.max_num_columns,
                                          batch_dims=1)
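    # ProductIndexMap combines the row and column maps into one cell id per
    # token (effectively row_id * num_columns + col_id), so the reductions
    # below can aggregate token values per table cell.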
    cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

    # Masks.
    # <float32>[batch_size, seq_length]
    input_mask_float = tf.cast(input_mask, tf.float32)
    table_mask_float = tf.cast(table_mask, tf.float32)
    # Mask for cells that exist in the table (i.e. that are not padding).
    cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float, cell_index)

    # Compute logits per token. These are used to select individual cells.
    logits = utils.compute_token_logits(
        output_layer=output_layer,
        temperature=config.temperature,
        init_cell_selection_weights_to_zero=(
            config.init_cell_selection_weights_to_zero))

    # Compute logits per column. These are used to select a column.
    if config.select_one_column:
        column_logits = utils.compute_column_logits(
            output_layer=output_layer,
            cell_index=cell_index,
            cell_mask=cell_mask,
            init_cell_selection_weights_to_zero=(
                config.init_cell_selection_weights_to_zero),
            allow_empty_column_selection=config.allow_empty_column_selection)

    # TODO(pawelnow): Extract this into a function.
    # Compute aggregation function logits.
    do_model_aggregation = config.num_aggregation_labels > 0
    if do_model_aggregation:
        hidden_size_agg = output_layer_aggregation.shape[-1].value
        output_weights_agg = tf.get_variable(
            "output_weights_agg",
            shape=[config.num_aggregation_labels, hidden_size_agg],
            initializer=_classification_initializer())
        output_bias_agg = tf.get_variable(
            "output_bias_agg",
            shape=[config.num_aggregation_labels],
            initializer=tf.zeros_initializer())

    do_model_classification = config.num_classification_labels > 0
    logits_cls = None
    if do_model_classification:
        logits_cls = compute_classification_logits(
            config.num_classification_labels, output_layer_aggregation)

    with tf.variable_scope("loss"):
        total_loss = 0.0
        is_supervised = (not do_model_aggregation
                         or not config.use_answer_as_supervision)

        ### Semi-supervised cell selection in case of no aggregation
        #############################################################

        # If the answer (the denotation) appears directly in the table we might
        # select the answer without applying any aggregation function. There are
        # some ambiguous cases, see _calculate_aggregate_mask for more info.
        # `aggregate_mask` is 1 for examples where we chose to aggregate and 0
        #  for examples where we chose to select the answer directly.
        # `label_ids` encodes the positions of the answer appearing in the table.
        if is_supervised:
            aggregate_mask = None
        else:
            # <float32>[batch_size]
            aggregate_mask = _calculate_aggregate_mask(
                answer=answer,
                output_layer_aggregation=output_layer_aggregation,
                output_bias_agg=output_bias_agg,
                output_weights_agg=output_weights_agg,
                cell_select_pref=config.cell_select_pref,
                label_ids=label_ids)

        ### Cell selection log-likelihood
        ###################################

        if config.average_logits_per_cell:
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
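            # gather() copies each cell's mean logit back to every token in
            # the cell, so all tokens of a cell share one logit.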
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        selection_loss_per_example = None
        if config.select_one_column:
            selection_loss_per_example, logits = _single_column_cell_selection_loss(
                token_logits=logits,
                column_logits=column_logits,
                label_ids=label_ids,
                cell_index=cell_index,
                col_index=col_index,
                cell_mask=cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)
        else:
            weight = tf.where(
                tf.equal(label_ids, 0),
                tf.ones_like(label_ids, dtype=tf.float32),
                config.positive_weight *
                tf.ones_like(label_ids, dtype=tf.float32))
            selection_loss_per_token = -dist_per_token.log_prob(
                label_ids) * weight
            selection_loss_per_example = (
                tf.reduce_sum(selection_loss_per_token * input_mask_float,
                              axis=1) /
                (tf.reduce_sum(input_mask_float, axis=1) +
                 _EPSILON_ZERO_DIVISION))

        ### Logits for the aggregation function
        #########################################

        logits_aggregation = None
        if do_model_aggregation:
            logits_aggregation = _calculate_aggregation_logits(
                output_layer_aggregation, output_weights_agg, output_bias_agg)

        ### Classification loss
        ###############################
        if do_model_classification:
            one_hot_labels = tf.one_hot(classification_class_index,
                                        depth=config.num_classification_labels,
                                        dtype=tf.float32)
            if config.classification_label_weight:
                label_weights = [
                    config.classification_label_weight.get(i, 1.0)
                    for i in range(config.num_classification_labels)
                ]
                one_hot_labels *= tf.constant(label_weights, dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
            # <float32>[batch_size]
            per_example_classification_intermediate = -tf.reduce_sum(
                one_hot_labels * log_probs, axis=-1)

            cls_loss = tf.reduce_mean(per_example_classification_intermediate)
            total_loss += cls_loss

        ### Supervised cell selection
        ###############################

        span_indexes = None
        span_logits = None
        if config.span_prediction != SpanPredictionMode.NONE:
            (
                span_indexes,
                span_logits,
                span_loss,
            ) = span_prediction_utils.get_span_logits_by_mode(
                config.span_prediction,
                output_layer,
                label_ids,
                column_ids,
                row_ids,
                max_span_length=10,
            )
            total_loss += span_loss
        elif config.disable_per_token_loss:
            pass
        elif config.mask_examples_without_labels:
            total_loss += tf.reduce_mean(
                span_prediction_utils.compute_masked_example_loss(
                    label_ids,
                    selection_loss_per_example,
                ))
        elif is_supervised:
            total_loss += tf.reduce_mean(selection_loss_per_example)
        else:
            # In the unsupervised case, only apply the cell selection loss to
            # examples where the answer is selected directly
            # (aggregate_mask == 0).
            total_loss += tf.reduce_mean(selection_loss_per_example *
                                         (1.0 - aggregate_mask))

        ### Semi-supervised regression loss and supervised loss for aggregations
        #########################################################################

        if do_model_aggregation:
            # Note that `aggregate_mask` is None if the setting is supervised.
            per_example_additional_loss = _calculate_aggregation_loss(
                logits_aggregation, aggregate_mask, aggregation_function_id,
                config)

            if config.use_answer_as_supervision:
                # Add regression loss for numeric answers which require aggregation.
                answer_loss, large_answer_loss_mask = _calculate_regression_loss(
                    answer, aggregate_mask, dist_per_token, numeric_values,
                    numeric_values_scale, table_mask_float, logits_aggregation,
                    config)
                per_example_additional_loss += answer_loss
                # Zero loss for examples with answer_loss > cutoff.
                per_example_additional_loss *= large_answer_loss_mask

            total_loss += tf.reduce_mean(per_example_additional_loss)

        return Outputs(
            total_loss=total_loss,
            logits=logits,
            probs=_get_probs(dist_per_token) * input_mask_float,
            logits_aggregation=logits_aggregation,
            logits_cls=logits_cls,
            span_indexes=span_indexes,
            span_logits=span_logits,
        )
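
The core of the weighted cell selection loss above is a per-token Bernoulli
negative log-likelihood, up-weighted on positive tokens and averaged over real
(non-padding) tokens. A minimal sketch with made-up tensors:

import tensorflow as tf
import tensorflow_probability as tfp

logits = tf.constant([[2.0, -1.0, 0.5]])     # <float32>[1, seq_length]
label_ids = tf.constant([[1, 0, 1]])         # 1 where the answer appears
input_mask = tf.constant([[1.0, 1.0, 0.0]])  # last position is padding
positive_weight = 10.0                       # stands in for config.positive_weight

dist_per_token = tfp.distributions.Bernoulli(logits=logits)
weight = tf.where(tf.equal(label_ids, 0),
                  tf.ones_like(label_ids, dtype=tf.float32),
                  positive_weight * tf.ones_like(label_ids, dtype=tf.float32))
loss_per_token = -dist_per_token.log_prob(label_ids) * weight
loss_per_example = (tf.reduce_sum(loss_per_token * input_mask, axis=1) /
                    (tf.reduce_sum(input_mask, axis=1) + 1e-10))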
Example n. 27
def elements_model(elements_texts_enc, feature_map, output_size, elements_mask,
                   ref_enc, flags):
    """The part of the model that processes the elements text and boxes.

  This assumes that the text has already been preprocessed with the text_model.
  Even if you are only using the elements and not the referring expression, you
  should probably use the ref_elements_model since that also handles
  preprocessing with the text_model.

  Args:
    elements_texts_enc: The elements text encoded by the text_model. Size:
      [batch_size * elements_per_query, text_embed_size]
    feature_map: Features used by the model.
    output_size: Desired output size of the encoding. Format: [length, width,
      depth]
    elements_mask: Mask indicating which elements exist in the input.
    ref_enc: The referring expression encoded by the text_model. [batch_size,
      text_embed_size]
    flags: The input Flags.

  Returns:
    The encoding of the elements data.
  """

    with tf.variable_scope('elements_model'):
        elements_item_size = output_size[2]

        if flags.use_elements_boxes:
            elements_boxes = tf.identity(feature_map[ELEMENTS_BOX_ID],
                                         ELEMENTS_BOX_ID)
            flat_elements_boxes = tf.boolean_mask(elements_boxes,
                                                  elements_mask)
        else:
            elements_boxes = None
            flat_elements_boxes = None

        if ref_enc is not None:
            ref_enc_tile = tile_ref_enc_to_elements(ref_enc, elements_mask)

        elements_ref_match_enc = None
        if flags.use_elements_ref_match:
            elements_ref_match = tf.identity(
                feature_map[ELEMENTS_REF_MATCH_ID], ELEMENTS_REF_MATCH_ID)
            tf.summary.text('elements_ref_match', elements_ref_match)
            flat_elements_ref_match = tf.boolean_mask(elements_ref_match,
                                                      elements_mask)

            elements_ref_match_enc = text_model(
                flat_elements_ref_match,
                flags.pretrained_elements_ref_match_model)

        # For combining the elements with the referring expression.
        if flags.merge_ref_elements_method == 'combine' and (ref_enc
                                                             is not None):
            elements_enc = tf.concat(
                filter_none([
                    elements_texts_enc, flat_elements_boxes, ref_enc_tile,
                    elements_ref_match_enc
                ]), 1)
            elements_enc = tf.layers.dense(elements_enc,
                                           elements_item_size * 2, tf.nn.relu)
        else:
            # Configuration used for the paper's results.
            elements_enc = tf.concat(
                filter_none([
                    elements_texts_enc, flat_elements_boxes,
                    elements_ref_match_enc
                ]), 1)
            elements_enc = tf.layers.dense(elements_enc, elements_item_size,
                                           tf.nn.relu)

        neighbor_embed = None
        if flags.use_elements_neighbors:
            neighbor_embed = calc_neighbor_embed(
                feature_map[ELEMENTS_NEIGHBORS_ID], elements_enc,
                elements_mask)

        elements_enc = tf.concat(filter_none([elements_enc, neighbor_embed]),
                                 1)

        elements_enc = tf.layers.dense(elements_enc, elements_item_size,
                                       tf.nn.relu)

        attend_in = elements_enc

        # "DNN"
        elements_enc = tf.nn.dropout(elements_enc, flags.elements_keep_prob)
        elements_enc = tf.layers.dense(elements_enc, elements_item_size,
                                       tf.nn.relu)
        elements_enc = tf.nn.dropout(elements_enc, flags.elements_keep_prob)
        elements_enc = tf.layers.dense(elements_enc, elements_item_size)

        elements_enc_pre_atten = elements_enc

        if 'Atten' in flags.merge_ref_elements_method and (ref_enc
                                                           is not None):
            with tf.variable_scope('attention'):
                if elements_texts_enc is None:
                    # Prepad with 0s so the box embedding won't overlap with the ref_enc.
                    single_dot_concat = tf.zeros([
                        tf.shape(flat_elements_boxes)[0],
                        ref_enc.get_shape().as_list()[1]
                    ])
                else:
                    single_dot_concat = elements_texts_enc
                single_dot_in = tf.concat(
                    filter_none([
                        single_dot_concat,
                        flat_elements_boxes,
                        neighbor_embed,
                        elements_ref_match_enc,
                    ]), 1)
                single_dot_in = tf.concat(
                    [single_dot_in,
                     tf.ones([tf.shape(single_dot_in)[0], 1])], 1)
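                # The appended column of ones presumably serves as a constant
                # bias feature for the attention dot product below.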

                attention_mask = attention(ref_enc, attend_in, single_dot_in,
                                           elements_mask, True,
                                           flags.merge_ref_elements_method,
                                           flags)

                attention_mask = tf.expand_dims(attention_mask, 1)

                elements_enc *= attention_mask

        # Projects the element embeddings into a 2d feature map.
        if flags.elements_proj_mode != 'tile':
            with tf.variable_scope('elements_proj'):
                # Projects the elements text onto the image feature map
                # on the corresponding bounding boxes.

                assert_op = tf.Assert(tf.equal(
                    output_size[0], output_size[1]), [
                        'Assumes height and width are the same.',
                        feature_map[ELEMENTS_BOX_ID]
                    ])
                with tf.control_dependencies([assert_op]):
                    if flags.proj_elements_memop:
                        # Iterate over bounding boxes and embeddings, rendering
                        # each embedded box and accumulating the sum
                        # iteratively to save memory.
                        elements_enc = undo_mask(elements_enc, elements_mask)

                        fold_elms = tf.transpose(
                            tf.concat([elements_enc, elements_boxes], 2),
                            [1, 0, 2])

                        initializer = tf.zeros([tf.shape(elements_mask)[0]] +
                                               output_size)

                        def fold_fn(total, fold_elm):
                            elements_enc_boxes = tf.split(
                                fold_elm, [
                                    tf.shape(elements_enc)[2],
                                    tf.shape(elements_boxes)[2]
                                ], 1)
                            return total + get_filled_rect(
                                elements_enc_boxes[1], elements_enc_boxes[0],
                                output_size[0], flags.elements_proj_mode)

                        elements_enc = tf.foldl(fold_fn,
                                                fold_elms,
                                                initializer=initializer,
                                                swap_memory=True,
                                                parallel_iterations=2)

                    else:
                        # Render embeddings for all bounding boxes at once,
                        # then sum over the elements axis.
                        elements_enc = get_filled_rect(
                            flat_elements_boxes, elements_enc, output_size[0],
                            flags.elements_proj_mode)

                        elements_enc = undo_mask(elements_enc, elements_mask)

                        elements_enc = tf.reduce_sum(elements_enc, axis=1)

                # Turn sum into average.
                mask_sum = tf.cast(
                    tf.reduce_sum(tf.cast(elements_mask, tf.uint8), 1),
                    tf.float32)
                mask_sum = tf.reshape(mask_sum, [-1, 1, 1, 1])
                mask_sum = tf.where(tf.equal(mask_sum, 0),
                                    tf.ones_like(mask_sum), mask_sum)
                elements_enc /= mask_sum
                tf.summary.histogram('elements_enc', elements_enc)

                elements_enc_for_disp = tf.reduce_mean(elements_enc,
                                                       3,
                                                       keepdims=True)
                tf.summary.image('elements_enc_for_disp',
                                 elements_enc_for_disp, 4)
        else:
            # Undo the mask for feature mapping
            sequence_elements_enc = undo_mask(elements_enc, elements_mask)

            elements_enc = tf.reduce_mean(sequence_elements_enc, axis=1)
            tf.summary.histogram('elements_enc', elements_enc)

            if flags.elements_3d_output:
                elements_enc = tile_to_image(elements_enc, output_size)

        if flags.elements_3d_output:
            elements_enc.set_shape(
                [None, output_size[0], output_size[1], elements_item_size])

        # Last CNN layer of elements model
        if flags.elements_3d_output and flags.elements_cnn:
            elements_enc = tf.layers.conv2d(elements_enc,
                                            elements_enc.shape[3],
                                            3,
                                            padding='SAME',
                                            activation=tf.nn.relu,
                                            strides=1)
            elements_enc = tf.nn.dropout(elements_enc,
                                         flags.elements_keep_prob)
            elements_enc = tf.layers.conv2d(elements_enc,
                                            elements_enc.shape[3],
                                            3,
                                            padding='SAME',
                                            activation=None,
                                            strides=1)

        return elements_enc, elements_enc_pre_atten
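
The divide-guard near the end of the projection branch is a generic
masked-average pattern: divide a masked sum by the item count, replacing zero
counts with one to avoid 0/0. A small self-contained sketch with hypothetical
shapes:

import tensorflow as tf

values = tf.random.uniform([2, 5, 8])  # [batch, items, depth]
mask = tf.constant([[1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0]])  # second batch row is empty
mask_float = tf.cast(mask, tf.float32)
summed = tf.reduce_sum(values * mask_float[:, :, None], axis=1)
count = tf.reduce_sum(mask_float, axis=1, keepdims=True)
count = tf.where(tf.equal(count, 0), tf.ones_like(count), count)
mean = summed / count  # empty rows stay all-zero instead of NaN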
Example n. 28
def _calculate_eval_metrics_fn(
    loss,
    label_ids,
    logits,
    input_mask,
    aggregation_function_id,
    logits_aggregation,
    classification_class_index,
    logits_cls,
):
    """Calculates metrics for both cells and aggregation functions."""
    logits.shape.assert_has_rank(2)
    label_ids.shape.assert_has_rank(2)

    # <int32>[batch size, seq_length]
    predictions = tf.where(logits >= 0, tf.ones_like(logits, dtype=tf.int32),
                           tf.zeros_like(logits, dtype=tf.int32))
    input_mask_float = tf.cast(input_mask, tf.float32)

    loss = tf.metrics.mean(values=loss)

    # <bool>[batch size, seq_length]
    token_correct = tf.logical_or(tf.equal(label_ids, predictions),
                                  tf.logical_not(tf.cast(input_mask, tf.bool)))
    # <bool>[batch size]
    per_sequence_accuracy = tf.reduce_all(token_correct, axis=1)
    sequence_accuracy = tf.metrics.mean(values=per_sequence_accuracy)
    mean_label = tf.metrics.mean(values=tf.cast(label_ids, tf.float32),
                                 weights=input_mask_float)

    metrics = {
        "eval_loss": loss,
        "eval_sequence_accuracy": sequence_accuracy,
        "eval_mean_label": mean_label,
    }

    if logits_cls is not None:
        # <int32>[batch size]
        predictions_cls = tf.argmax(logits_cls, axis=-1, output_type=tf.int32)
        accuracy_cls = tf.metrics.accuracy(labels=classification_class_index,
                                           predictions=predictions_cls)
        mean_per_class_accuracy_cls = tf.metrics.mean_per_class_accuracy(
            labels=classification_class_index,
            predictions=predictions_cls,
            num_classes=logits_cls.shape[-1].value)
        metrics.update({
            "eval_classification_accuracy":
            accuracy_cls,
            "eval_mean_per_class_classification_accuracy":
            mean_per_class_accuracy_cls,
        })

    if logits_aggregation is not None:
        # <int32>[batch size]
        predictions_agg = tf.argmax(logits_aggregation,
                                    axis=-1,
                                    output_type=tf.int32)
        accuracy_agg = tf.metrics.accuracy(labels=aggregation_function_id,
                                           predictions=predictions_agg)
        # <bool>[batch size]
        per_sequence_agg_accuracy = tf.equal(aggregation_function_id,
                                             predictions_agg)
        # Whether cells and aggregation function predictions are both correct.
        per_sequence_joint_accuracy = tf.logical_and(per_sequence_agg_accuracy,
                                                     per_sequence_accuracy)
        joint_accuracy = tf.metrics.mean(values=per_sequence_joint_accuracy)
        metrics.update({
            "eval_aggregation_accuracy": accuracy_agg,
            "eval_joint_accuracy": joint_accuracy,
        })
    return metrics
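
The sequence accuracy above relies on one trick: padded positions are marked
as trivially correct, so tf.reduce_all over the time axis only tests real
tokens. A toy sketch:

import tensorflow as tf

label_ids = tf.constant([[1, 0, 0], [1, 1, 0]])
predictions = tf.constant([[1, 0, 1], [1, 0, 0]])
input_mask = tf.constant([[1, 1, 0], [1, 1, 1]])

token_correct = tf.logical_or(
    tf.equal(label_ids, predictions),
    tf.logical_not(tf.cast(input_mask, tf.bool)))
per_sequence_accuracy = tf.reduce_all(token_correct, axis=1)
# -> [True, False]: row 0 differs only on padding, row 1 on a real token.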
Example n. 29
def blackout_pixel_weights_by_box_regions(height,
                                          width,
                                          boxes,
                                          blackout,
                                          weights=None):
    """Apply weights at pixel locations.

  This function is used to generate the pixel weight mask (usually in the
  output image dimensions). The mask is used to ignore some regions when
  computing the loss.

  Weights are applied as follows:
  - Any region outside of a box gets the default weight 1.0
  - Any box for which an explicit weight is specified gets that weight. If
    multiple boxes overlap, the maximum of the weights is applied.
  - Any box for which blackout=True is specified gets a weight of 0.0,
    regardless of any non-zero weight specified for it. The blackout region
    also takes precedence over any overlapping boxes with non-zero weights.

  Example:
    height = 4
    width = 4
    boxes = [[0., 0., 2., 2.],
             [0., 0., 4., 2.],
             [3., 0., 4., 4.]]
    blackout = [False, False, True]
    weights = [4.0, 3.0, 2.0]
    blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
                                          weights)
    >> [[4.0, 4.0, 1.0, 1.0],
        [4.0, 4.0, 1.0, 1.0],
        [3.0, 3.0, 1.0, 1.0],
        [0.0, 0.0, 0.0, 0.0]]

  Args:
    height: int, height of the (output) image.
    width: int, width of the (output) image.
    boxes: A float tensor with shape [num_instances, 4] indicating the
      coordinates of the four corners of the boxes.
    blackout: A boolean tensor with shape [num_instances] indicating whether to
      blackout (zero-out) the weights within the box regions.
    weights: An optional float32 tensor with shape [num_instances] indicating
      a value to apply in each box region. Note that if blackout=True for a
      given box, the weight will be zero. If None, all weights are assumed to be
      1.

  Returns:
    A float tensor with shape [height, width] where values within the regions
    of the blackout boxes are 0.0, and 1.0 (or the supplied weights)
    elsewhere.
  """
    num_instances, _ = shape_utils.combined_static_and_dynamic_shape(boxes)
    # If no annotation instance is provided, return all ones (instead of
    # unexpected values) to avoid NaN loss.
    if num_instances == 0:
        return tf.ones([height, width], dtype=tf.float32)

    (y_grid, x_grid) = image_shape_to_grids(height, width)
    y_grid = tf.expand_dims(y_grid, axis=0)
    x_grid = tf.expand_dims(x_grid, axis=0)
    y_min = tf.expand_dims(boxes[:, 0:1], axis=-1)
    x_min = tf.expand_dims(boxes[:, 1:2], axis=-1)
    y_max = tf.expand_dims(boxes[:, 2:3], axis=-1)
    x_max = tf.expand_dims(boxes[:, 3:], axis=-1)

    # Make the mask with all 1.0 in the box regions.
    # Shape: [num_instances, height, width]
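    # Broadcasting: y_grid/x_grid are [1, height, width] and the box corners
    # are [num_instances, 1, 1], so the comparisons below yield a
    # [num_instances, height, width] membership mask.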
    in_boxes = tf.math.logical_and(
        tf.math.logical_and(y_grid >= y_min, y_grid < y_max),
        tf.math.logical_and(x_grid >= x_min, x_grid < x_max))

    if weights is None:
        weights = tf.ones_like(blackout, dtype=tf.float32)

    # Compute a [height, width] tensor with the maximum weight in each box, and
    # 0.0 elsewhere.
    weights_tiled = tf.tile(weights[:, tf.newaxis, tf.newaxis],
                            [1, height, width])
    weights_3d = tf.where(in_boxes, weights_tiled,
                          tf.zeros_like(weights_tiled))
    weights_2d = tf.math.maximum(tf.math.reduce_max(weights_3d, axis=0), 0.0)

    # Add 1.0 to all regions outside a box.
    weights_2d = tf.where(tf.math.reduce_any(in_boxes, axis=0), weights_2d,
                          tf.ones_like(weights_2d))

    # Now enforce that blackout regions all have zero weights.
    keep_region = tf.cast(tf.math.logical_not(blackout), tf.float32)
    keep_region_tiled = tf.tile(keep_region[:, tf.newaxis, tf.newaxis],
                                [1, height, width])
    keep_region_3d = tf.where(in_boxes, keep_region_tiled,
                              tf.ones_like(keep_region_tiled))
    keep_region_2d = tf.math.reduce_min(keep_region_3d, axis=0)
    return weights_2d * keep_region_2d
Example n. 30
def _stitch(features):
    """Stitch features on the first dimension."""
    full_mask = tf.greater(features['task'], 1)
    step_mask = tf.reduce_any(full_mask, axis=-1)
    step_mask_exclude_last = tf.pad(step_mask, [[0, 0], [0, 1]],
                                    constant_values=False)[:, 1:]
    num_sequences = common_layers.shape_list(features['task'])[0]
    num_steps = common_layers.shape_list(features['task'])[1]
    connectors = tf.constant(PADDED_CONCATENATORS)
    # Randomly select a connector for each step to join step texts.
    connector_indices = tf.random.uniform([num_sequences * num_steps],
                                          minval=0,
                                          maxval=len(PADDED_CONCATENATORS),
                                          dtype=tf.int32)
    selected_connectors = tf.reshape(
        tf.gather(connectors, connector_indices),
        [num_sequences, num_steps,
         len(PADDED_CONCATENATORS[0])])
    selected_connectors = tf.multiply(selected_connectors,
                                      tf.expand_dims(
                                          tf.to_int32(step_mask_exclude_last),
                                          2),
                                      name='connector_mask')
    features['task'] = tf.concat([features['task'], selected_connectors],
                                 axis=-1)
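    # ref_offsets[i, j]: number of real tokens (ids > 1) in sequence i before
    # step j (exclusive cumsum), used below to shift per-step reference
    # indices into positions within the flattened, stitched task.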
    ref_offsets = tf.expand_dims(
        tf.cumsum(tf.reduce_sum(tf.to_int32(tf.greater(features['task'], 1)),
                                -1),
                  exclusive=True,
                  axis=-1), 2)
    features['task'] = tf.reshape(features['task'], [num_sequences, -1])
    full_mask = tf.greater(features['task'], 1)
    full_mask_int = tf.to_int32(full_mask)
    indices = tf.where(
        tf.sequence_mask(lengths=tf.reduce_sum(full_mask_int, -1)))
    values = tf.boolean_mask(tf.reshape(features['task'], [-1]),
                             tf.reshape(full_mask, [-1]))
    sparse_task = tf.sparse.SparseTensor(indices=indices,
                                         values=values,
                                         dense_shape=tf.to_int64(
                                             tf.shape(features['task'])))
    # Stitch task and raw_task
    stitched_features = {}
    stitched_features['task'] = tf.sparse_tensor_to_dense(sparse_task)
    max_len = tf.reduce_max(
        tf.reduce_sum(tf.to_int32(tf.greater(stitched_features['task'], 1)),
                      -1))
    stitched_features['task'] = stitched_features['task'][:, :max_len]
    if 'raw_task' in features:
        connector_strs = tf.reshape(
            tf.gather(tf.constant(CONCATENATORS_STR), connector_indices),
            [num_sequences, num_steps])
        masked_connector_strs = tf.where(step_mask_exclude_last,
                                         connector_strs,
                                         tf.fill(tf.shape(connector_strs), ''))
        stitched_features['raw_task'] = tf.strings.reduce_join(
            tf.strings.reduce_join(tf.concat([
                tf.expand_dims(features['raw_task'], 2),
                tf.expand_dims(masked_connector_strs, 2)
            ],
                                             axis=2),
                                   axis=-1), -1)
    # Stitch screen sequences
    action_lengths = tf.reduce_sum(
        tf.to_int32(
            tf.greater(features['verb_refs'][:, :, 0, 1],
                       features['verb_refs'][:, :, 0, 0])), -1)
    max_action_length = tf.reduce_max(action_lengths)

    def _pad(tensor, padding_value=0):
        shape_list = common_layers.shape_list(tensor)
        assert len(shape_list) >= 2
        padding_list = [[0, 0], [0, 1]] + [[0, 0]] * (len(shape_list) - 2)
        return tf.pad(tensor[:, :max_action_length],
                      padding_list,
                      constant_values=padding_value)

    for key in features.keys():
        if key.endswith('_refs'):
            features[key] = tf.squeeze(features[key], 2)
            ref_mask = tf.expand_dims(
                tf.to_int32(
                    tf.not_equal(features[key][:, :, 0], features[key][:, :,
                                                                       1])), 2)
            stitched_features[key] = tf.multiply((features[key] + ref_offsets),
                                                 ref_mask,
                                                 name='ref_mask')
            stitched_features[key] = _pad(stitched_features[key])
        elif key in [
                'verbs', 'objects', 'consumed', 'obj_dom_pos', 'obj_text',
                'obj_type', 'obj_clickable', 'obj_screen_pos', 'verb_refs',
                'obj_refs', 'input_refs', 'obj_dom_dist'
        ]:
            features[key] = tf.squeeze(features[key], 2)
            stitched_features[key] = features[key]
            stitched_features[key] = _pad(
                stitched_features[key],
                padding_value=-1 if key == 'obj_type' else 0)
        elif key not in ['task', 'raw_task']:
            stitched_features[key] = features[key][:, 0]
    # Append eos to 'task'
    stitched_features['task'] = tf.pad(stitched_features['task'],
                                       [[0, 0], [0, 1]])
    task_mask = tf.to_int32(tf.greater(stitched_features['task'], 1))
    task_eos_mask = tf.pad(task_mask, [[0, 0], [1, 0]],
                           constant_values=1)[:, :-1]
    stitched_features['task'] = stitched_features['task'] + (task_eos_mask -
                                                             task_mask)
    # Append eos
    verb_mask = tf.to_int32(tf.greater(stitched_features['verbs'], 1))
    verb_eos_mask = tf.pad(verb_mask, [[0, 0], [1, 0]],
                           constant_values=1)[:, :-1]
    verb_eos = verb_eos_mask - verb_mask
    stitched_features['verbs'] = stitched_features['verbs'] + verb_eos
    # Append last step refs to 'verb_refs'
    task_lengths = tf.where(tf.equal(stitched_features['task'], 1))[:, 1]
    eos_pos = tf.to_int32(tf.stack([task_lengths, task_lengths + 1], axis=1))
    action_mask = tf.to_int32(
        tf.sequence_mask(action_lengths, max_action_length + 1))
    action_and_eos_mask = tf.pad(action_mask, [[0, 0], [1, 0]],
                                 constant_values=1)[:, :-1]
    verb_ref_eos = action_and_eos_mask - action_mask
    eos_refs = tf.multiply(tf.tile(tf.expand_dims(eos_pos, 1),
                                   [1, max_action_length + 1, 1]),
                           tf.expand_dims(verb_ref_eos, 2),
                           name='verb_ref_eos')
    stitched_features['verb_refs'] += eos_refs
    return stitched_features
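
The EOS insertion above uses a mask-shift trick: left-pad the token mask with
a 1 and subtract, and the difference is 1 exactly at the first padding slot.
A minimal sketch (EOS id is 1; real tokens have ids > 1):

import tensorflow as tf

task = tf.constant([[5, 7, 0, 0]])     # ids > 1 are real tokens
task = tf.pad(task, [[0, 0], [0, 1]])  # make room for EOS
task_mask = tf.cast(tf.greater(task, 1), tf.int32)  # [[1, 1, 0, 0, 0]]
task_eos_mask = tf.pad(task_mask, [[0, 0], [1, 0]],
                       constant_values=1)[:, :-1]   # [[1, 1, 1, 0, 0]]
task = task + (task_eos_mask - task_mask)
# -> [[5, 7, 1, 0, 0]]: the EOS id lands right after the last real token.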