def flip_randomly_points_and_normals_motions(points, normals, motions,
                                             is_training):
    """Flip points and normals against x or/and y axis.

  Args:
    points: A tf.float32 tensor of size [N, 3] containing points.
    normals: A tf.float32 tensor of size [N, 3] containing normals, or None.
    motions: A tf.float32 tensor of size [N, 3] containing motion vectors or
      None.
    is_training: True if in training stage. Random flipping only takes place
      during training.

  Returns:
    flipped_points: Flipped points. A tf.float32 tensor of size [N, 3].
    flipped_normals: Flipped normals. A tf.float32 tensor of size [N, 3]. It
      will be None if the input normals is None.
    flipped_motions: Flipped motions. A tf.float32 tensor of size [N, 3]. It
      will be None if the input motions is None.
  """
    if is_training:
        x_cond = tf.greater(
            tf.random.uniform([], minval=0.0, maxval=1.0, dtype=tf.float32),
            0.5)
        x_rotate = tf.cond(x_cond, lambda: tf.constant(1.0, dtype=tf.float32),
                           lambda: tf.constant(-1.0, dtype=tf.float32))
        y_cond = tf.greater(
            tf.random.uniform([], minval=0.0, maxval=1.0, dtype=tf.float32),
            0.5)
        y_rotate = tf.cond(y_cond, lambda: tf.constant(1.0, dtype=tf.float32),
                           lambda: tf.constant(-1.0, dtype=tf.float32))
        (points, normals,
         motions) = flip_points_and_normals_motions(points=points,
                                                    normals=normals,
                                                    motions=motions,
                                                    x_rotate=x_rotate,
                                                    y_rotate=y_rotate)
    return points, normals, motions
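
# A minimal usage sketch for the random flip above; the point cloud below is
# illustrative, and `flip_points_and_normals_motions` is assumed to come from
# this module.
points = tf.random.uniform([1024, 3], minval=-1.0, maxval=1.0)
normals = tf.random.uniform([1024, 3], minval=-1.0, maxval=1.0)
flipped_points, flipped_normals, _ = flip_randomly_points_and_normals_motions(
    points=points, normals=normals, motions=None, is_training=True)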
def select_slate_greedy(slate_size, s_no_click, s, q):
    """Selects the slate using the adaptive greedy algorithm.

  This algorithm corresponds to the method "GS" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    slate_size: int, the size of the recommendation slate.
    s_no_click: float tensor, the score for not clicking any document.
    s: [num_of_documents] tensor, the scores for clicking documents.
    q: [num_of_documents] tensor, the predicted q values for documents.

  Returns:
    [slate_size, 1] tensor of indices of the selected documents. Note that
    `tf.where` recovers the indices from the mask, so they are returned in
    ascending order rather than selection order.
  """
    def argmax(v, mask):
        # Shift v so every entry is >= 1, then zero out masked entries so a
        # masked document can never win the argmax.
        return tf.argmax((v - tf.reduce_min(v) + 1) * mask, axis=0)

    numerator = tf.constant(0.)
    denominator = tf.constant(0.) + s_no_click
    mask = tf.ones(tf.shape(q)[0])

    def set_element(v, i, x):
        """Returns a copy of v with element i set to x."""
        mask = tf.one_hot(i, tf.shape(v)[0])
        v_new = tf.ones_like(v) * x
        return tf.where(tf.equal(mask, 1), v_new, v)

    for _ in range(slate_size):
        k = argmax((numerator + s * q) / (denominator + s), mask)
        mask = set_element(mask, k, 0)
        numerator = numerator + tf.gather(s * q, k)
        denominator = denominator + tf.gather(s, k)

    output_slate = tf.where(tf.equal(mask, 0))
    return output_slate
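
# Hedged usage sketch for the greedy slate selector; scores and Q-values are
# made-up numbers for five candidate documents. The result has shape
# [slate_size, 1] because it comes from tf.where.
s_no_click = tf.constant(1.0)
s = tf.constant([0.2, 0.5, 0.1, 0.9, 0.3])  # click scores per document
q = tf.constant([1.0, 0.4, 2.0, 0.7, 1.5])  # predicted Q-values per document
slate = select_slate_greedy(slate_size=3, s_no_click=s_no_click, s=s, q=q)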
 def get_metric_dictionary(self):
   metrics_dict = {}
   class_recall_list = []  # used for calculating mean pixel accuracy.
   class_iou_list = []     # used for calculating mean iou.
   for c in self.class_range:
     tp = self.true_positive_metrics[c].result()
     fp = self.false_positive_metrics[c].result()
     fn = self.false_negative_metrics[c].result()
     # Classes absent from the ground truth (tp + fn == 0) yield NaN here and
     # are excluded from the means by _non_nan_mean below.
     class_recall = tp / (tp + fn)
     class_precision = tf.where(
         tf.greater(tp + fn, 0.0), _safe_div(tp, (tp + fp)),
         tf.constant(np.NaN))
     class_iou = tf.where(
         tf.greater(tp + fn, 0.0), tp / (tp + fn + fp), tf.constant(np.NaN))
     class_recall_list.append(class_recall)
     class_iou_list.append(class_iou)
     class_name = _get_class_name(class_id=c, label_map=self.label_map)
     metrics_dict[self.eval_prefix +
                  '_recall/{}'.format(class_name)] = class_recall
     metrics_dict[self.eval_prefix +
                  '_precision/{}'.format(class_name)] = class_precision
     metrics_dict[self.eval_prefix + '_iou/{}'.format(class_name)] = class_iou
   mean_pixel_accuracy = _non_nan_mean(class_recall_list)
   mean_iou = _non_nan_mean(class_iou_list)
   metrics_dict[self.eval_prefix +
                '_avg/mean_pixel_accuracy'] = mean_pixel_accuracy
   metrics_dict[self.eval_prefix + '_avg/mean_iou'] = mean_iou
   return metrics_dict
def state_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  weight_index=None,
                  state_indices=None,
                  weight_vector=1.0,
                  offset_vector=0.0,
                  summarize=False):
  """Returns the rewards that are linear mapping of next_states.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    weight_index: (integer) Index into the contexts list that specifies the
      weighting context.
    state_indices: (a list of Numpy integer array) Indices of states dimensions
      to be mapped.
    weight_vector: (a number or a list or Numpy array) The weighting vector,
      broadcastable to `next_states`.
    offset_vector: (a number, a list, or a Numpy array) The offset vector,
      broadcastable to `next_states`.
    summarize: (boolean) enable summary ops.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  del states, actions, rewards  # unused args
  stats = {}
  record_tensor(next_states, state_indices, stats)
  next_states = index_states(next_states, state_indices)
  weight = tf.constant(
      weight_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  weights = tf.expand_dims(weight, 0)
  offset = tf.constant(
      offset_vector, dtype=next_states.dtype, shape=next_states[0].shape)
  offsets = tf.expand_dims(offset, 0)
  if weight_index is not None:
    weights *= contexts[weight_index]
  rewards = tf.to_float(tf.reduce_sum(weights * (next_states+offsets), axis=1))
  if summarize:
    with tf.name_scope('RewardFn/'):
      summarize_stats(stats)
  return rewards, tf.ones_like(rewards)
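
# Hedged call sketch: reward each transition with the sum of its next-state
# dimensions (weight 1, offset 0). The tensors are illustrative;
# `record_tensor` and `index_states` are helpers from this module.
next_states = tf.constant([[0.0, 1.0], [2.0, 3.0]])
rewards, discounts = state_rewards(
    states=None, actions=None, rewards=None, next_states=next_states,
    contexts=[], weight_vector=1.0, offset_vector=0.0)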
 def __init__(self, ckpt_dir, save_epoch_freq=1, max_to_keep=3):
     self._ckpt_saved_epoch = tf.Variable(initial_value=tf.constant(
         -1, dtype=tf.dtypes.int64),
                                          name='ckpt_saved_epoch')
     self.ckpt_dir = ckpt_dir
     self.max_to_keep = max_to_keep
     self.save_epoch_freq = save_epoch_freq
def score_documents_tf(user_obs,
                       doc_obs,
                       no_click_mass=1.0,
                       is_mnl=False,
                       min_normalizer=-1.0):
    """Computes unnormalized scores given both user and document observations.

  This implements both the multinomial proportional model and the multinomial
  logit model, depending on the parameters. We assume scores are based on
  inner products of user_obs and doc_obs.

  Args:
    user_obs: An instance of AbstractUserState.
    doc_obs: A numpy array that represents the observation of all documents in
      the candidate set.
    no_click_mass: A float indicating the mass given to the no-click option.
    is_mnl: Whether to use a multinomial logit model instead of a multinomial
      proportional model.
    min_normalizer: A float (<= 0) used to offset the scores to be positive
      when using the multinomial proportional model.

  Returns:
    A float tensor that stores unnormalized scores of documents and a float
      tensor that represents the score for the action of picking no document.
  """
    user_obs = tf.reshape(user_obs, [1, -1])
    scores = tf.reduce_sum(input_tensor=tf.multiply(user_obs, doc_obs), axis=1)
    all_scores = tf.concat([scores, tf.constant([no_click_mass])], axis=0)
    if is_mnl:
        all_scores = tf.nn.softmax(all_scores)
    else:
        all_scores = all_scores - min_normalizer
    return all_scores[:-1], all_scores[-1]
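
# Hedged example: score three candidate documents against a 4-dim user
# embedding with the default multinomial proportional model; all values are
# illustrative.
user_obs = tf.constant([0.1, 0.2, 0.3, 0.4])
doc_obs = tf.constant([[1.0, 0.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0, 0.0],
                       [0.5, 0.5, 0.5, 0.5]])
doc_scores, no_click_score = score_documents_tf(user_obs, doc_obs)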
    def _build_select_slate_op(self):
        p_no_click = self._prob_no_click_ph
        p = self._doc_affinity_scores_ph
        q = self._net_outputs.q_values[0]
        with tf.name_scope('select_slate'):
            self._output_slate = self._select_slate_fn(self._slate_size,
                                                       p_no_click, p, q)

        self._output_slate = tf.Print(
            self._output_slate,
            [tf.constant('cp 1'), self._output_slate, p, q],
            summarize=10000)
        self._output_slate = tf.reshape(self._output_slate,
                                        (self._slate_size, ))

        self._action_counts = tf.get_variable(
            'action_counts',
            shape=[self._num_candidates],
            initializer=tf.zeros_initializer())
        output_slate = tf.reshape(self._output_slate, [-1])
        output_one_hot = tf.one_hot(output_slate, self._num_candidates)
        update_ops = []
        for i in range(self._slate_size):
            update_ops.append(
                tf.assign_add(self._action_counts, output_one_hot[i]))
        self._select_action_update_op = tf.group(*update_ops)
    def __call__(self, example_string):
        """Processes a single example string.

    Extracts and processes the image, and ignores the label. We assume that the
    image has three channels.

    Args:
      example_string: str, an Example protocol buffer.

    Returns:
      image_rescaled: the image, resized to `image_size x image_size` and
      rescaled to [-1, 1]. Note that Gaussian data augmentation may cause values
      to go beyond this range.
    """
        image_decoded = read_example_and_parse_image(example_string)['image']
        image_resized = tf.image.resize_images(
            image_decoded, [self.image_size, self.image_size],
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True)
        image_resized = tf.cast(image_resized, tf.float32)
        image = 2 * (image_resized / 255.0 - 0.5)  # Rescale to [-1, 1].

        if self.data_augmentation is not None:
            if self.data_augmentation.enable_gaussian_noise:
                image = image + tf.random_normal(tf.shape(
                    image)) * self.data_augmentation.gaussian_noise_std

            if self.data_augmentation.enable_jitter:
                j = self.data_augmentation.jitter_amount
                paddings = tf.constant([[j, j], [j, j], [0, 0]])
                image = tf.pad(image, paddings, 'REFLECT')
                image = tf.image.random_crop(
                    image, [self.image_size, self.image_size, 3])

        return image
def _box_rotation_regression_loss(loss_type, is_balanced,
                                  input_boxes_rotation_matrix,
                                  input_boxes_instance_id,
                                  output_boxes_rotation_matrix, delta):
  """Computes regression loss on object rotations."""

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    gt_rotation_matrix = tf.reshape(input_boxes_rotation_matrix, [-1, 9])
    predicted_rotation_matrix = tf.reshape(output_boxes_rotation_matrix,
                                           [-1, 9])
    if loss_type == 'huber':
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type == 'absolute_difference':
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError('Unknown loss type %s.' % loss_type)
    rotation_losses = loss_fn(
        y_true=gt_rotation_matrix, y_pred=predicted_rotation_matrix)
    return tf.reduce_mean(rotation_losses * tf.reshape(weights, [-1]))

  cond_input = tf.greater(tf.shape(input_boxes_rotation_matrix)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_rotation_matrix)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
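
# Hedged sanity-check sketch: a single box whose predicted rotation equals the
# ground truth should give (near-)zero loss. Shapes follow the
# reshape(..., [-1, 9]) inside the loss.
rot = tf.eye(3, batch_shape=[1])  # [1, 3, 3] identity rotation
rotation_loss = _box_rotation_regression_loss(
    loss_type='huber', is_balanced=False,
    input_boxes_rotation_matrix=rot,
    input_boxes_instance_id=tf.constant([[1]]),
    output_boxes_rotation_matrix=rot, delta=1.0)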
def _box_center_distance_loss_on_voxel_tensors_unbatched(
    inputs_1, outputs_1, loss_type, delta, is_balanced, is_intermediate):
  """Computes huber loss on predicted object centers for each voxel."""
  inputs_1, outputs_1, valid_mask = _get_voxels_valid_inputs_outputs(
      inputs_1=inputs_1, outputs_1=outputs_1)

  def loss_fn_unbatched():
    """Loss function."""
    if is_intermediate:
      output_boxes_center = outputs_1[standard_fields.DetectionResultFields
                                      .intermediate_object_center_voxels]
    else:
      output_boxes_center = outputs_1[
          standard_fields.DetectionResultFields.object_center_voxels]
    return _box_center_distance_loss(
        loss_type=loss_type,
        is_balanced=is_balanced,
        input_boxes_center=inputs_1[
            standard_fields.InputDataFields.object_center_voxels],
        input_boxes_instance_id=inputs_1[
            standard_fields.InputDataFields.object_instance_id_voxels],
        output_boxes_center=output_boxes_center,
        delta=delta)

  return tf.cond(
      tf.reduce_any(valid_mask),
      loss_fn_unbatched, lambda: tf.constant(0.0, dtype=tf.float32))
def per_voxel_point_sample_segment_func(data, segment_ids, num_segments,
                                        num_samples_per_voxel):
    """Samples features from the points within each voxel.

  Args:
    data: A tf.float32 tensor of size [N, F].
    segment_ids: A tf.int32 tensor of size [N].
    num_segments: Number of segments.
    num_samples_per_voxel: Number of features to sample per voxel. If a voxel
      contains fewer points than this, the sampled features are zero-padded.

  Returns:
    A tf.float32 tensor of size [num_segments, num_samples_per_voxel, F]
      containing the sampled (zero-padded) per-voxel point features.
  """
    num_channels = data.get_shape().as_list()[1]
    if num_channels is None:
        raise ValueError('num_channels is None.')
    n = tf.shape(segment_ids)[0]

    def _body_fn(i, indices_range, indices):
        """Computes the indices of the i-th point feature in each segment."""
        indices_i = tf.math.unsorted_segment_max(data=indices_range,
                                                 segment_ids=segment_ids,
                                                 num_segments=num_segments)
        indices_i_positive_mask = tf.greater(indices_i, 0)
        indices_i_positive = tf.boolean_mask(indices_i,
                                             indices_i_positive_mask)
        boolean_mask = tf.scatter_nd(indices=tf.cast(tf.expand_dims(
            indices_i_positive - 1, axis=1),
                                                     dtype=tf.int64),
                                     updates=tf.ones_like(indices_i_positive,
                                                          dtype=tf.int32),
                                     shape=(n, ))
        indices_range *= (1 - boolean_mask)
        indices_i *= tf.cast(indices_i_positive_mask, dtype=tf.int32)
        indices_i = tf.pad(tf.expand_dims(indices_i, axis=1),
                           paddings=[[0, 0],
                                     [i, num_samples_per_voxel - i - 1]])
        indices += indices_i
        i = i + 1
        return i, indices_range, indices

    cond = lambda i, indices_range, indices: i < num_samples_per_voxel

    (_, _, indices) = tf.while_loop(
        cond=cond,
        body=_body_fn,
        loop_vars=(tf.constant(0, dtype=tf.int32), tf.range(n) + 1,
                   tf.zeros([num_segments, num_samples_per_voxel],
                            dtype=tf.int32)))

    data = tf.pad(data, paddings=[[1, 0], [0, 0]])
    voxel_features = tf.gather(data, tf.reshape(indices, [-1]))
    return tf.reshape(voxel_features,
                      [num_segments, num_samples_per_voxel, num_channels])
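
# Hedged usage sketch: sample up to two point features per voxel for five
# points spread over three voxels; the feature values are illustrative.
data = tf.random.uniform([5, 4])
segment_ids = tf.constant([0, 0, 1, 2, 2], dtype=tf.int32)
voxel_features = per_voxel_point_sample_segment_func(
    data=data, segment_ids=segment_ids, num_segments=3,
    num_samples_per_voxel=2)  # -> shape [3, 2, 4]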
    def __init__(self,
                 env_spec,
                 callbacks=None,
                 model_class=FullyConvModel,
                 optimizer=tf.train.AdamOptimizer,
                 learning_rate=0.0001,
                 discount=0.99,
                 trajectory_length=16,
                 batch_size=32,
                 max_grads_norm=100,
                 policy_factor=1,
                 entropy_factor=0.0001,
                 value_factor=0.5):
        self.callbacks = callbacks
        self.discount = discount
        self.policy_factor = policy_factor
        self.entropy_factor = entropy_factor
        self.value_factor = value_factor

        self.input_observations = {
            name: Input(shape=spec.shape, name='input_{}'.format(name))
            for name, spec in env_spec.observation_spec.items()
        }
        self.input_actions = {
            name: Input(shape=(),
                        name='input_arg_{}_value'.format(name),
                        dtype='int32')
            for name in env_spec.action_spec
        }
        self.input_returns = Input(shape=(), name='input_returns')

        self.function_args_mask = tf.constant(
            env_spec.action_spec['function_id'].args_mask,
            dtype=tf.float32,
            name='function_args_mask')

        self.model = model_class(self.input_observations, env_spec)

        self.loss = self.build_loss()

        self.optimizer = optimizer(learning_rate=learning_rate)
        grads, vars = zip(*self.optimizer.compute_gradients(self.loss))
        grads_norm = tf.global_norm(grads)
        if max_grads_norm > 0:
            grads, _ = tf.clip_by_global_norm(grads, max_grads_norm,
                                              grads_norm)
        self.train_op = self.optimizer.apply_gradients(
            zip(grads, vars), global_step=tf.train.get_or_create_global_step())

        self.history = History(trajectory_length, batch_size, env_spec)

        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('total_loss', self.loss, family='losses')
        tf.summary.scalar('grads_norm', grads_norm)
def reset_rewards(states,
                  actions,
                  rewards,
                  next_states,
                  contexts,
                  reset_index=0,
                  reset_state=None,
                  reset_reward_function=None,
                  include_forward_rewards=True,
                  include_reset_rewards=True):
  """Returns the rewards for a forward/reset agent.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    actions: A [batch_size, num_action_dims] Tensor representing a batch
      of actions.
    rewards: A [batch_size] Tensor representing a batch of rewards.
    next_states: A [batch_size, num_state_dims] Tensor representing a batch
      of next states.
    contexts: A list of [batch_size, num_context_dims] Tensor representing
      a batch of contexts.
    reset_index: (integer) The context list index that specifies reset.
    reset_state: Reset state.
    reset_reward_function: Reward function for reset step.
    include_forward_rewards: Include the rewards from the forward pass.
    include_reset_rewards: Include the rewards from the reset pass.

  Returns:
    A new tf.float32 [batch_size] rewards Tensor, and
      tf.float32 [batch_size] discounts tensor.
  """
  reset_state = tf.constant(
      reset_state, dtype=next_states.dtype, shape=next_states.shape)
  reset_states = tf.expand_dims(reset_state, 0)

  def true_fn():
    if include_reset_rewards:
      return reset_reward_function(states, actions, rewards, next_states,
                                   [reset_states] + contexts[1:])
    else:
      return tf.zeros_like(rewards), tf.ones_like(rewards)

  def false_fn():
    if include_forward_rewards:
      return plain_rewards(states, actions, rewards, next_states, contexts)
    else:
      return tf.zeros_like(rewards), tf.ones_like(rewards)

  rewards, discounts = tf.cond(
      tf.cast(contexts[reset_index][0, 0], dtype=tf.bool), true_fn, false_fn)
  return rewards, discounts
def box_corner_distance_loss_on_object_tensors(inputs,
                                               outputs,
                                               loss_type,
                                               delta=1.0,
                                               is_balanced=False):
    """Computes regression loss on object corner locations using object tensors.

  Args:
    inputs: A dictionary of tf.Tensors with our input data.
    outputs: A dictionary of tf.Tensors with the network output.
    loss_type: Loss type.
    delta: float, the point at which the huber loss function changes from
      quadratic to linear.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for each object instance.

  Returns:
    localization_loss: A tf.float32 scalar corresponding to localization loss.
  """
    def fn(inputs_1, outputs_1):
        return _box_corner_distance_loss_on_object_tensors(
            inputs=inputs_1,
            outputs=outputs_1,
            loss_type=loss_type,
            delta=delta,
            is_balanced=is_balanced)

    batch_size = len(inputs[standard_fields.InputDataFields.objects_length])
    losses = []
    for b in range(batch_size):
        inputs_1 = batch_utils.get_batch_size_1_input_objects(inputs=inputs,
                                                              b=b)
        outputs_1 = batch_utils.get_batch_size_1_output_objects(
            outputs=outputs, b=b)
        cond_input = tf.greater(
            tf.shape(
                inputs_1[standard_fields.InputDataFields.objects_length])[0],
            0)
        cond_output = tf.greater(
            tf.shape(outputs_1[
                standard_fields.DetectionResultFields.objects_length])[0], 0)
        cond = tf.logical_and(cond_input, cond_output)
        # pylint: disable=cell-var-from-loop
        loss = tf.cond(cond,
                       lambda: fn(inputs_1=inputs_1, outputs_1=outputs_1),
                       lambda: tf.constant(0.0, dtype=tf.float32))
        # pylint: enable=cell-var-from-loop
        losses.append(loss)
    return tf.reduce_mean(tf.stack(losses))
def index_states(states, indices):
  """Return indexed states.

  Args:
    states: A [batch_size, num_state_dims] Tensor representing a batch
        of states.
    indices: (a list of Numpy integer array) Indices of states dimensions
      to be mapped.
  Returns:
    A [batch_size, num_indices] Tensor representing the batch of indexed states.
  """
  if indices is None:
    return states
  indices = tf.constant(indices, dtype=tf.int32)
  return tf.gather(states, indices=indices, axis=1)
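
# Example: keep only state dimensions 0 and 2.
states = tf.constant([[1., 2., 3.], [4., 5., 6.]])
indexed = index_states(states, indices=[0, 2])  # -> [[1., 3.], [4., 6.]]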
def _box_corner_distance_loss_on_object_tensors(inputs, outputs, loss_type,
                                                delta, is_balanced):
    """Computes huber loss on object corner locations."""
    valid_mask_class = tf.greater(
        tf.reshape(inputs[standard_fields.InputDataFields.objects_class],
                   [-1]), 0)
    valid_mask_instance = tf.greater(
        tf.reshape(inputs[standard_fields.InputDataFields.objects_instance_id],
                   [-1]), 0)
    valid_mask = tf.logical_and(valid_mask_class, valid_mask_instance)

    def fn():
        for field in standard_fields.get_input_object_fields():
            if field in inputs:
                inputs[field] = tf.boolean_mask(inputs[field], valid_mask)
        for field in standard_fields.get_output_object_fields():
            if field in outputs:
                outputs[field] = tf.boolean_mask(outputs[field], valid_mask)
        return _box_corner_distance_loss(
            loss_type=loss_type,
            is_balanced=is_balanced,
            input_boxes_length=inputs[
                standard_fields.InputDataFields.objects_length],
            input_boxes_height=inputs[
                standard_fields.InputDataFields.objects_height],
            input_boxes_width=inputs[
                standard_fields.InputDataFields.objects_width],
            input_boxes_center=inputs[
                standard_fields.InputDataFields.objects_center],
            input_boxes_rotation_matrix=inputs[
                standard_fields.InputDataFields.objects_rotation_matrix],
            input_boxes_instance_id=inputs[
                standard_fields.InputDataFields.objects_instance_id],
            output_boxes_length=outputs[
                standard_fields.DetectionResultFields.objects_length],
            output_boxes_height=outputs[
                standard_fields.DetectionResultFields.objects_height],
            output_boxes_width=outputs[
                standard_fields.DetectionResultFields.objects_width],
            output_boxes_center=outputs[
                standard_fields.DetectionResultFields.objects_center],
            output_boxes_rotation_matrix=outputs[
                standard_fields.DetectionResultFields.objects_rotation_matrix],
            delta=delta)

    return tf.cond(tf.reduce_any(valid_mask), fn,
                   lambda: tf.constant(0.0, dtype=tf.float32))
    def _build_train_op(self):
        """Builds a training op.

    Returns:
      An op performing one step of training from replay data.
    """
        # click_indicator: [B, S]
        # q_values: [B, A]
        # actions: [B, S]
        # slate_q_values: [B, S]
        # replay_click_q: [B]
        click_indicator = self._replay.rewards[:, :,
                                               self._click_response_index]
        slate_q_values = tf.compat.v1.batch_gather(
            self._replay_net_outputs.q_values,
            tf.cast(self._replay.actions, dtype=tf.int32))
        # Only get the Q from the clicked document.
        replay_click_q = tf.reduce_sum(input_tensor=slate_q_values *
                                       click_indicator,
                                       axis=1,
                                       name='replay_click_q')

        target = tf.stop_gradient(self._build_target_q_op())

        clicked = tf.reduce_sum(input_tensor=click_indicator, axis=1)
        clicked_indices = tf.squeeze(tf.compat.v1.where(tf.equal(clicked, 1)),
                                     axis=1)
        # clicked_indices is a vector and tf.gather selects the batch dimension.
        q_clicked = tf.gather(replay_click_q, clicked_indices)
        target_clicked = tf.gather(target, clicked_indices)

        def get_train_op():
            loss = tf.reduce_mean(input_tensor=tf.square(q_clicked -
                                                         target_clicked))
            if self.summary_writer is not None:
                with tf.compat.v1.variable_scope('Losses'):
                    tf.compat.v1.summary.scalar('Loss', loss)

            return loss

        loss = tf.cond(pred=tf.greater(tf.reduce_sum(input_tensor=clicked), 0),
                       true_fn=get_train_op,
                       false_fn=lambda: tf.constant(0.),
                       name='')

        return self.optimizer.minimize(loss)
def process_example(example_string, image_size, data_augmentation=None):
    """Processes a single example string.

  Extracts and processes the image, and ignores the label. We assume that the
  image has three channels.

  Args:
    example_string: str, an Example protocol buffer.
    image_size: int, desired image size. The extracted image will be resized to
      `[image_size, image_size]`.
    data_augmentation: A DataAugmentation object with parameters for perturbing
      the images.

  Returns:
    image_rescaled: the image, resized to `image_size x image_size` and rescaled
      to [-1, 1]. Note that Gaussian data augmentation may cause values to
      go beyond this range.
  """
    image_string = tf.parse_single_example(example_string,
                                           features={
                                               'image':
                                               tf.FixedLenFeature(
                                                   [], dtype=tf.string),
                                               'label':
                                               tf.FixedLenFeature([], tf.int64)
                                           })['image']
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)
    image_resized = tf.image.resize_images(
        image_decoded, [image_size, image_size],
        method=tf.image.ResizeMethod.BILINEAR,
        align_corners=True)
    image = 2 * (image_resized / 255.0 - 0.5)  # Rescale to [-1, 1].

    if data_augmentation is not None:
        if data_augmentation.enable_gaussian_noise:
            image = image + tf.random_normal(
                tf.shape(image)) * data_augmentation.gaussian_noise_std

        if data_augmentation.enable_jitter:
            j = data_augmentation.jitter_amount
            paddings = tf.constant([[j, j], [j, j], [0, 0]])
            image = tf.pad(image, paddings, 'REFLECT')
            image = tf.image.random_crop(image, [image_size, image_size, 3])

    return image
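
# Hedged pipeline sketch (TF1-style tf.data); the TFRecord path is a
# placeholder, not a file from this codebase.
dataset = tf.data.TFRecordDataset('/tmp/examples.tfrecord')
images = dataset.map(
    lambda s: process_example(s, image_size=84, data_augmentation=None))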
def _box_size_regression_loss_on_voxel_tensors_unbatched(
        inputs_1, outputs_1, loss_type, delta, is_balanced, is_intermediate):
    """Computes regression loss on predicted object size for each voxel."""
    inputs_1, outputs_1, valid_mask = _get_voxels_valid_inputs_outputs(
        inputs_1=inputs_1, outputs_1=outputs_1)

    def loss_fn_unbatched():
        """Loss function."""
        if is_intermediate:
            output_boxes_length = outputs_1[
                standard_fields.DetectionResultFields.
                intermediate_object_length_voxels]
            output_boxes_height = outputs_1[
                standard_fields.DetectionResultFields.
                intermediate_object_height_voxels]
            output_boxes_width = outputs_1[
                standard_fields.DetectionResultFields.
                intermediate_object_width_voxels]
        else:
            output_boxes_length = outputs_1[
                standard_fields.DetectionResultFields.object_length_voxels]
            output_boxes_height = outputs_1[
                standard_fields.DetectionResultFields.object_height_voxels]
            output_boxes_width = outputs_1[
                standard_fields.DetectionResultFields.object_width_voxels]
        return _box_size_regression_loss(
            loss_type=loss_type,
            is_balanced=is_balanced,
            input_boxes_length=inputs_1[
                standard_fields.InputDataFields.object_length_voxels],
            input_boxes_height=inputs_1[
                standard_fields.InputDataFields.object_height_voxels],
            input_boxes_width=inputs_1[
                standard_fields.InputDataFields.object_width_voxels],
            input_boxes_instance_id=inputs_1[
                standard_fields.InputDataFields.object_instance_id_voxels],
            output_boxes_length=output_boxes_length,
            output_boxes_height=output_boxes_height,
            output_boxes_width=output_boxes_width,
            delta=delta)

    return tf.cond(tf.reduce_any(valid_mask), loss_fn_unbatched,
                   lambda: tf.constant(0.0, dtype=tf.float32))
def _box_size_regression_loss(loss_type, is_balanced, input_boxes_length,
                              input_boxes_height, input_boxes_width,
                              input_boxes_instance_id, output_boxes_length,
                              output_boxes_height, output_boxes_width, delta):
  """Computes regression loss on object sizes."""

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    gt_length = tf.reshape(input_boxes_length, [-1, 1])
    gt_height = tf.reshape(input_boxes_height, [-1, 1])
    gt_width = tf.reshape(input_boxes_width, [-1, 1])
    predicted_length = tf.reshape(output_boxes_length, [-1, 1])
    predicted_height = tf.reshape(output_boxes_height, [-1, 1])
    predicted_width = tf.reshape(output_boxes_width, [-1, 1])
    predicted_length /= gt_length
    predicted_height /= gt_height
    predicted_width /= gt_width
    predicted_size = tf.concat(
        [predicted_length, predicted_height, predicted_width], axis=1)
    gt_size = tf.ones_like(predicted_size)
    if loss_type == 'huber':
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type == 'absolute_difference':
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError('Unknown loss type %s.' % loss_type)
    size_losses = loss_fn(y_true=gt_size, y_pred=predicted_size)
    return tf.reduce_mean(size_losses * tf.reshape(weights, [-1]))

  cond_input = tf.greater(tf.shape(input_boxes_length)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_length)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
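
# Hedged sanity-check sketch with is_balanced=False (avoids the loss_utils
# dependency): a perfect size prediction gives zero loss, since every
# predicted/ground-truth ratio equals 1.
ones = tf.constant([[1.0]])
size_loss = _box_size_regression_loss(
    loss_type='huber', is_balanced=False,
    input_boxes_length=ones, input_boxes_height=ones, input_boxes_width=ones,
    input_boxes_instance_id=tf.constant([[1]]),
    output_boxes_length=ones, output_boxes_height=ones,
    output_boxes_width=ones, delta=1.0)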
def _voxel_hard_negative_classification_loss_unbatched(inputs_1, outputs_1,
                                                       is_intermediate, gamma):
    """Loss function for input and outputs of batch size 1."""
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    if is_intermediate:
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    background_mask = tf.equal(labels, 0)
    num_background_points = tf.reduce_sum(
        tf.cast(background_mask, dtype=tf.int32))

    def loss_fn():
        """Loss function."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        masked_logits = tf.boolean_mask(logits, background_mask)
        masked_weights = tf.pow(
            1.0 - tf.reshape(tf.nn.softmax(masked_logits)[:, 0], [-1, 1]),
            gamma)
        num_points = tf.shape(masked_logits)[0]
        masked_weights = masked_weights * tf.cast(
            num_points, dtype=tf.float32) / tf.reduce_sum(masked_weights)
        masked_labels_one_hot = tf.one_hot(indices=tf.boolean_mask(
            labels, background_mask),
                                           depth=num_classes)
        loss = classification_loss_fn(logits=masked_logits,
                                      labels=masked_labels_one_hot,
                                      weights=masked_weights)
        return loss

    cond = tf.logical_and(tf.greater(num_background_points, 0),
                          tf.greater(tf.shape(labels)[0], 0))
    return tf.cond(cond, loss_fn, lambda: tf.constant(0.0, dtype=tf.float32))
def train_q(dataset,
            policy,
            optimizer=None,
            pack_transition_fn=None,
            q_graph_fn=None,
            log_dir=None,
            master='',
            task=0,
            training_steps=None,
            max_training_steps=100000,
            reuse=False,
            init_checkpoint=None,
            update_target_every_n_steps=50,
            log_every_n_steps=None,
            save_checkpoint_steps=500,
            save_summaries_steps=500):
    """Self-contained learning loop for offline Q-learning.

  Code inspired by OpenAI Baselines' deepq.build_train. This function is
  compatible with discrete Q-learning graphs, continuous Q learning graphs, and
  SARSA.

  Args:
    dataset: tf.data.Dataset providing transitions.
    policy: Instance of TFDQNPolicy class that provides functor for building the
      critic function.
    optimizer: Optional instance of an optimizer. If not specified, creates an
      AdamOptimizer using the default constructor.
    pack_transition_fn: Optional function that performs additional processing
      of the transition. This is a convenience method for ad-hoc manipulation of
      transition data passed to the learning function after parsing.
    q_graph_fn: Function used to construct training objectives w.r.t. critic
      outputs.
    log_dir: Where to save model checkpoints and tensorboard summaries.
    master: Optional address of master worker. Specify this when doing
      distributed training.
    task: Optional worker task for distributed training. Defaults to solo master
      task on a single machine.
    training_steps: Optional number of steps to run training before terminating
      early. Max_training_steps remains unchanged - training will terminate
      after max_training_steps whether or not training_steps is specified.
    max_training_steps: maximum number of training iters.
    reuse: If True, reuse existing variables for all declared variables by this
      function.
    init_checkpoint: Optional checkpoint to restore prior to training. If not
      provided, variables are initialized using global_variables_initializer().
    update_target_every_n_steps: How many global steps (training) between
      copying the Q network weights (scope='q_func') to target network
      (scope='target_q_func').
    log_every_n_steps: How many global steps between logging loss tensors.
    save_checkpoint_steps: How many global steps between saving TF variables
      to a checkpoint file.
    save_summaries_steps: How many global steps between saving TF summaries.

  Returns:
    A (global_step, done) tuple: the (int) `global_step` reached after
    training, and a bool indicating whether `global_step` has reached
    `max_training_steps`.

  Raises:
    ValueError: If a batch of transitions is empty or the zeroth element is
      empty, when it's supposed to be of length batch_size.
  """
    data_iterator = dataset.make_one_shot_iterator()

    transition = data_iterator.get_next()
    if pack_transition_fn:
        transition = pack_transition_fn(transition)

    if optimizer is None:
        optimizer = tf.train.AdamOptimizer()

    q_func = policy.get_q_func(is_training=True, reuse=reuse)
    loss, all_summaries = q_graph_fn(q_func, transition)

    q_func_vars = contrib_framework.get_trainable_variables(scope='q_func')
    target_q_func_vars = contrib_framework.get_trainable_variables(
        scope='target_q_func')
    global_step = tf.train.get_or_create_global_step()

    # Only optimize q_func and update its batchnorm params.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope='q_func')
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss,
                                      global_step=global_step,
                                      var_list=q_func_vars)

    chief_hooks = []
    hooks = []
    # Save summaries periodically.
    if save_summaries_steps is not None:
        chief_hooks.append(
            tf.train.SummarySaverHook(save_steps=save_summaries_steps,
                                      output_dir=log_dir,
                                      summary_op=all_summaries))

    # Stop after max_training_steps.
    if max_training_steps:
        hooks.append(tf.train.StopAtStepHook(last_step=max_training_steps))

    # Report if loss tensor is NaN.
    hooks.append(tf.train.NanTensorHook(loss))

    if log_every_n_steps is not None:
        tensor_dict = {'global_step': global_step, 'train loss': loss}
        chief_hooks.append(
            tf.train.LoggingTensorHook(tensor_dict,
                                       every_n_iter=log_every_n_steps))

        # Measure how fast we are training per sec and save to summary.
        chief_hooks.append(
            tf.train.StepCounterHook(every_n_steps=log_every_n_steps,
                                     output_dir=log_dir))

    # If a target network exists, periodically copy the Q network weights
    # (scope='q_func') to the target network (scope='target_q_func'). We hack
    # this by abusing a LoggingTensorHook: logging the `update_target` tensor
    # forces the assign ops to run via the control dependency below.
    if target_q_func_vars and update_target_every_n_steps is not None:
        update_target_expr = []
        for var, var_t in zip(sorted(q_func_vars, key=lambda v: v.name),
                              sorted(target_q_func_vars,
                                     key=lambda v: v.name)):
            update_target_expr.append(var_t.assign(var))
        update_target_expr = tf.group(*update_target_expr)

        with tf.control_dependencies([update_target_expr]):
            update_target = tf.constant(0)
        chief_hooks.append(
            tf.train.LoggingTensorHook(
                {'update_target': update_target},
                every_n_iter=update_target_every_n_steps))

    # Save checkpoints periodically, save all of them.
    saver = tf.train.Saver(max_to_keep=None)
    chief_hooks.append(
        tf.train.CheckpointSaverHook(log_dir,
                                     save_steps=save_checkpoint_steps,
                                     saver=saver,
                                     checkpoint_basename='model.ckpt'))

    # Save our experiment params to checkpoint dir.
    chief_hooks.append(
        gin.tf.GinConfigSaverHook(log_dir, summarize_config=True))

    session_config = tf.ConfigProto(log_device_placement=False)

    init_fn = None
    if init_checkpoint:
        assign_fn = contrib_framework.assign_from_checkpoint_fn(
            init_checkpoint, contrib_framework.get_model_variables())
        init_fn = lambda _, sess: assign_fn(sess)
    scaffold = tf.train.Scaffold(saver=saver, init_fn=init_fn)
    with tf.train.MonitoredTrainingSession(
            master=master,
            is_chief=(task == 0),
            config=session_config,
            checkpoint_dir=log_dir,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=chief_hooks) as sess:
        np_step = 0
        while not sess.should_stop():
            np_step, _ = sess.run([global_step, train_op])
            if training_steps and np_step % training_steps == 0:
                break
        done = np_step >= max_training_steps
    return np_step, done
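
# Hedged call sketch; `my_dataset`, `my_policy`, and `my_q_graph_fn` are
# placeholders for project-specific objects, not names from this module.
global_step, done = train_q(
    dataset=my_dataset,
    policy=my_policy,
    q_graph_fn=my_q_graph_fn,
    log_dir='/tmp/q_train',
    max_training_steps=100000)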
 def filter_fn(e):
   return tf.math.reduce_all(
       tf.math.not_equal(e['tfds_id'], tf.constant(valid_tfds_ids_np)))
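
# Usage sketch: drop every example whose 'tfds_id' appears in the held-out id
# array `valid_tfds_ids_np` referenced above (assumed to be in scope).
train_dataset = train_dataset.filter(filter_fn)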
def _box_classification_using_center_distance_loss_unbatched(
        inputs_1, outputs_1, is_intermediate, is_balanced,
        max_positive_normalized_distance):
    """Loss function for input and outputs of batch size 1."""
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    if is_intermediate:
        output_object_centers = outputs_1[standard_fields.DetectionResultFields
                                          .intermediate_object_center_voxels]
        output_object_length = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_length_voxels]
        output_object_height = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_height_voxels]
        output_object_width = outputs_1[standard_fields.DetectionResultFields.
                                        intermediate_object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            intermediate_object_rotation_matrix_voxels]
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        output_object_centers = outputs_1[
            standard_fields.DetectionResultFields.object_center_voxels]
        output_object_length = outputs_1[
            standard_fields.DetectionResultFields.object_length_voxels]
        output_object_height = outputs_1[
            standard_fields.DetectionResultFields.object_height_voxels]
        output_object_width = outputs_1[
            standard_fields.DetectionResultFields.object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            object_rotation_matrix_voxels]
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    normalized_center_distance = loss_utils.get_normalized_corner_distances(
        predicted_boxes_center=output_object_centers,
        predicted_boxes_length=output_object_length,
        predicted_boxes_height=output_object_height,
        predicted_boxes_width=output_object_width,
        predicted_boxes_rotation_matrix=output_object_rotation_matrix,
        gt_boxes_center=inputs_1[
            standard_fields.InputDataFields.object_center_voxels],
        gt_boxes_length=inputs_1[
            standard_fields.InputDataFields.object_length_voxels],
        gt_boxes_height=inputs_1[
            standard_fields.InputDataFields.object_height_voxels],
        gt_boxes_width=inputs_1[
            standard_fields.InputDataFields.object_width_voxels],
        gt_boxes_rotation_matrix=inputs_1[
            standard_fields.InputDataFields.object_rotation_matrix_voxels])
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    instances = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_instance_id_voxels],
        [-1])
    params = {}
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(instances, axis=1))
        params['weights'] = weights

    def loss_fn():
        """Loss function."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        labels_one_hot = tf.one_hot(indices=(labels - 1),
                                    depth=(num_classes - 1))
        inverse_distance_coef = tf.maximum(
            tf.minimum(
                1.0 -
                normalized_center_distance / max_positive_normalized_distance,
                1.0), 0.0)
        labels_one_hot = tf.reshape(inverse_distance_coef,
                                    [-1, 1]) * labels_one_hot
        background_label = 1.0 - tf.math.reduce_sum(
            labels_one_hot, axis=1, keepdims=True)
        labels_one_hot = tf.concat([background_label, labels_one_hot], axis=1)
        loss = classification_loss_fn(logits=logits,
                                      labels=labels_one_hot,
                                      **params)
        return loss

    return tf.cond(tf.greater(tf.shape(labels)[0], 0), loss_fn,
                   lambda: tf.constant(0.0, dtype=tf.float32))
def npair_loss_func(embeddings,
                    instance_ids,
                    num_samples,
                    valid_mask=None,
                    max_instance_id=None,
                    similarity_strategy='dotproduct',
                    loss_strategy='softmax'):
  """N-pair metric learning loss for learning feature embeddings.

  Args:
    embeddings: A tf.float32 tensor of size [batch_size, n, f].
    instance_ids: A tf.int32 tensor of size [batch_size, n].
    num_samples: An int determining the number of samples.
    valid_mask: A tf.bool tensor of size [batch_size, n] that is True when an
      element is valid and False if it needs to be ignored. By default the value
      is None which means it is not applied.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
      embedding vectors. Possible values are 'dotproduct' and 'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  batch_size = embeddings.get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('Unknown batch size at graph construction time.')
  if max_instance_id is None:
    max_instance_id = tf.reduce_max(instance_ids)
  sampled_embeddings, sampled_instance_ids, _ = sampling_utils.balanced_sample(
      features=embeddings,
      instance_ids=instance_ids,
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id)
  losses = []
  for i in range(batch_size):
    sampled_instance_ids_i = sampled_instance_ids[i, :]
    sampled_embeddings_i = sampled_embeddings[i, :, :]
    min_ids_i = tf.math.reduce_min(sampled_instance_ids_i)
    max_ids_i = tf.math.reduce_max(sampled_instance_ids_i)
    target_i = tf.one_hot(
        sampled_instance_ids_i,
        depth=(max_instance_id + 1),
        dtype=tf.float32)

    # pylint: disable=cell-var-from-loop
    def npair_loss_i():
      return metric_learning_losses.npair_loss(
          embedding=sampled_embeddings_i,
          target=target_i,
          similarity_strategy=similarity_strategy,
          loss_strategy=loss_strategy)
    # pylint: enable=cell-var-from-loop

    loss_i = tf.cond(
        max_ids_i > min_ids_i, npair_loss_i,
        lambda: tf.constant(0.0, dtype=tf.float32))
    losses.append(loss_i)
  return tf.math.reduce_mean(losses)
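
# Hedged call sketch with random inputs; `sampling_utils` and
# `metric_learning_losses` are the helpers this codebase already imports.
embeddings = tf.random.uniform([2, 100, 8])
instance_ids = tf.random.uniform([2, 100], maxval=5, dtype=tf.int32)
loss = npair_loss_func(embeddings, instance_ids, num_samples=32)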
    def my_model_fn(features, labels, mode, params=None, config=None):
        """Estimator model function.

    Args:
      features: input features dictionary
      labels: ignored
      mode: a tf.estimator.ModeKeys
      params: an optional dict of parameters; when running on TPU it carries
        the `context` object used below
      config: an optional estimator RunConfig (unused)

    Returns:
      a TPUEstimatorSpec (on TPU) or a tf.estimator.EstimatorSpec
    """
        del labels, config
        global_step = tf.train.get_global_step()
        if use_tpu:
            ctx = params["context"]
            num_hosts = ctx.num_hosts
            host_placement_fn = ctx.tpu_host_placement_function
            device_list = [
                host_placement_fn(host_id=t) for t in range(num_hosts)
            ]
            # TODO(ylc): Better estimation of replica cache size?
            replica_cache_size = 300 * 1000000  # 300M per replica
            # Worker 0 caches all the TPU binaries.
            worker0_mem = replica_cache_size * ctx.num_replicas
            devices_memory_usage = [worker0_mem] + [0] * (num_hosts - 1)
            var_placer = mtf.utils.BalancedVariablePlacer(
                device_list, devices_memory_usage)
            mesh_devices = [""] * mesh_shape.size
            physical_shape = list(
                params["context"].device_assignment.topology.mesh_shape)
            logical_to_physical = _logical_to_physical(physical_shape,
                                                       mesh_shape)
            mesh_impl = mtf.simd_mesh_impl.SimdMeshImpl(
                mesh_shape,
                layout_rules,
                mesh_devices,
                ctx.device_assignment,
                logical_to_physical=logical_to_physical)
        else:
            var_placer = None
            mesh_devices = [""] * mesh_shape.size
            mesh_impl = mtf.placement_mesh_impl.PlacementMeshImpl(
                mesh_shape, layout_rules, mesh_devices)

        graph = mtf.Graph()
        mesh = mtf.Mesh(graph, "my_mesh", var_placer)

        def _import_feature(key, allow_missing=False):
            """Import a feature from the features dictionary into a mtf.Tensor.

      Args:
        key: a string
        allow_missing: a boolean

      Returns:
        a mtf.Tensor with dtype int32 and shape [batch_dim, length_dim]
      """
            outer_batch_dim = mtf.Dimension("outer_batch", outer_batch_size)
            batch_dim = mtf.Dimension("batch", batch_size // outer_batch_size)
            length_dim = mtf.Dimension("length", sequence_length)

            mtf_shape = mtf.Shape([outer_batch_dim, batch_dim, length_dim])
            if key not in features:
                if allow_missing:
                    return None
                else:
                    raise ValueError("feature not found %s - features %s = " %
                                     (key, features))
            tf.logging.info("Import feature %s: %s" % (key, features[key]))

            x = tf.to_int32(features[key])
            x = tf.reshape(
                x, [outer_batch_size, batch_size // outer_batch_size, -1])

            if not use_tpu:
                x = tf.Print(x, [x],
                             "import feature %s" % key,
                             summarize=1000,
                             first_n=1)
            return mtf.import_fully_replicated(mesh, x, mtf_shape, name=key)

        if mode == tf.estimator.ModeKeys.PREDICT:
            inputs = _import_feature("inputs")
            inputs = mtf.reshape(
                inputs,
                mtf.Shape([
                    mtf.Dimension("batch", batch_size),
                    mtf.Dimension("length", sequence_length)
                ]))
            if isinstance(transformer_model, transformer.Unitransformer):
                mtf_samples = transformer_model.sample_autoregressive(
                    inputs, variable_dtype=get_variable_dtype())
            elif isinstance(transformer_model, transformer.Bitransformer):
                mtf_samples = transformer_model.decode(
                    inputs, variable_dtype=get_variable_dtype())
            else:
                raise ValueError("unrecognized class")
            mtf_samples = mtf.anonymize(mtf_samples)
            lowering = mtf.Lowering(graph, {mesh: mesh_impl},
                                    autostack=autostack)
            outputs = lowering.export_to_tf_tensor(mtf_samples)
            predictions = {"outputs": outputs}
            return tpu_estimator.TPUEstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT,
                predictions=predictions,
                prediction_hooks=[mtf.MtfRestoreHook(lowering)])

        targets = _import_feature("targets")
        anon_targets = mtf.anonymize(targets)
        if model_type == "lm":
            _, length_dim = targets.shape
            inputs = mtf.shift(targets, offset=1, dim=length_dim, wrap=False)
        else:
            inputs = _import_feature("inputs")

        if mode == tf.estimator.ModeKeys.EVAL:
            if isinstance(transformer_model, transformer.Unitransformer):
                mtf_samples = transformer_model.sample_autoregressive(
                    inputs, variable_dtype=get_variable_dtype())
            elif isinstance(transformer_model, transformer.Bitransformer):
                mtf_samples = transformer_model.decode(
                    inputs, variable_dtype=get_variable_dtype())
            else:
                raise ValueError("unrecognized class")
            mtf_samples = mtf.anonymize(mtf_samples)
            lowering = mtf.Lowering(graph, {mesh: mesh_impl},
                                    autostack=autostack)
            outputs = lowering.export_to_tf_tensor(mtf_samples)
            labels = lowering.export_to_tf_tensor(anon_targets)
            restore_hook = mtf.MtfRestoreHook(lowering)

            # metric_names becomes locally scoped if we simply assign
            # ["padded_neg_log_perplexity"] to it conditioned on if it's None.
            local_metric_names = metric_names or ["token_accuracy"]

            def metric_fn(labels, outputs):
                return get_metric_fns(local_metric_names, labels, outputs)

            eval_metrics = (metric_fn, [labels, outputs])
            return tpu_estimator.TPUEstimatorSpec(
                tf.estimator.ModeKeys.EVAL,
                # Unfortunately TPUEstimatorSpec requires us to provide a value for
                # loss when in EVAL mode. Since we are sampling or decoding from the
                # model, we don't have a loss to report.
                loss=tf.constant(0.),
                evaluation_hooks=[restore_hook],
                eval_metrics=eval_metrics)

        if isinstance(transformer_model, transformer.Unitransformer):
            position_kwargs = dict(
                sequence_id=_import_feature("targets_segmentation", True),
                position=_import_feature("targets_position", True),
            )
        elif isinstance(transformer_model, transformer.Bitransformer):
            position_kwargs = dict(
                encoder_sequence_id=_import_feature("inputs_segmentation",
                                                    True),
                decoder_sequence_id=_import_feature("targets_segmentation",
                                                    True),
                encoder_position=_import_feature("inputs_position", True),
                decoder_position=_import_feature("targets_position", True),
            )
        else:
            raise ValueError("unrecognized class: %s" %
                             type(transformer_model))

        logits, loss = transformer_model.call_simple(
            inputs=inputs,
            targets=targets,
            compute_loss=True,
            mode=mode,
            variable_dtype=get_variable_dtype(),
            **position_kwargs)

        if use_tpu and logits is not None:
            logits = mtf.anonymize(logits)

        # TRAIN mode
        if mode == tf.estimator.ModeKeys.TRAIN:
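            # Differentiate the scalar loss with respect to every trainable
            # mesh variable and let Adafactor build the update ops.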
            var_grads = mtf.gradients(
                [loss], [v.outputs[0] for v in graph.trainable_variables])
            optimizer = mtf.optimize.AdafactorOptimizer(
                learning_rate=learning_rate)
            update_ops = optimizer.apply_grads(var_grads,
                                               graph.trainable_variables)

        lowering = mtf.Lowering(graph, {mesh: mesh_impl}, autostack=autostack)

        tf_loss = lowering.export_to_tf_tensor(loss)
        tf_loss = tf.to_float(tf_loss)
        if not use_tpu:
            tf_loss = tf.Print(tf_loss,
                               [tf.train.get_global_step(), tf_loss],
                               "step, tf_loss: ")

        if mode == tf.estimator.ModeKeys.TRAIN:
            tf_update_ops = [
                lowering.lowered_operation(op) for op in update_ops
            ]
            tf_update_ops.append(tf.assign_add(global_step, 1))
            train_op = tf.group(tf_update_ops)

        with mtf.utils.outside_all_rewrites():
            # Copy master variables to slices. Must be called first.
            restore_hook = mtf.MtfRestoreHook(lowering)
            saver = tf.train.Saver(tf.global_variables(),
                                   sharded=True,
                                   max_to_keep=checkpoints_to_keep,
                                   keep_checkpoint_every_n_hours=2,
                                   defer_build=False,
                                   save_relative_paths=True)
            tf.add_to_collection(tf.GraphKeys.SAVERS, saver)
            saver_listener = mtf.MtfCheckpointSaverListener(lowering)
            saver_hook = tf.train.CheckpointSaverHook(
                model_dir,
                save_steps=save_steps,
                saver=saver,
                listeners=[saver_listener])
            gin_config_saver_hook = gin.tf.GinConfigSaverHook(
                model_dir, summarize_config=True)

            if mode == tf.estimator.ModeKeys.TRAIN:
                if use_tpu:
                    return tpu_estimator.TPUEstimatorSpec(
                        mode=tf.estimator.ModeKeys.TRAIN,
                        loss=tf_loss,
                        train_op=train_op,
                        training_hooks=[
                            restore_hook,
                            saver_hook,
                            gin_config_saver_hook,
                        ])
                else:
                    return tf.estimator.EstimatorSpec(
                        tf.estimator.ModeKeys.TRAIN,
                        loss=tf_loss,
                        train_op=train_op,
                        training_chief_hooks=[
                            restore_hook,
                            saver_hook,
                            gin_config_saver_hook,
                        ])
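# A minimal usage sketch, not part of the original snippet: wiring a model_fn
# like the one above into a TPUEstimator. `my_model_fn`, `my_model_dir`,
# `train_input_fn`, and the step counts are assumed placeholders rather than
# values from the original code.
def run_training(my_model_fn, my_model_dir, train_input_fn, batch_size):
    run_config = tf.estimator.tpu.RunConfig(
        model_dir=my_model_dir,
        tpu_config=tf.estimator.tpu.TPUConfig(iterations_per_loop=100))
    estimator = tf.estimator.tpu.TPUEstimator(
        model_fn=my_model_fn,
        config=run_config,
        train_batch_size=batch_size,
        use_tpu=True)
    estimator.train(input_fn=train_input_fn, max_steps=10000)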
Example #27
    def __call__(self, example_string):
        """Processes a single example string.

    Extracts and processes the image, and ignores the label. We assume that the
    image has three channels.

    Args:
      example_string: str, an Example protocol buffer.

    Returns:
      image_rescaled: the image, resized to `image_size x image_size` and
      rescaled to [-1, 1]. Note that Gaussian data augmentation may cause values
      to go beyond this range.
    """
        image_string = tf.parse_single_example(
            example_string,
            features={
                'image': tf.FixedLenFeature([], dtype=tf.string),
                'label': tf.FixedLenFeature([], tf.int64)
            })['image']
        image_decoded = tf.image.decode_image(image_string, channels=3)
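        # decode_image returns a tensor with no static shape, so pin the
        # channel dimension before resizing.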
        image_decoded.set_shape([None, None, 3])
        image_resized = tf.image.resize_images(
            image_decoded, [self.image_size, self.image_size],
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True)
        image = tf.cast(image_resized, tf.float32)

        if self.data_augmentation is not None:
            if self.data_augmentation.enable_random_brightness:
                delta = self.data_augmentation.random_brightness_delta
                image = tf.image.random_brightness(image, delta)

            if self.data_augmentation.enable_random_saturation:
                delta = self.data_augmentation.random_saturation_delta
                image = tf.image.random_saturation(image, 1 - delta, 1 + delta)

            if self.data_augmentation.enable_random_contrast:
                delta = self.data_augmentation.random_contrast_delta
                image = tf.image.random_contrast(image, 1 - delta, 1 + delta)

            if self.data_augmentation.enable_random_hue:
                delta = self.data_augmentation.random_hue_delta
                image = tf.image.random_hue(image, delta)

            if self.data_augmentation.enable_random_flip:
                image = tf.image.random_flip_left_right(image)

        image = 2 * (image / 255.0 - 0.5)  # Rescale to [-1, 1].

        if self.data_augmentation is not None:
            if self.data_augmentation.enable_gaussian_noise:
                image = image + (tf.random_normal(tf.shape(image)) *
                                 self.data_augmentation.gaussian_noise_std)

            if self.data_augmentation.enable_jitter:
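                # Jitter: reflect-pad by j pixels on every side, then take a
                # random crop back to the original spatial size.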
                j = self.data_augmentation.jitter_amount
                paddings = tf.constant([[j, j], [j, j], [0, 0]])
                image = tf.pad(image, paddings, 'REFLECT')
                image = tf.image.random_crop(
                    image, [self.image_size, self.image_size, 3])

        return image
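# A minimal usage sketch, not part of the original snippet: applying a parser
# like the class above over a TFRecord dataset. `ExampleParser` and the file
# name are assumed placeholders.
parser = ExampleParser()  # hypothetical instance of the class defined above
filenames = ['train.tfrecord']  # placeholder input path
dataset = (tf.data.TFRecordDataset(filenames)
           .map(parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
           .batch(32)
           .prefetch(1))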
def _box_corner_distance_loss(
    loss_type, is_balanced, input_boxes_length, input_boxes_height,
    input_boxes_width, input_boxes_center, input_boxes_rotation_matrix,
    input_boxes_instance_id, output_boxes_length, output_boxes_height,
    output_boxes_width, output_boxes_center, output_boxes_rotation_matrix,
    delta):
  """Computes regression loss on object corner locations."""

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    normalized_box_size = 5.0
    predicted_boxes_length = output_boxes_length
    predicted_boxes_height = output_boxes_height
    predicted_boxes_width = output_boxes_width
    predicted_boxes_center = output_boxes_center
    predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
    gt_boxes_length = input_boxes_length
    gt_boxes_height = input_boxes_height
    gt_boxes_width = input_boxes_width
    gt_boxes_center = input_boxes_center
    gt_boxes_rotation_matrix = input_boxes_rotation_matrix
    if loss_type in ['normalized_huber', 'normalized_euclidean']:
      predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
      predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
      predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
      gt_boxes_length = tf.ones_like(
          gt_boxes_length, dtype=tf.float32) * normalized_box_size
      gt_boxes_height = tf.ones_like(
          gt_boxes_height, dtype=tf.float32) * normalized_box_size
      gt_boxes_width = tf.ones_like(
          gt_boxes_width, dtype=tf.float32) * normalized_box_size
    gt_box_corners = box_utils.get_box_corners_3d(
        boxes_length=gt_boxes_length,
        boxes_height=gt_boxes_height,
        boxes_width=gt_boxes_width,
        boxes_rotation_matrix=gt_boxes_rotation_matrix,
        boxes_center=gt_boxes_center)
    predicted_box_corners = box_utils.get_box_corners_3d(
        boxes_length=predicted_boxes_length,
        boxes_height=predicted_boxes_height,
        boxes_width=predicted_boxes_width,
        boxes_rotation_matrix=predicted_boxes_rotation_matrix,
        boxes_center=predicted_boxes_center)
    corner_weights = tf.tile(weights, [1, 8])
    if loss_type in ['huber', 'normalized_huber']:
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type in ['normalized_absolute_difference', 'absolute_difference']:
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError('Unknown loss type %s.' % loss_type)
    box_corner_losses = loss_fn(
        y_true=tf.reshape(gt_box_corners, [-1, 3]),
        y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
    return tf.reduce_mean(box_corner_losses * tf.reshape(corner_weights, [-1]))

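  # Only compute the corner loss when both the ground-truth and the predicted
  # box sets are non-empty; otherwise fall back to a zero loss.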
  cond_input = tf.greater(tf.shape(input_boxes_length)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_length)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
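# A small worked sketch, not part of the original code, illustrating the
# normalization used by the 'normalized_*' loss types above: predicted
# dimensions are rescaled by (ground truth / 5.0), so the error is measured
# relative to object size rather than in absolute units.
gt_length = 4.0
predicted_length = 4.4          # a 10% over-estimate
normalized_box_size = 5.0
normalized_pred = predicted_length / (gt_length / normalized_box_size)  # 5.5
normalized_gt = normalized_box_size                                     # 5.0
error = normalized_pred - normalized_gt  # 0.5 == 10% of the normalized size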