예제 #1
0
  def _Extract(self, features):
    """Gathers every laser feature into one set of point tensors.

    Each feature named by `self.FeatureMap()` is densified and reshaped into
    rows of `3 + p.num_features` values. The first three columns of a row are
    taken as the point coordinates and the remaining columns as the per-point
    features.

    Args:
      features: Mapping from feature name to the parsed feature value.

    Returns:
      A NestedMap with `points_xyz`, `points_feature` and `points_padding`.
      When `p.max_num_points` is None, `points_padding` is None. Otherwise the
      point tensors are padded or trimmed to `p.max_num_points` rows and
      `points_padding` is one minus a padded-or-trimmed vector of ones whose
      length is the original point count.
    """
    p = self.params
    xyz_parts = []
    feature_parts = []
    for name in self.FeatureMap():
      rows = tf.reshape(_Dense(features[name]), [-1, 3 + p.num_features])
      xyz_parts.append(rows[..., 0:3])
      feature_parts.append(rows[..., 3:])

    # Join the per-feature point sets along the leading dimension.
    points_xyz = tf.concat(xyz_parts, axis=0)
    points_feature = tf.concat(feature_parts, axis=0)

    points_padding = None
    if p.max_num_points is not None:
      # The real point count has to be read before the tensors are resized.
      num_real_points = tf.shape(points_xyz)[0]
      points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3])
      points_feature = py_utils.PadOrTrimTo(
          points_feature, [p.max_num_points, p.num_features])
      is_real = py_utils.PadOrTrimTo(
          tf.ones([num_real_points]), [p.max_num_points])
      points_padding = 1.0 - is_real

    return py_utils.NestedMap(
        points_xyz=points_xyz,
        points_feature=points_feature,
        points_padding=points_padding)
예제 #2
0
        def _TokenizeOneSentence(i, text, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Tokenizes sentence `i` of `text` and records it in the arrays.

            Written in the shape of a loop body: it receives the loop state
            and returns the state for the next iteration.

            Args:
              i: Index of the sentence to process; a Python int or a Tensor.
              text: The collection of sentences, indexed by `i`.
              token_ids_ta: Array-like with a `write(index, value)` method
                (presumably a tf.TensorArray) receiving the SOS-prefixed ids.
              target_ids_ta: Same kind of array, receiving the target ids.
              paddings_ta: Same kind of array, receiving the padding row.

            Returns:
              `(i + 1, text, token_ids_ta, target_ids_ta, paddings_ta)` with
              the three arrays updated at index `i`.
            """
            # A Tensor index cannot be used with plain `[]` on every input
            # type, so gather is used in that case.
            if tf.is_tensor(i):
                text_i = tf.gather(text, i)
            else:
                text_i = text[i]
            ids = self._tokenizer.tokenize(text_i).merge_dims(0, -1)
            ids.set_shape([None])

            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)
            sos_ids = tf.concat([[self.sos_id], ids], axis=0)
            if p.prepend_sos:
                ids = sos_ids

            # This truncates after the EOS is added, so some sentences might
            # not have EOS at the end.
            token_ids_ta = token_ids_ta.write(
                i, py_utils.PadOrTrimTo(sos_ids, [max_length], 0))
            target_ids_ta = target_ids_ta.write(
                i, py_utils.PadOrTrimTo(ids, [max_length], 0))
            # Zeros mark real tokens; positions added by padding are set to 1.
            paddings_ta = paddings_ta.write(
                i,
                py_utils.PadOrTrimTo(tf.zeros_like(ids, dtype=tf.float32),
                                     [max_length], 1.))

            # Hand back the loop variable itself rather than a name from an
            # enclosing scope, so the body does not depend on a closure.
            return i + 1, text, token_ids_ta, target_ids_ta, paddings_ta
예제 #3
0
    def _InputBatch(self):
        """Builds one batch of samples from data stored in a checkpoint.

        The data and label tensors are restored from `p.ckpt`, cast to
        float32, and cached through `ops.cached_call`. A batch of sample ids
        is then drawn with `ops.random_permutation_sequence` and used to
        gather the samples.

        Returns:
          A NestedMap with `raw`, `data` (the preprocessed `raw`), `label`
          and `weight`, each padded or trimmed to the infeed batch size.
          `weight` is built from a vector of ones with one entry per drawn
          sample id. When not running on TPU, `sample_ids` is included too.
        """
        p = self.params

        @tf.function
        def ReadData():
            names = [p.data, p.label]
            dtypes = [p.data_dtype, p.label_dtype]
            x, y = io_ops.restore_v2(p.ckpt, names, ['', ''], dtypes)
            # Both tensors are always handed out as float32.
            return tf.cast(x, tf.float32), tf.cast(y, tf.float32)

        # Restore once and keep the tensors resident for later batches.
        data, label = ops.cached_call(f=ReadData.get_concrete_function(),
                                      T=[tf.float32, tf.float32])
        batch_size = self.InfeedBatchSize()
        sample_shape = list(p.data_shape)
        data = tf.reshape(data, [-1] + sample_shape)
        label = tf.reshape(label, [-1])
        label = py_utils.HasShape(label, [tf.shape(data)[0]])
        sample_ids = ops.random_permutation_sequence(
            num=p.num_samples,
            batch=batch_size,
            repeat=p.repeat,
            seed=p.random_seed or 0)
        num_drawn = tf.shape(sample_ids)[0]
        raw = py_utils.PadOrTrimTo(
            tf.gather(data, sample_ids), [batch_size] + sample_shape)
        preprocessed = self._Preprocess(raw)
        batch_label = py_utils.PadOrTrimTo(
            tf.gather(label, sample_ids), [batch_size])
        batch_weight = py_utils.PadOrTrimTo(
            tf.ones([num_drawn], dtype=tf.float32), [batch_size])
        ret = py_utils.NestedMap(
            raw=raw, data=preprocessed, label=batch_label, weight=batch_weight)
        if py_utils.use_tpu():
            return ret
        ret['sample_ids'] = sample_ids
        return ret
예제 #4
0
    def BatchedOrientedNMSIndices(self, bboxes, scores, nms_iou_threshold,
                                  score_threshold, max_boxes_per_class):
        """Applies per-class 3D (7-DOF) non-max suppression to each example.

        Every returned tensor has shape
        [batch_size, num_classes, max_boxes_per_class].

        Args:
          bboxes: A [batch_size, num_boxes, 7] floating point Tensor of
            bounding boxes in [x, y, z, dx, dy, dz, phi] format.
          scores: A [batch_size, num_boxes, num_classes] floating point Tensor
            containing box scores.
          nms_iou_threshold: Either a float or a list of floats of len
            num_classes with the IoU threshold to use when determining whether
            two boxes overlap for purposes of suppression.
          score_threshold: Either a float or a list of floats of len
            num_classes with the score threshold that allows NMS to quickly
            ignore boxes.
          max_boxes_per_class: An integer scalar with the maximum number of
            boxes per example to emit per class.

        Returns:
          A tuple of 3 tensors:

          - bbox_indices: An int32 Tensor with the indices of the chosen
            boxes. Values are in sort order until the class_idx switches.
          - bbox_scores: A float32 Tensor with the score for each box.
          - valid_mask: A float32 Tensor with 1/0 values indicating the
            validity of each box. 1 indicates valid, and 0 invalid.
        """
        bboxes = py_utils.HasShape(bboxes, [-1, -1, 7])
        batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
        scores = py_utils.HasShape(scores, [batch_size, num_boxes, -1])
        _, _, num_classes = py_utils.GetShape(scores)

        # Expand scalar thresholds so that each class has its own entry.
        per_class = [num_classes]
        nms_iou_threshold = tf.broadcast_to(
            tf.convert_to_tensor(nms_iou_threshold), per_class)
        score_threshold = tf.broadcast_to(
            tf.convert_to_tensor(score_threshold), per_class)

        def NMSBody(args):
            example_bboxes, example_scores = args
            picked, picked_scores, picked_mask = ops.non_max_suppression_3d(
                example_bboxes,
                example_scores,
                nms_iou_threshold=nms_iou_threshold,
                score_threshold=score_threshold,
                max_boxes_per_class=max_boxes_per_class)
            return picked, picked_scores, picked_mask

        bbox_indices, bbox_scores, valid_mask = tf.map_fn(
            fn=NMSBody,
            elems=(bboxes, scores),
            dtype=(tf.int32, tf.float32, tf.float32),
            back_prop=False)

        # Resize all three results to the documented output shape.
        output_shape = [batch_size, num_classes, max_boxes_per_class]
        return tuple(
            py_utils.PadOrTrimTo(result, output_shape)
            for result in (bbox_indices, bbox_scores, valid_mask))
예제 #5
0
  def _Extract(self, features):
    """Collects the points of every configured lidar return.

    One feature named `laser_<lidar>_<return>` is read for each combination
    of `p.lidar_names` and `p.lidar_returns`. Each is densified and reshaped
    into rows of `3 + p.num_features` values, split into coordinates (first
    three columns) and features (remaining columns), and all of them are
    concatenated along the leading dimension.

    Args:
      features: Mapping from feature name to the parsed feature value.

    Returns:
      A NestedMap with `points_xyz` and `points_feature`. When
      `p.max_num_points` is not None, both are padded or trimmed to that many
      rows and `points_padding` is added, computed as one minus a
      padded-or-trimmed vector of ones of the original point count.
    """
    p = self.params
    feature_names = [
        'laser_%s_%s' % (lidar, ri)
        for lidar in p.lidar_names
        for ri in p.lidar_returns
    ]

    xyz_parts = []
    feature_parts = []
    for name in feature_names:
      rows = tf.reshape(_Dense(features[name]), [-1, 3 + p.num_features])
      xyz_parts.append(rows[..., 0:3])
      feature_parts.append(rows[..., 3:])

    # Join all returns along the leading (point) dimension.
    points_xyz = tf.concat(xyz_parts, axis=0)
    points_feature = tf.concat(feature_parts, axis=0)

    if p.max_num_points is None:
      return py_utils.NestedMap(
          points_xyz=points_xyz, points_feature=points_feature)

    # The real point count has to be read before the tensors are resized.
    num_real_points = tf.shape(points_xyz)[0]
    points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3])
    points_feature = py_utils.PadOrTrimTo(
        points_feature, [p.max_num_points, p.num_features])
    is_real = py_utils.PadOrTrimTo(
        tf.ones([num_real_points]), [p.max_num_points])
    return py_utils.NestedMap(
        points_xyz=points_xyz,
        points_feature=points_feature,
        points_padding=1.0 - is_real)
예제 #6
0
  def _InputBatchFromCKPT(self):
    """Builds one batch of ids/labels from a data tensor stored in a checkpoint.

    The tensor named `p.data` is restored from `p.ckpt`, truncated to a
    multiple of `p.batch_size` and reshaped to `[b, total_batches]`. Each call
    selects a window of up to `p.num_steps` consecutive columns; the labels
    are the same window shifted one column to the right.

    Returns:
      A NestedMap with `ids`, `labels`, `weights`, `paddings` (one minus
      `weights`), `word_count` and `take_last_state` (true for every window
      except the first one).
    """
    p = self.params

    @function.Defun()
    def ReadData():
      x, = io_ops.restore_v2(p.ckpt, [p.data], [''],
                               [p.data_dtype])
      return x

    # Loads the data into memory and keeps it around.
    data, = py_x_ops.cached_call(f=ReadData, T=[p.data_dtype])
    
    
    b = p.batch_size
    total_length = p.data_shape[0]
    # Number of columns once the data is laid out as `b` rows.
    total_batches = total_length // b
    # Number of windows of `p.num_steps` columns, rounding up so that a
    # shorter trailing window is counted too.
    total_steps = total_batches // p.num_steps
    left_over = total_batches % p.num_steps > 0
    if left_over:
      total_steps += 1
    
    if p.eval:
      # In eval mode the window index comes from an endlessly repeating
      # counter instead of the training global step.
      dataset = tf.data.Dataset.range(total_steps).repeat()
      iterator = dataset.make_one_shot_iterator()
      global_step = iterator.get_next()
    else:
      global_step = py_utils.GetOrCreateGlobalStep() - 1
    
    # Index of the window to read, wrapping around after the last one.
    batch_id = tf.to_int32(global_step % total_steps)
    
    # Drop the tail that does not fill a whole column, then lay out as rows.
    data = data[:total_batches * b]
    data = tf.reshape(data, [b, total_batches])
    
    # Column range [start, end) of this window, clipped to the data width.
    start = p.num_steps * batch_id
    end = tf.minimum(tf.to_int32(total_batches), start + p.num_steps)
    raw = tf.gather(data, tf.range(start, end, dtype=tf.int32), axis=1, name='ids')
    # Labels are the ids shifted by one column; the range is clipped as well,
    # so the last window has one label column fewer than id columns.
    label_end = tf.minimum(end + 1, tf.to_int32(total_batches))
    label = tf.gather(data, tf.range(start + 1, label_end, dtype=tf.int32), axis=1, name='labels')
    raw = py_utils.PadOrTrimTo(raw, [b, end - start])
    ret = py_utils.NestedMap()
    # Leftover fixed-size variant of the outputs, kept commented out:
    # raw = tf.reshape(data[:700], [20, 35])
    # ret.ids = raw
    # ret.labels = raw
    # ret.weights = tf.ones([20, 35])
    # ret.paddings = 1.0 - ret.weights
    # ret.word_count = 700
    # ret.take_last_state = py_utils.GetOrCreateGlobalStep() > 0
    ret.ids = raw
    ret.labels = py_utils.PadOrTrimTo(label, [b, end - start])
    # NOTE(review): the ones tensor has `label_end - start` columns, one more
    # than `label` has; in the last window this appears to give weight 1 to
    # the padded label column - confirm whether that is intended.
    ret.weights = py_utils.PadOrTrimTo(tf.ones([b, label_end - start], dtype=tf.float32), [b, end - start])
    ret.paddings = 1.0 - ret.weights
    # Counts the label columns actually gathered, times the number of rows.
    ret.word_count = b * (label_end - start - 1)
    ret.take_last_state = batch_id > 0
    
    return ret
    def _Extract(self, features):
        """Extracts per-object labels, 3D boxes and difficulty levels.

        All per-object tensors are padded or trimmed to `p.max_num_objects`
        along the leading dimension.

        Args:
          features: Mapping from feature name to the parsed feature value.

        Returns:
          A NestedMap with `labels`, `label_ids`, the three difficulty
          tensors, `bboxes_3d`, `bboxes_3d_mask`, `bboxes_3d_num_points`,
          `unfiltered_bboxes_3d_mask`, `speed` and `acceleration`. When
          `p.filter_labels` is set, `bboxes_3d_mask` is additionally zeroed
          for objects whose label is not listed in it.
        """
        p = self.params
        max_objects = [p.max_num_objects]

        def _PaddedInt32(key):
            # Densify, cast to int32 and resize to one entry per object slot.
            return py_utils.PadOrTrimTo(
                tf.cast(_Dense(features[key]), tf.int32), max_objects)

        # Label values match the proto enum car.open_dataset.Label.Type. The
        # value range is [1..4] for non-background labels.
        labels = _PaddedInt32('labels')
        label_ids = py_utils.PadOrTrimTo(
            tf.reshape(_Dense(features['label_ids'], ''), [-1]), max_objects,
            '')

        raw_bboxes_3d = tf.reshape(_Dense(features['bboxes_3d']), [-1, 7])
        # One entry per box that is really present, before any padding.
        raw_mask = tf.ones([tf.shape(raw_bboxes_3d)[0]])
        raw_num_points = tf.cast(
            _Dense(features['bboxes_3d_num_points']), tf.int32)
        bboxes_3d = py_utils.PadOrTrimTo(raw_bboxes_3d,
                                         [p.max_num_objects, 7])
        bboxes_3d_mask = py_utils.PadOrTrimTo(raw_mask, max_objects)
        bboxes_3d_num_points = py_utils.PadOrTrimTo(raw_num_points,
                                                    max_objects)

        label_metadata = py_utils.PadOrTrimTo(
            tf.reshape(_Dense(features['label_metadata']), [-1, 4]),
            [p.max_num_objects, 4])

        detection_difficulties = _PaddedInt32('detection_difficulties')
        single_frame_detection_difficulties = _PaddedInt32(
            'single_frame_detection_difficulties')
        tracking_difficulties = _PaddedInt32('tracking_difficulties')

        # Keep the mask as it was before label filtering is applied.
        unfiltered_bboxes_3d_mask = bboxes_3d_mask

        if p.filter_labels:
            valid_labels = tf.constant([p.filter_labels])
            is_valid_label = tf.reduce_any(
                tf.equal(tf.expand_dims(labels, 1), valid_labels), axis=1)
            bboxes_3d_mask = bboxes_3d_mask * tf.cast(is_valid_label,
                                                      tf.float32)

        return py_utils.NestedMap(
            labels=labels,
            label_ids=label_ids,
            detection_difficulties=detection_difficulties,
            single_frame_detection_difficulties=(
                single_frame_detection_difficulties),
            tracking_difficulties=tracking_difficulties,
            bboxes_3d=bboxes_3d,
            bboxes_3d_mask=bboxes_3d_mask,
            bboxes_3d_num_points=bboxes_3d_num_points,
            unfiltered_bboxes_3d_mask=unfiltered_bboxes_3d_mask,
            speed=label_metadata[:, :2],
            acceleration=label_metadata[:, 2:])
예제 #8
0
    def _NestedMapFromBatchedOutputs(self, outputs):
        """Rebuilds the extractor NestedMap from flat, batched outputs.

        Args:
          outputs: A sequence of Tensors ordered like the flattened
            `self.Shape()` and `self.DType()`.

        Returns:
          A NestedMap with the structure of `self.Shape()` in which every
          Tensor is padded or trimmed to `[batch_size] + declared shape`.
          When running on TPU, tf.string tensors are filtered out.

        Raises:
          ValueError: If a declared shape is not fully defined.
        """
        batch_size = self.InfeedBatchSize()
        shapes = self.Shape()
        shapes.VLog(0, 'input extractor shape: ')
        shape_list = shapes.Flatten()
        dtype_items = self.DType().FlattenItems()
        num_outputs = len(outputs)
        assert len(shape_list) == num_outputs, '{} vs. {}'.format(
            len(shape_list), num_outputs)
        assert len(dtype_items) == num_outputs, '{} vs. {}'.format(
            len(dtype_items), num_outputs)

        padded_outputs = []
        for output, (name, dtype), shape in zip(outputs, dtype_items,
                                                shape_list):
            assert dtype == output.dtype, '{}: {} vs. {}'.format(
                name, dtype, output.dtype)
            # The static shapes are lost on the way through generic_input_op,
            # so each output is resized to its declared shape, which must
            # therefore be fully known.
            try:
                shape.assert_is_fully_defined()
            except ValueError as e:
                raise ValueError('Invalid shape for %s: %s' % (name, e))
            target_shape = [batch_size] + shape.as_list()
            padded_outputs.append(py_utils.PadOrTrimTo(output, target_shape))

        packed = shapes.Pack(padded_outputs)
        if not py_utils.use_tpu():
            return packed
        # tf.string tensors are not supported on TPUs, so they are dropped.
        return packed.Filter(lambda x: x.dtype != tf.string)
예제 #9
0
    def _Extract(self, features):
        """Extracts image size, transforms and, optionally, the image.

        Args:
          features: Mapping from feature name to the parsed feature value.

        Returns:
          A NestedMap with `width` and `height` (each reshaped to `[1]`) and
          the `velo_to_image_plane`, `velo_to_camera` and `camera_to_velo`
          features passed through unchanged. When `p.decode_image` is set it
          also holds `image`: the PNG decoded to 3 channels, converted to
          float32 and padded or trimmed to
          `[self._KITTI_MAX_HEIGHT, self._KITTI_MAX_WIDTH, 3]`.
        """
        p = self.params

        if p.decode_image:
            decoded = tf.image.decode_png(
                features['image/encoded'], channels=3)
            as_float = tf.image.convert_image_dtype(decoded, tf.float32)
            # Pad rather than rescale so that pixel coordinates stay valid.
            image = py_utils.PadOrTrimTo(
                as_float, [self._KITTI_MAX_HEIGHT, self._KITTI_MAX_WIDTH, 3])

        extracted_features = py_utils.NestedMap(
            width=tf.reshape(features['image/width'], [1]),
            height=tf.reshape(features['image/height'], [1]),
            velo_to_image_plane=features['transform/velo_to_image_plane'],
            velo_to_camera=features['transform/velo_to_camera'],
            camera_to_velo=features['transform/camera_to_velo'])

        if p.decode_image:
            extracted_features.image = image

        return extracted_features
예제 #10
0
  def _NestedMapFromBatchedOutputs(self, outputs):
    """Rebuilds the extractor NestedMap from flat, batched outputs.

    Args:
      outputs: A sequence of Tensors ordered like the flattened
        `self.Shape()` and `self.DType()`.

    Returns:
      A NestedMap with the structure of `self.Shape()` in which every Tensor
      is padded or trimmed to `[batch_size] + declared shape`.

    Raises:
      ValueError: If a declared shape is not fully defined.
    """
    batch_size = self.InfeedBatchSize()
    shapes = self.Shape()
    shapes.VLog(0, 'input extractor shape: ')
    shape_list = shapes.Flatten()
    dtype_items = self.DType().FlattenItems()
    num_outputs = len(outputs)
    assert len(shape_list) == num_outputs, '{} vs. {}'.format(
        len(shape_list), num_outputs)
    assert len(dtype_items) == num_outputs, '{} vs. {}'.format(
        len(dtype_items), num_outputs)

    padded_outputs = []
    for output, (name, dtype), shape in zip(outputs, dtype_items, shape_list):
      assert dtype == output.dtype, '{}: {} vs. {}'.format(
          name, dtype, output.dtype)
      # The static shapes are lost on the way through generic_input_op, so
      # each output is resized to its declared shape, which must therefore be
      # fully known.
      try:
        shape.assert_is_fully_defined()
      except ValueError as e:
        raise ValueError('Invalid shape for %s: %s' % (name, e))
      target_shape = [batch_size] + shape.as_list()
      padded_outputs.append(py_utils.PadOrTrimTo(output, target_shape))

    # String tensors in the result will be filtered out from being sent to
    # the device automatically, and instead will be present in CPU
    # passthrough.
    return shapes.Pack(padded_outputs)
  def _Extract(self, features):
    """Extracts point coordinates and reflectance from a point cloud.

    Args:
      features: Mapping from feature name to the parsed feature value.

    Returns:
      A NestedMap with `points_xyz` (rows of 3 values) and `points_feature`
      (rows of `p.num_features` values). When `p.max_num_points` is not None,
      both are padded or trimmed to that many rows and `points_padding` is
      added, computed as one minus a padded-or-trimmed vector of ones of the
      original point count.
    """
    p = self.params
    points_xyz = tf.reshape(_Dense(features['pointcloud/xyz']), [-1, 3])
    points_feature = tf.reshape(
        _Dense(features['pointcloud/reflectance']), [-1, p.num_features])

    if p.max_num_points is None:
      return py_utils.NestedMap(
          points_xyz=points_xyz, points_feature=points_feature)

    # The real point count has to be read before the tensors are resized.
    num_real_points = tf.shape(points_xyz)[0]
    points_xyz = py_utils.PadOrTrimTo(points_xyz, [p.max_num_points, 3])
    points_feature = py_utils.PadOrTrimTo(
        points_feature, [p.max_num_points, p.num_features])
    is_real = py_utils.PadOrTrimTo(
        tf.ones([num_real_points]), [p.max_num_points])
    return py_utils.NestedMap(
        points_xyz=points_xyz,
        points_feature=points_feature,
        points_padding=1.0 - is_real)
  def _Extract(self, features):
    """Adds per-point flow and class tensors to the parent's extraction.

    For every combination of `p.lidar_names` and `p.lidar_returns`, the
    feature `laser_<lidar>_<return>_flow` is read as rows of 4 values: the
    first three columns are stored as flow and the fourth as the point class.

    Args:
      features: Mapping from feature name to the parsed feature value.

    Returns:
      The NestedMap produced by the parent class, extended with `world_flow`
      and `pointwise_class` (int32). When `p.max_num_points` is not None,
      both are padded or trimmed to that many rows.
    """
    p = self.params
    ret = super()._Extract(features)

    flow_parts = []
    class_parts = []
    for lidar in p.lidar_names:
      for ri in p.lidar_returns:
        points_name = 'laser_%s_%s' % (lidar, ri)
        points = tf.reshape(
            _Dense(features[points_name]), [-1, 3 + p.num_features])
        num_points = py_utils.GetShape(points)[0]
        # The flow feature is expected to hold exactly one row for each point
        # of the matching laser feature, hence the fixed row count below.
        flow_name = points_name + '_flow'
        flow = tf.reshape(_Dense(features[flow_name]), [num_points, 3 + 1])
        flow_parts.append(flow[..., 0:3])
        class_parts.append(flow[..., 3])

    # Join all returns along the leading (point) dimension.
    points_vxyz = tf.concat(flow_parts, axis=0)
    points_class = tf.concat(class_parts, axis=0)

    # The precomputed class uses -1 to mean 5 in our current code.
    is_negative = tf.less(points_class, 0)
    fives = 5. * tf.ones_like(points_class)
    points_class = tf.where(is_negative, fives, points_class)

    if p.max_num_points is not None:
      assert 'points_padding' in ret
      points_vxyz = py_utils.PadOrTrimTo(points_vxyz, [p.max_num_points, 3])
      points_class = py_utils.PadOrTrimTo(points_class, [p.max_num_points])

    assert 'points_xyz' in ret
    ret.world_flow = points_vxyz
    ret.pointwise_class = tf.cast(points_class, tf.int32)
    return ret
예제 #13
0
        def _TokenizeOneSentence(i, strs, token_ids_ta, target_ids_ta,
                                 paddings_ta):
            """Encodes sentence `i` of `strs` and records it in the arrays.

            Written in the shape of a loop body: it receives the loop state
            and returns the state for the next iteration.

            Args:
              i: Index of the sentence to process.
              strs: The collection of sentences, indexed by `i`.
              token_ids_ta: Array-like with a `write(index, value)` method
                (presumably a tf.TensorArray) receiving the SOS-prefixed ids.
              target_ids_ta: Same kind of array, receiving the target ids.
              paddings_ta: Same kind of array, receiving the padding row.

            Returns:
              `(i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta)` with
              the three arrays updated at index `i`.
            """
            ids, _ = self._wpm_encoder.Encode(strs[i])
            if append_eos:
                ids = tf.concat([ids, [self.eos_id]], axis=0)

            # Resizing happens after the eos is appended, so a long sentence
            # can lose its trailing </s>.
            sos_prefixed = tf.concat([[self.sos_id], ids], axis=0)
            token_row = py_utils.PadOrTrimTo(sos_prefixed, [max_length],
                                             self.eos_id)
            token_ids_ta = token_ids_ta.write(i, token_row)

            target_row = py_utils.PadOrTrimTo(ids, [max_length], self.eos_id)
            target_ids_ta = target_ids_ta.write(i, target_row)

            # Zeros mark real tokens; positions added by padding are set to 1.
            real_positions = tf.zeros_like(ids, dtype=tf.float32)
            padding_row = py_utils.PadOrTrimTo(real_positions, [max_length],
                                               1.)
            paddings_ta = paddings_ta.write(i, padding_row)

            return i + 1, strs, token_ids_ta, target_ids_ta, paddings_ta
    def _Extract(self, features):
        """Extracts the image and calibration data of every camera.

        Args:
          features: Mapping from feature name to the parsed feature value.

        Returns:
          A NestedMap with one nested NestedMap per name in `p.camera_names`.
          Each holds `image_shape`, `intrinsics` (9 values), `extrinsics` and
          `pose` (both 4x4), `velocity` (6 values),
          `rolling_shutter_direction` and the four timing features, passed
          through unchanged. When `p.decode_image` is set it also holds
          `image`, decoded from PNG, padded or trimmed to `p.image_shape` and
          cast to `p.image_output_dtype`.
        """
        p = self.params
        outputs = py_utils.NestedMap()
        passthrough_feats = [
            'shutter', 'camera_trigger_time', 'camera_readout_done_time',
            'pose_timestamp'
        ]

        for camera_name in p.camera_names:
            image_shape = tf.cast(
                tf.reshape(
                    _Dense(features['image_%s_shape' % camera_name]), [-1]),
                tf.int32)

            if p.decode_image:
                # The encoded image may be split over several strings, so
                # they are joined before decoding.
                encoded = tf.strings.reduce_join(
                    _Dense(features['image_%s' % camera_name],
                           default_value=''))
                image = tf.reshape(tf.io.decode_png(encoded), image_shape)
                image = py_utils.PadOrTrimTo(image, p.image_shape)

            intrinsics = tf.reshape(
                _Dense(features['camera_%s_intrinsics' % camera_name]), [9])
            extrinsics = tf.reshape(
                _Dense(features['camera_%s_extrinsics' % camera_name]),
                [4, 4])
            pose = tf.reshape(
                _Dense(features['image_%s_pose' % camera_name]), [4, 4])
            velocity = tf.reshape(
                _Dense(features['image_%s_velocity' % camera_name]), [6])

            camera = py_utils.NestedMap()
            outputs[camera_name] = camera
            if p.decode_image:
                camera['image'] = tf.cast(image, p.image_output_dtype)
            camera['image_shape'] = image_shape
            camera['intrinsics'] = intrinsics
            camera['extrinsics'] = extrinsics
            camera['pose'] = pose
            camera['velocity'] = velocity
            camera['rolling_shutter_direction'] = features[
                'camera_%s_rolling_shutter_direction' % camera_name]

            for feat in passthrough_feats:
                camera[feat] = features['image_%s_%s' % (camera_name, feat)]

        return outputs
예제 #15
0
  def BatchedNMSIndices(self,
                        bboxes,
                        scores,
                        nms_iou_threshold=0.3,
                        score_threshold=0.01,
                        max_num_boxes=None):
    """Runs `NMSIndices` independently on every example of a batch.

    Args:
      bboxes: A [batch_size, num_boxes, 7] floating point Tensor of bounding
        boxes in [x, y, z, dx, dy, dz, phi] format.
      scores: A [batch_size, num_boxes, num_classes] floating point Tensor
        containing box scores.
      nms_iou_threshold: IoU threshold to use when determining whether two
        boxes overlap for purposes of suppression.
      score_threshold: The score threshold passed to NMS that allows NMS to
        quickly ignore irrelevant boxes.
      max_num_boxes: The maximum number of boxes per example to emit. If None,
        this value is set to num_boxes from the shape of bboxes.

    Returns:
      A tuple `(nms_indices, valid_mask)`: the NMS indices, padded or trimmed
      to [batch_size, max output size], and the mask of the padded indices
      for each example in the batch as returned by the per-example calls.
    """
    batch_size, num_boxes = py_utils.GetShape(bboxes, 2)
    max_output_size = num_boxes if max_num_boxes is None else max_num_boxes

    def NMSBody(args):
      example_bboxes, example_scores = args
      return self.NMSIndices(example_bboxes, example_scores, max_output_size,
                             nms_iou_threshold, score_threshold)

    nms_indices, valid_mask = tf.map_fn(
        fn=NMSBody,
        elems=(bboxes, scores),
        dtype=(tf.int32, tf.float32),
        back_prop=False)

    # Only the indices are resized; the mask is returned as map_fn produced it.
    nms_indices = py_utils.PadOrTrimTo(nms_indices,
                                       [batch_size, max_output_size])
    return nms_indices, valid_mask
예제 #16
0
  def NestedMapFromBatchedOutputs(self, outputs):
    """Create a NestedMap from a list/tuple of batched outputs.

    Args:
      outputs: A tuple or list of Tensors whose order matches the flattened
        structure of Shape() and DType().

    Returns:
      A NestedMap reconstructing the structure of the output of extractors
        and preprocessors, where each Tensor's shape is statically
        padded/trimmed to match the Shape() specification.

    Raises:
      ValueError: If `outputs` contains a shape that is not fully
        defined.
      AssertionError: If any shape of a Tensor in `outputs` cannot be
        PadOrTrimTo'd by the corresponding Shape() specification. All
        offending outputs are reported together in one error.
    """
    batch_size = self.InfeedBatchSize()
    shapes = self.Shape()
    shapes.VLog(0, 'input extractor shape: ')
    flatten_shapes = shapes.Flatten()
    dtypes = self.DType()
    flatten_dtypes = dtypes.FlattenItems()
    assert len(flatten_shapes) == len(outputs), '{} vs. {}'.format(
        len(flatten_shapes), len(outputs))
    assert len(flatten_dtypes) == len(outputs), '{} vs. {}'.format(
        len(flatten_dtypes), len(outputs))

    rets = []
    assertion_errors = []
    for (output, (name, dtype), shape) in zip(outputs, flatten_dtypes,
                                              flatten_shapes):
      assert dtype == output.dtype, '{}: {} vs. {}'.format(
          name, dtype, output.dtype)
      # Pad every output to make shapes fixed according to the corresponding
      # declared shape, since the shapes of outputs are lost through
      # generic_input_op.
      try:
        shape.assert_is_fully_defined()
      except ValueError as e:
        # Chain explicitly so the original shape error stays attached as the
        # cause of the more descriptive one.
        raise ValueError('Invalid shape for %s: %s' % (name, e)) from e
      curr_shape = py_utils.GetShape(output)
      padded_shape = shape.as_list()
      # Already-batched inputs declare their batch dimension themselves.
      if not self.params.batched_input:
        padded_shape = [batch_size] + padded_shape

      # Collect every mismatch instead of stopping at the first one, so a
      # single run reports all outputs that need fixing.
      try:
        padded = py_utils.PadOrTrimTo(output, padded_shape)
        rets.append(padded)
      except AssertionError as e:
        assertion_errors.append(
            f'{name}: {e}, ({curr_shape} vs. {padded_shape})')

    if assertion_errors:
      raise AssertionError('Mismatched shapes:\n' + '\n'.join(assertion_errors))

    rets = shapes.Pack(rets)

    # String tensors in rets will be filtered out from being sent to the
    # device automatically, and instead will be present in CPU passthrough.
    return rets
예제 #17
0
파일: utils.py 프로젝트: vcj-huy/lingvo
def PadOrTrimDimension(tensor: tf.Tensor, new_size: int,
                       axis: int) -> tf.Tensor:
    """Pads or trims a single dimension of `tensor` to `new_size`.

    All other dimensions keep the size reported by `py_utils.GetShape`.

    Args:
      tensor: The tensor to resize.
      new_size: The size dimension `axis` should have in the result.
      axis: The dimension to resize. Negative values count from the last
        dimension, so -1 is the last one.

    Returns:
      The result of `py_utils.PadOrTrimTo` with the target shape described
      above.

    Raises:
      ValueError: If the static shape of `tensor` is known to have too few
        dimensions for `axis`.
    """
    # A non-negative axis needs rank > axis; a negative axis needs
    # rank >= -axis.
    min_rank = axis + 1 if axis >= 0 else -axis
    tensor.shape.with_rank_at_least(min_rank)
    shape = py_utils.GetShape(tensor)
    # Normalize a negative axis first: `shape[axis + 1:]` would otherwise
    # select the wrong tail (for axis == -1 it is the whole shape).
    if axis < 0:
        axis += len(shape)
    return py_utils.PadOrTrimTo(tensor,
                                shape[:axis] + [new_size] + shape[axis + 1:])
예제 #18
0
    def _Extract(self, features):
        """Extracts 2D/3D bounding boxes, labels and difficulty attributes.

        Every per-object output is padded or trimmed to `p.max_num_objects`
        along its leading dimension.

        Args:
          features: Mapping from feature name to the parsed feature value.

        Returns:
          A NestedMap holding the source id, the 2D boxes with their count
          and padding, the 3D boxes with filtered and unfiltered masks, the
          projection of the 3D box corners to the image plane, top-down
          boxes with their mask, a zero-filled `bboxes_3d_num_points`
          placeholder, labels and label texts, and the box image height,
          occlusion, truncation and difficulty values.
        """
        p = self.params

        source_id = py_utils.HasShape(features['image/source_id'], [])
        xmin = _Dense(features['object/image/bbox/xmin'])
        xmax = _Dense(features['object/image/bbox/xmax'])
        ymin = _Dense(features['object/image/bbox/ymin'])
        ymax = _Dense(features['object/image/bbox/ymax'])

        # 2d bounding box in image coordinates.
        bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
        # Number of boxes actually present, read before padding.
        bboxes_count = tf.shape(bboxes)[0]
        bboxes = py_utils.PadOrTrimTo(bboxes, [p.max_num_objects, 4])

        # One minus a padded-or-trimmed vector of ones of length bboxes_count.
        bboxes_padding = 1.0 - py_utils.PadOrTrimTo(tf.ones([bboxes_count]),
                                                    [p.max_num_objects])

        dim_xyz = tf.reshape(_Dense(features['object/velo/bbox/dim_xyz']),
                             [-1, 3])
        loc_xyz = tf.reshape(_Dense(features['object/velo/bbox/xyz']), [-1, 3])
        phi = tf.reshape(_Dense(features['object/velo/bbox/phi']), [-1, 1])
        # bboxes_3d is in [x, y, z, dx, dy, dz, phi].
        bboxes_3d = tf.concat([loc_xyz, dim_xyz, phi], axis=1)

        # Top-down boxes built from the center and extent in x and y only;
        # phi is not used here.
        cx, cy, _, dx, dy, _, _ = tf.unstack(bboxes_3d, num=7, axis=-1)
        bboxes_td = tf.stack([
            cy - dy / 2,
            cx - dx / 2,
            cy + dy / 2,
            cx + dx / 2,
        ],
                             axis=-1)  # pyformat: disable
        bboxes_td = py_utils.PadOrTrimTo(bboxes_td, [p.max_num_objects, 4])

        has_3d_info = tf.cast(_Dense(features['object/has_3d_info']),
                              tf.float32)
        bboxes_3d_mask = py_utils.PadOrTrimTo(has_3d_info, [p.max_num_objects])
        bboxes_td_mask = bboxes_3d_mask

        # Fill in difficulties from bounding box height, truncation and occlusion.
        # Each of the three inputs is multiplied by bboxes_3d_mask, which
        # zeroes the entries of objects without 3D info.
        bb_height = ymax - ymin
        box_image_height = py_utils.PadOrTrimTo(bb_height, [p.max_num_objects])
        box_image_height *= bboxes_3d_mask

        # 0 to 3 indicating occlusion level. 0 means fully visible, 1 means
        # partly occluded; see the KITTI label format for levels 2 and 3.
        occlusion = tf.reshape(_Dense(features['object/occlusion']), [-1])
        occlusion = tf.cast(occlusion, tf.float32)
        occlusion = py_utils.PadOrTrimTo(occlusion, [p.max_num_objects])
        occlusion *= bboxes_3d_mask

        # Truncation: 0 -> not truncated, 1.0 -> truncated
        truncation = tf.reshape(_Dense(features['object/truncation']), [-1])
        truncation = py_utils.PadOrTrimTo(truncation, [p.max_num_objects])
        truncation *= bboxes_3d_mask

        difficulties = ComputeKITTIDifficulties(box_image_height, occlusion,
                                                truncation)
        difficulties = py_utils.PadOrTrimTo(difficulties, [p.max_num_objects])

        # Make a batch axis to call BBoxCorners, and take the first result back.
        bbox3d_corners = geometry.BBoxCorners(bboxes_3d[tf.newaxis, ...])[0]

        # Project the 3D bbox to the image plane.
        velo_to_image_plane = features['transform/velo_to_image_plane']
        bboxes3d_proj_to_image_plane = geometry.PointsToImagePlane(
            tf.reshape(bbox3d_corners, [-1, 3]), velo_to_image_plane)

        # Output is [num_objects, 8 corners per object, (x, y)].
        bboxes3d_proj_to_image_plane = tf.reshape(bboxes3d_proj_to_image_plane,
                                                  [-1, 8, 2])
        bboxes3d_proj_to_image_plane = py_utils.PadOrTrimTo(
            bboxes3d_proj_to_image_plane, [p.max_num_objects, 8, 2])

        # Map the label strings to integer ids using self.KITTI_CLASS_NAMES
        # as the keys.
        texts = features['object/label'].values
        labels = ops.static_map_string_int(x=texts,
                                           keys=self.KITTI_CLASS_NAMES)

        labels = py_utils.PadOrTrimTo(labels, [p.max_num_objects])
        texts = py_utils.PadOrTrimTo(texts, [p.max_num_objects])

        # Filter labels by setting bboxes_padding, bboxes_3d_mask, and
        # bboxes_td_mask appropriately.
        if p.filter_labels is not None:
            valid_labels = tf.constant([p.filter_labels])
            bbox_mask = tf.reduce_any(tf.equal(tf.expand_dims(labels, 1),
                                               valid_labels),
                                      axis=1)
            bbox_mask = tf.cast(bbox_mask, tf.float32)
            # A box stays unpadded only if it was unpadded before and its
            # label is one of the valid labels.
            bboxes_padding = 1 - bbox_mask * (1 - bboxes_padding)
            filtered_bboxes_3d_mask = bboxes_3d_mask * bbox_mask
            bboxes_td_mask *= bbox_mask
        else:
            filtered_bboxes_3d_mask = bboxes_3d_mask

        # Placeholder for counting the number of laser points that reside within
        # each 3-d bounding box. This must be filled in outside of this function
        # based on the loaded 3-d laser points.
        bboxes_3d_num_points = tf.zeros([p.max_num_objects], dtype=tf.int32)
        bboxes_3d_num_points = py_utils.PadOrTrimTo(bboxes_3d_num_points,
                                                    [p.max_num_objects])

        # Pad bboxes_3d. This is done last because the unpadded boxes are
        # used above for the corner projection.
        bboxes_3d = py_utils.PadOrTrimTo(bboxes_3d, [p.max_num_objects, 7])

        return py_utils.NestedMap(
            source_id=source_id,
            bboxes_count=bboxes_count,
            bboxes=bboxes,
            bboxes_padding=bboxes_padding,
            bboxes_3d=bboxes_3d,
            bboxes_3d_mask=filtered_bboxes_3d_mask,
            unfiltered_bboxes_3d_mask=bboxes_3d_mask,
            bboxes3d_proj_to_image_plane=bboxes3d_proj_to_image_plane,
            bboxes_td=bboxes_td,
            bboxes_td_mask=bboxes_td_mask,
            bboxes_3d_num_points=bboxes_3d_num_points,
            labels=labels,
            texts=texts,
            box_image_height=box_image_height,
            occlusion=occlusion,
            truncation=truncation,
            difficulties=difficulties)