Example No. 1
def saveTrainDataForSample(samples):
    # TODO make this write tensors to file.
    import time

    tensors = []
    for sample in samples:
        # pre-process data
        testSampleLidarPoints = combine_lidar_data(sample, Constants.dataDir)
        startTime = time.time()
        testVFEPoints = VFE_preprocessing(testSampleLidarPoints,
                                          Constants.voxelx, Constants.voxely,
                                          Constants.voxelz,
                                          Constants.maxPoints,
                                          Constants.nx // 2, Constants.ny // 2,
                                          Constants.nz)
        endTime = time.time()
        print(endTime - startTime)
        print(testVFEPoints.shape)
        # Reshape into a rank-6 tensor, then convert to dense because Keras
        # does not accept sparse input tensors
        testVFEPoints = sparse.reshape(testVFEPoints,
                                       (1, ) + testVFEPoints.shape)
        testVFEPointsDense = sparse.to_dense(testVFEPoints,
                                             default_value=0.,
                                             validate_indices=False)
        tensors.append(testVFEPointsDense)
    return tf.stack(tensors)
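
A minimal sketch of the write-to-file step the TODO above still asks for, assuming TF2 eager mode; the tensor shapes and the output file name are made up for illustration:

import numpy as np
import tensorflow as tf

# Dummy stand-ins for the dense, batch-reshaped sample tensors built above
tensors = [tf.zeros((1, 2, 2, 2, 4, 7)) for _ in range(3)]
stacked = tf.stack(tensors)  # shape (3, 1, 2, 2, 2, 4, 7)
np.save('train_points.npy', stacked.numpy())  # hypothetical output path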
Example No. 2
def main2(sample):
    # Set constants
    dataDir = 'C:\\Users\\pmwws\\Documents\\ML project\\3d-object-detection-for-autonomous-vehicles'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    import time

    # pre-process data
    testSampleLidarPoints = combine_lidar_data(sample, dataDir)
    startTime = time.time()
    testVFEPoints = VFE_preprocessing(testSampleLidarPoints, voxelx, voxely,
                                      voxelz, maxPoints, nx // 2, ny // 2, nz)
    endTime = time.time()
    print(endTime - startTime)
    print(testVFEPoints.shape)
    # Reshape into a rank-6 tensor, then convert to dense because Keras
    # does not accept sparse input tensors
    testVFEPoints = sparse.reshape(testVFEPoints, (1, ) + testVFEPoints.shape)
    testVFEPointsDense = sparse.to_dense(testVFEPoints,
                                         default_value=0.,
                                         validate_indices=False)

    # pre-process labels
    labels = []
    annsTokens = sample['anns']
    for token in annsTokens:
        ann = level5Data.get('sample_annotation', token)
        row = ann['translation']
        row += ann['size']
        quaternion = Quaternion(ann['rotation'])
        row += [quaternion.yaw_pitch_roll[0]]
        instance = level5Data.get('instance', ann['instance_token'])
        category = level5Data.get('category',
                                  instance['category_token'])['name']
        row += [catToNum[category]]
        labels.append(row)
    labels = np.array(labels)
    outClass, outRegress = preprocessLabels(labels)
    outClass = np.reshape(outClass, (1, ) + outClass.shape)
    outRegress = np.reshape(outRegress, (1, ) + outRegress.shape)

    # create model
    with tf.device('/device:CPU:0'):
        # model = createModel(nx, ny, nz, maxPoints)
        # plot_model(model, show_shapes=True)
        # sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        # model.compile(optimizer=sgd, loss=['mse', 'mse'])
        model = load_model('models\\Epoch5.h5',
                           custom_objects={
                               'RepeatLayer': RepeatLayer,
                               'MaxPoolingVFELayer': MaxPoolingVFELayer
                           })

        # fit model
        history = model.fit(x=testVFEPointsDense,
                            y=[outClass, outRegress],
                            batch_size=1,
                            verbose=1,
                            epochs=1)

        print(history.history)
        model.save('models\\Epoch6.h5')
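
The label loop above builds one row per annotation as [x, y, z, w, l, h, yaw, class]. A minimal sketch of that row construction with hard-coded values, assuming the Quaternion class here is pyquaternion's:

import numpy as np
from pyquaternion import Quaternion

translation = [10.0, -5.0, 1.2]  # made-up box center x, y, z
size = [1.9, 4.5, 1.6]           # made-up box width, length, height
quaternion = Quaternion([0.924, 0.0, 0.0, 0.383])  # roughly 45 degrees of yaw
row = translation + size + [quaternion.yaw_pitch_roll[0], 0]  # 0 = made-up class index
labels = np.array([row])         # shape (1, 8), one row per annotation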
Example No. 3
def predictMain(samples, outPath, level5Data, model):
    import time
    # Set constants
    dataDir = 'E:\\CS539 Machine Learning\\3d-object-detection-for-autonomous-vehicles'
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    for i, sample in enumerate(samples):
        # pre-process data
        sampleLidarPoints = combine_lidar_data(sample, dataDir, level5Data)
        startTime = time.time()
        trainVFEPoints = VFE_preprocessing(sampleLidarPoints, Constants.voxelx,
                                           Constants.voxely, Constants.voxelz,
                                           Constants.maxPoints,
                                           Constants.nx // 2,
                                           Constants.ny // 2, Constants.nz)
        # Reshape into a rank-6 tensor, then convert to dense because Keras
        # does not accept sparse input tensors
        trainVFEPoints = sparse.reshape(trainVFEPoints,
                                        (1, ) + trainVFEPoints.shape)
        vfePointsDense = sparse.to_dense(trainVFEPoints,
                                         default_value=0.,
                                         validate_indices=False)
        endTime = time.time()
        print(endTime - startTime)
        print('finished ' + str(i))
        prob, regress = model.predict(vfePointsDense)
        np.save(outPath + '\\sample' + str(i) + '_label.npy', prob)
        np.save(outPath + '\\sample' + str(i) + '_regress.npy', regress)
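
A hedged round-trip sketch for the per-sample prediction files that predictMain writes; the array shapes are made up, only the save/load naming pattern matches the code above:

import numpy as np

prob = np.zeros((1, 200, 175, 2), dtype=np.float32)      # made-up classification output
regress = np.zeros((1, 200, 175, 14), dtype=np.float32)  # made-up regression output
np.save('sample0_label.npy', prob)
np.save('sample0_regress.npy', regress)
print(np.load('sample0_label.npy').shape)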
Example No. 4
    def get_feature(self, feature_info, extracted_features, sequence_size):
        """
        Fetch the feature from the feature dictionary of extracted features
        Parameters
        ----------
        feature_info: dict
            Feature configuration information for the feature as specified in the feature_config
        extracted_features: tuple of (dict, dict)
            Tuple of (context, sequence) dictionaries of feature tensors
            extracted by parsing the serialized TFRecord SequenceExample
        sequence_size: int
            Number of elements in the sequence of a SequenceExample
        Returns
        -------
        tf.Tensor
            Feature tensor that is obtained from the extracted features for the given
            feature_info
        """
        extracted_context_features, extracted_sequence_features = extracted_features

        default_tensor = self.get_default_tensor(feature_info, sequence_size)

        if feature_info["tfrecord_type"] == SequenceExampleTypeKey.CONTEXT:
            feature_tensor = extracted_context_features.get(
                feature_info["name"], default_tensor)
            # Adjust shape
            feature_tensor = tf.expand_dims(feature_tensor, axis=0)
        else:
            feature_tensor = extracted_sequence_features.get(
                feature_info["name"], default_tensor)
            if isinstance(feature_tensor, sparse.SparseTensor):
                feature_tensor = sparse.reset_shape(feature_tensor)
                feature_tensor = sparse.to_dense(feature_tensor)
                feature_tensor = tf.squeeze(feature_tensor, axis=0)

        return feature_tensor
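
The sequence branch above relies on the sparse.reset_shape / sparse.to_dense / tf.squeeze chain. A self-contained toy run of that chain, assuming TF2 eager mode:

import tensorflow as tf
from tensorflow import sparse

st = tf.sparse.SparseTensor(indices=[[0, 0], [0, 2]],
                            values=[1.0, 2.0],
                            dense_shape=[1, 3])
st = sparse.reset_shape(st)    # tighten shape to the bounding box of the indices
dense = sparse.to_dense(st)    # [[1., 0., 2.]]
t = tf.squeeze(dense, axis=0)  # [1., 0., 2.]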
Example No. 5
    def get_feature(self, feature_info, extracted_features, sequence_size=0):
        """
        Fetch the feature from the feature dictionary of extracted features

        Parameters
        ----------
        feature_info: dict
            Feature configuration information for the feature as specified in the feature_config
        extracted_features: dict
            Dictionary of feature tensors extracted by parsing the serialized TFRecord
        sequence_size: int, optional
            Number of elements in the sequence of a SequenceExample

        Returns
        -------
        tf.Tensor
            Feature tensor that is obtained from the extracted features for the given
            feature_info
        """
        default_tensor = self.get_default_tensor(feature_info, sequence_size)

        feature_tensor = extracted_features.get(feature_info["name"], default_tensor)
        if isinstance(feature_tensor, tf.sparse.SparseTensor):
            feature_tensor = sparse.to_dense(sparse.reset_shape(feature_tensor))

            """
            NOTE: If a feature is in the features_spec, then it gets retrieved
            as an empty sparse tensor. So we need to replace with default tensor
            """
            if tf.size(feature_tensor) == tf.constant(0):
                feature_tensor = default_tensor

        return feature_tensor
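
A toy demonstration of the empty-sparse-tensor case the NOTE describes, again assuming eager mode: a feature declared in the features_spec but absent from the record parses to an empty sparse tensor, and the size check falls back to the default:

import tensorflow as tf
from tensorflow import sparse

empty = tf.sparse.SparseTensor(indices=tf.zeros([0, 1], dtype=tf.int64),
                               values=tf.zeros([0]),
                               dense_shape=[0])
feature_tensor = sparse.to_dense(sparse.reset_shape(empty))
default_tensor = tf.constant([0.0])  # made-up default
if tf.size(feature_tensor) == tf.constant(0):
    feature_tensor = default_tensor  # taken here, since the tensor is empty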
Example No. 6
def train_with_model(samples, level5Data, model_path, save_path):
    labels_dir = 'labels3'

    points = []
    for i, sample in enumerate(samples):
        # pre-process data
        sampleLidarPoints = combine_lidar_data(sample,
                                               Constants.lyft_data_dir,
                                               level5Data)
        startTime = time.time()
        vfe_points = VFE_preprocessing(sampleLidarPoints, Constants.voxelx,
                                       Constants.voxely, Constants.voxelz,
                                       Constants.maxPoints, Constants.nx // 2,
                                       Constants.ny // 2, Constants.nz)
        # Convert to dense here because keras won't take sparse tensors
        vfe_points_dense = sparse.to_dense(vfe_points,
                                           default_value=0.,
                                           validate_indices=False)
        points.append(vfe_points_dense)
        endTime = time.time()
        print(endTime - startTime)
        print('finished ' + str(i))
    # Stack into a rank-6 batch tensor
    trainPoints = tf.stack(points, axis=0)

    print('loading labels')
    # get labels from file
    outClass = np.load(labels_dir + '\\labelsClass.npy', allow_pickle=True)
    outRegress = np.load(labels_dir + '\\regressClass.npy', allow_pickle=True)

    # load model
    model = load_model(model_path,
                       custom_objects={
                           'RepeatLayer': RepeatLayer,
                           'MaxPoolingVFELayer': MaxPoolingVFELayer
                       })
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss=['mse', 'mse'])

    # fit model
    history = model.fit(x=trainPoints,
                        y=[outClass, outRegress],
                        batch_size=1,
                        verbose=1,
                        epochs=1,
                        steps_per_epoch=180)

    print(history.history)
    model.save(save_path)
Example No. 7
def train(samples, level5Data, save_path):
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    labels_dir = 'labels3'

    points = []
    for i, sample in enumerate(samples):
        # pre-process data
        sampleLidarPoints = combine_lidar_data(sample,
                                               Constants.lyft_data_dir,
                                               level5Data)
        startTime = time.time()
        vfe_points = VFE_preprocessing(sampleLidarPoints, Constants.voxelx,
                                       Constants.voxely, Constants.voxelz,
                                       Constants.maxPoints, Constants.nx // 2,
                                       Constants.ny // 2, Constants.nz)
        # Need to convert to dense tensors because keras doesn't allow for sparse tensors.
        vfe_points_dense = sparse.to_dense(vfe_points,
                                           default_value=0.,
                                           validate_indices=False)
        points.append(vfe_points_dense)
        endTime = time.time()
        print(endTime - startTime)
        print('finished ' + str(i))
    # Stack into a rank-6 tensor
    trainPoints = tf.stack(points, axis=0)

    print('loading labels')
    # get labels from file
    outClass = np.load(labels_dir + '\\labelsClass.npy', allow_pickle=True)
    outRegress = np.load(labels_dir + '\\regressClass.npy', allow_pickle=True)

    # create model
    model = createModel(Constants.nx, Constants.ny, Constants.nz,
                        Constants.maxPoints)
    # plot_model(model, show_shapes=True)
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss=['mse', 'mse'])

    # fit model
    history = model.fit(x=trainPoints,
                        y=[outClass, outRegress],
                        batch_size=1,
                        verbose=1,
                        epochs=1,
                        steps_per_epoch=180)

    print(history.history)
    model.save(save_path)
Example No. 8
    def get_feature(self, feature_info, extracted_features, sequence_size):
        """
        Fetch the feature from the feature dictionary of extracted features

        Parameters
        ----------
        feature_info: dict
            Feature configuration information for the feature as specified in the feature_config
        extracted_features: tuple of (dict, dict)
            Tuple of (context, sequence) dictionaries of feature tensors
            extracted by parsing the serialized TFRecord SequenceExample
        sequence_size: int
            Number of elements in the sequence of a SequenceExample

        Returns
        -------
        tf.Tensor
            Feature tensor that is obtained from the extracted features for the given
            feature_info
        """
        extracted_context_features, extracted_sequence_features = extracted_features

        default_tensor = self.get_default_tensor(feature_info, sequence_size)

        if feature_info["tfrecord_type"] == SequenceExampleTypeKey.CONTEXT:
            feature_tensor = extracted_context_features.get(
                feature_info["name"], default_tensor)
            default_shape = [feature_info.get("max_len", 1)]
        else:
            feature_tensor = extracted_sequence_features.get(
                feature_info["name"], default_tensor)
            default_shape = [sequence_size, feature_info.get("max_len", 1)]

        if isinstance(feature_tensor, sparse.SparseTensor):
            """
            NOTE: Since we define the features as VarLenFeature in
            features spec, the extracted feature tensors will be sparse.
            Here, we convert them into dense tensors and also pad accordingly.
            """
            feature_tensor = sparse.reset_shape(feature_tensor,
                                                new_shape=default_shape)
            feature_tensor = sparse.to_dense(
                feature_tensor,
                default_value=self.feature_config.get_default_value(
                    feature_info))

        return feature_tensor
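
A toy run of the pad-via-reset_shape pattern described in the NOTE above, assuming eager mode; the target shape and default value are made up:

import tensorflow as tf
from tensorflow import sparse

st = tf.sparse.SparseTensor(indices=[[0, 0], [1, 1]],
                            values=[3.0, 7.0],
                            dense_shape=[2, 2])
padded = sparse.reset_shape(st, new_shape=[4, 3])    # assumed [sequence_size, max_len]
dense = sparse.to_dense(padded, default_value=-1.0)  # -1.0 is a made-up default
# dense == [[ 3., -1., -1.],
#           [-1.,  7., -1.],
#           [-1., -1., -1.],
#           [-1., -1., -1.]]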
Example No. 9
  def testNDimension(self):
    with self.cached_session() as sess:
      content = [["1 1:3.4 2:0.5 4:0.231", "1 1:3.4 2:0.5 4:0.231"],
                 ["1 2:2.5 3:inf 5:0.503", "1 2:2.5 3:inf 5:0.503"],
                 ["2 3:2.5 2:nan 1:0.105", "2 3:2.5 2:nan 1:0.105"]]
      sparse_features, labels = libsvm_dataset_ops.decode_libsvm(
          content, num_features=6, label_dtype=dtypes.float64)
      features = sparse.to_dense(
          sparse_features, validate_indices=False)

      self.assertAllEqual(labels.get_shape().as_list(), [3, 2])

      features, labels = sess.run([features, labels])
      self.assertAllEqual(labels, [[1, 1], [1, 1], [2, 2]])
      self.assertAllClose(
          features, [[[0, 3.4, 0.5, 0, 0.231, 0], [0, 3.4, 0.5, 0, 0.231, 0]], [
              [0, 0, 2.5, np.inf, 0, 0.503], [0, 0, 2.5, np.inf, 0, 0.503]
          ], [[0, 0.105, np.nan, 2.5, 0, 0], [0, 0.105, np.nan, 2.5, 0, 0]]])
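
For context, each LibSVM line above is 'label index:value ...', with missing indices implicitly zero. A plain-Python parse of one line from the test, matching its expected dense row:

line = "1 1:3.4 2:0.5 4:0.231"
label, *pairs = line.split()
dense = [0.0] * 6  # num_features=6
for pair in pairs:
    idx, val = pair.split(":")
    dense[int(idx)] = float(val)
# label == "1", dense == [0.0, 3.4, 0.5, 0.0, 0.231, 0.0]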
Example No. 10
    def testBasic(self):
        with self.cached_session() as sess:
            content = [
                "1 1:3.4 2:0.5 4:0.231", "1 2:2.5 3:inf 5:0.503",
                "2 3:2.5 2:nan 1:0.105"
            ]
            sparse_features, labels = libsvm_io.decode_libsvm(content,
                                                              num_features=6)
            features = sparse.to_dense(sparse_features, validate_indices=False)

            self.assertAllEqual(labels.get_shape().as_list(), [3])

            features, labels = sess.run([features, labels])
            self.assertAllEqual(labels, [1, 1, 2])
            self.assertAllClose(
                features,
                [[0, 3.4, 0.5, 0, 0.231, 0], [0, 0, 2.5, np.inf, 0, 0.503],
                 [0, 0.105, np.nan, 2.5, 0, 0]])
Example No. 11
    def test_dataset(self):
        """test_dataset"""
        libsvm_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "test_libsvm", "sample")
        dataset = libsvm_io.make_libsvm_dataset(libsvm_file, num_features=6)
        iterator = dataset.make_initializable_iterator()
        init_op = iterator.initializer
        sparse_features, labels = iterator.get_next()
        features = sparse.to_dense(sparse_features, validate_indices=False)
        with self.cached_session() as sess:
            sess.run(init_op)
            f, l = sess.run([features, labels])
            self.assertAllEqual(l, [1])
            self.assertAllClose(f, [[0, 3.4, 0.5, 0, 0.231, 0]])
            f, l = sess.run([features, labels])
            self.assertAllEqual(l, [1])
            self.assertAllClose(f, [[0, 0, 2.5, np.inf, 0, 0.503]])
            f, l = sess.run([features, labels])
            self.assertAllEqual(l, [2])
            self.assertAllClose(f, [[0, 0.105, np.nan, 2.5, 0, 0]])
            with self.assertRaises(errors.OutOfRangeError):
                sess.run([features, labels])
Example No. 12
    def _parse_sequence_example_fn(sequence_example_proto):
        """
        Parse the input `tf.SequenceExample` proto using the features_spec

        Parameters
        ----------
        sequence_example_proto : string
            serialized tfrecord SequenceExample protobuf message

        Returns
        -------
        features : dict
            parsed features as `tf.Tensor` objects extracted from the protobuf
        labels : `tf.Tensor`
            parsed label as a `tf.Tensor` object extracted from the protobuf
        """
        context_features, sequence_features = io.parse_single_sequence_example(
            serialized=sequence_example_proto,
            context_features=context_features_spec,
            sequence_features=sequence_features_spec,
        )

        features_dict = dict()

        # Handle context features
        for feature_info in feature_config.get_context_features():
            feature_node_name = feature_info.get("node_name", feature_info["name"])

            default_tensor = tf.constant(
                value=feature_config.get_default_value(feature_info), dtype=feature_info["dtype"],
            )
            feature_tensor = context_features.get(feature_info["name"], default_tensor)

            feature_tensor = tf.expand_dims(feature_tensor, axis=0)

            # Preprocess features
            feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

            features_dict[feature_node_name] = feature_tensor

        # Define mask to identify padded sequence
        if required_fields_only and not feature_config.get_rank("serving_info")["required"]:
            """
            Define dummy mask if the rank field is not a required field for serving

            NOTE:
            This masks all max_sequence_size as 1 as there is no real way to know
            the number of sequence in the query. There is no predefined required field,
            and hence we would need to do a full pass of all features to find the record shape.
            This approach might be unstable if different features have different shapes.

            Hence we just mask all sequence
            """
            features_dict["mask"] = tf.constant(
                value=1, shape=[max_sequence_size], dtype=feature_config.get_rank("dtype")
            )
            sequence_size = tf.constant(max_sequence_size, dtype=tf.int64)
        else:
            # Typically used at training time, to pad/clip to a fixed number of sequences per query

            # Use rank as a reference tensor to infer shape/sequence_size in query
            reference_tensor = sequence_features.get(feature_config.get_rank(key="node_name"))

            # Add mask for identifying padded sequence
            mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(reference_tensor)))
            sequence_size = tf.cast(tf.reduce_sum(mask), tf.int64)

            if pad_sequence:
                mask = tf.expand_dims(mask, axis=-1)

                def crop_fn():
                    tf.print("\n[WARN] Bad query found. Number of sequence : ", tf.shape(mask)[1])
                    return image.crop_to_bounding_box(
                        mask,
                        offset_height=0,
                        offset_width=0,
                        target_height=1,
                        target_width=max_sequence_size,
                    )

                mask = tf.cond(
                    tf.shape(mask)[1] <= max_sequence_size,
                    # Pad if there are missing sequence
                    lambda: image.pad_to_bounding_box(
                        mask,
                        offset_height=0,
                        offset_width=0,
                        target_height=1,
                        target_width=max_sequence_size,
                    ),
                    # Crop if there are extra sequence
                    crop_fn,
                )
                mask = tf.squeeze(mask)
            else:
                mask = tf.squeeze(mask, axis=0)

            # Check validity of mask
            tf.debugging.assert_greater(sequence_size, tf.constant(0, dtype=tf.int64))

            features_dict["mask"] = mask
            sequence_size = max_sequence_size if pad_sequence else sequence_size

        # Pad sequence features to max_sequence_size
        for feature_info in feature_config.get_sequence_features():
            feature_node_name = feature_info.get("node_name", feature_info["name"])

            default_tensor = tf.fill(
                value=tf.constant(
                    value=feature_config.get_default_value(feature_info),
                    dtype=feature_info["dtype"],
                ),
                dims=[max_sequence_size if pad_sequence else sequence_size],
            )
            feature_tensor = sequence_features.get(feature_info["name"], default_tensor)

            if isinstance(feature_tensor, sparse.SparseTensor):
                feature_tensor = sparse.reset_shape(
                    feature_tensor,
                    new_shape=[1, max_sequence_size if pad_sequence else sequence_size],
                )
                feature_tensor = sparse.to_dense(feature_tensor)
                feature_tensor = tf.squeeze(feature_tensor, axis=0)

            # Preprocess features
            feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

            features_dict[feature_node_name] = feature_tensor

        labels = features_dict.pop(feature_config.get_label(key="name"))

        return features_dict, labels
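
The pad/crop branch above applies tf.image bounding-box ops to the rank-3 mask. A toy eager-mode run of the padding case, with a made-up max_sequence_size of 5:

import tensorflow as tf

mask = tf.ones([1, 3, 1])  # 3 real sequence elements, shaped [1, seq, 1]
padded = tf.image.pad_to_bounding_box(mask,
                                      offset_height=0,
                                      offset_width=0,
                                      target_height=1,
                                      target_width=5)  # max_sequence_size=5 (assumed)
# tf.squeeze(padded) == [1., 1., 1., 0., 0.]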
Example No. 13
    def generate_and_add_mask(self, extracted_features, features_dict):
        """
        Create a mask to identify padded values

        Parameters
        ----------
        extracted_features: dict
            Dictionary of tensors extracted from the serialized TFRecord
        features_dict: dict
            Dictionary of tensors that will be used for model training/serving
            as inputs to the model

        Returns
        -------
        features_dict: dict
            Dictionary of tensors that will be used for model training/serving updated
            with the mask tensor if applicable
        sequence_size: int
            Number of elements in the sequence of the TFRecord
        """
        context_features, sequence_features = extracted_features
        if (self.required_fields_only and
                not self.feature_config.get_rank("serving_info")["required"]):
            """
            Define dummy mask if the rank field is not a required field for serving
            NOTE:
            This masks all max_sequence_size as 1 as there is no real way to know
            the number of sequence in the query. There is no predefined required field,
            and hence we would need to do a full pass of all features to find the record shape.
            This approach might be unstable if different features have different shapes.
            Hence we just mask all sequence
            """
            mask = tf.constant(
                value=1,
                shape=[self.max_sequence_size],
                dtype=self.feature_config.get_rank("dtype"),
            )
            sequence_size = tf.constant(self.max_sequence_size, dtype=tf.int64)
        else:
            # Typically used at training time, to pad/clip to a fixed number of sequences per query

            # Use rank as a reference tensor to infer shape/sequence_size in query
            reference_tensor = sequence_features.get(
                self.feature_config.get_rank(key="node_name"))

            # Add mask for identifying padded sequence
            mask = tf.ones_like(
                sparse.to_dense(sparse.reset_shape(reference_tensor)))

            if self.pad_sequence:
                mask = tf.squeeze(mask, axis=0)

                def crop_fn():
                    # NOTE: We currently ignore these cases as there is no clear
                    # way to select max_sequence_size from all the sequence features
                    tf.print("\n[WARN] Bad query found. Number of sequence : ",
                             tf.shape(mask)[0])
                    return mask

                mask = tf.cond(
                    tf.shape(mask)[0] <= self.max_sequence_size,
                    # Pad if there are missing sequence
                    lambda: tf.pad(mask, [[
                        0, self.max_sequence_size - tf.shape(mask)[0]
                    ]]),
                    # Crop if there are extra sequence
                    crop_fn,
                )
                sequence_size = tf.constant(self.max_sequence_size,
                                            dtype=tf.int64)
            else:
                mask = tf.squeeze(mask, axis=0)
                sequence_size = tf.cast(tf.reduce_sum(mask), tf.int64)

        # Check validity of mask
        tf.debugging.assert_greater(sequence_size,
                                    tf.constant(0, dtype=tf.int64))

        # Update features dictionary with the computed mask tensor
        features_dict["mask"] = mask

        return features_dict, sequence_size
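
Here the same padding is done with tf.pad on the squeezed 1-D mask. A toy eager-mode equivalent, again with a made-up max_sequence_size of 5:

import tensorflow as tf

max_sequence_size = 5  # assumed
mask = tf.ones([3])    # 3 real sequence elements
mask = tf.pad(mask, [[0, max_sequence_size - tf.shape(mask)[0]]])
# mask == [1., 1., 1., 0., 0.]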
Example No. 14
    def _parse_sequence_example_fn(sequence_example_proto):
        """
        Parse the input `tf.SequenceExample` proto using the features_spec

        Args:
            sequence_example_proto: tfrecord SequenceExample protobuf data

        Returns:
            features: parsed features extracted from the protobuf
            labels: parsed label extracted from the protobuf
        """
        context_features, sequence_features = io.parse_single_sequence_example(
            serialized=sequence_example_proto,
            context_features=context_features_spec,
            sequence_features=sequence_features_spec,
        )

        features_dict = dict()

        # Explode context features into all records
        for feature_info in feature_config.get_context_features():
            feature_node_name = feature_info.get("node_name",
                                                 feature_info["name"])
            feature_layer_info = feature_info.get("feature_layer_info")

            feature_tensor = context_features.get(feature_node_name)

            feature_tensor = tf.expand_dims(feature_tensor, axis=0)
            feature_tensor = tf.tile(feature_tensor,
                                     multiples=[max_num_records])

            # If feature is a string, then decode into numbers
            if feature_layer_info["type"] == FeatureTypeKey.STRING:
                feature_tensor = io.decode_raw(
                    feature_tensor,
                    out_type=tf.uint8,
                    fixed_length=feature_layer_info["max_length"],
                )
                feature_tensor = tf.cast(feature_tensor, tf.float32)

            features_dict[feature_node_name] = feature_tensor

        # Pad sequence features to max_num_records
        for feature_info in feature_config.get_sequence_features():
            feature_node_name = feature_info.get("node_name",
                                                 feature_info["name"])
            feature_layer_info = feature_info["feature_layer_info"]

            feature_tensor = sequence_features.get(feature_node_name)

            if isinstance(feature_tensor, sparse.SparseTensor):
                if feature_node_name == feature_config.get_rank(
                        key="node_name"):
                    # Add mask for identifying padded records
                    mask = tf.ones_like(
                        sparse.to_dense(sparse.reset_shape(feature_tensor)))
                    mask = tf.expand_dims(mask, axis=2)

                    def crop_fn():
                        tf.print(
                            "\n[WARN] Bad query found. Number of records : ",
                            tf.shape(mask)[1])
                        return image.crop_to_bounding_box(
                            mask,
                            offset_height=0,
                            offset_width=0,
                            target_height=1,
                            target_width=max_num_records,
                        )

                    mask = tf.cond(
                        tf.shape(mask)[1] < max_num_records,
                        # Pad if there are missing records
                        lambda: image.pad_to_bounding_box(
                            mask,
                            offset_height=0,
                            offset_width=0,
                            target_height=1,
                            target_width=max_num_records,
                        ),
                        # Crop if there are extra records
                        crop_fn,
                    )
                    mask = tf.squeeze(mask)

                    # Check validity of mask
                    tf.debugging.assert_greater(
                        tf.cast(tf.reduce_sum(mask), tf.float32),
                        tf.constant(0.0))

                    features_dict["mask"] = mask

                feature_tensor = sparse.reset_shape(
                    feature_tensor, new_shape=[1, max_num_records])
                feature_tensor = sparse.to_dense(feature_tensor)
                feature_tensor = tf.squeeze(feature_tensor)

                # If feature is a string, then decode into numbers
                if feature_layer_info["type"] == FeatureTypeKey.STRING:
                    feature_tensor = io.decode_raw(
                        feature_tensor,
                        out_type=tf.uint8,
                        fixed_length=feature_layer_info["max_length"],
                    )
                    feature_tensor = tf.cast(feature_tensor, tf.float32)
            else:
                raise ValueError("Invalid input : {}".format(feature_node_name))

            features_dict[feature_node_name] = feature_tensor

        labels = features_dict.pop(feature_config.get_label(key="name"))

        # Check if label is one-hot and correctly masked
        tf.debugging.assert_equal(tf.cast(tf.reduce_sum(labels), tf.float32),
                                  tf.constant(1.0))

        return features_dict, labels
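
The string branch above turns text features into fixed-length byte codes. A toy eager-mode run of that decode_raw trick, with a made-up fixed length of 4 bytes:

import tensorflow as tf

t = tf.constant(["abc", "de"])
codes = tf.io.decode_raw(t, out_type=tf.uint8, fixed_length=4)  # pads/truncates to 4 bytes
codes = tf.cast(codes, tf.float32)
# codes == [[97., 98., 99., 0.], [100., 101., 0., 0.]]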
Example No. 15
    def _parse_sequence_example_fn(sequence_example_proto):
        """
        Parse the input `tf.SequenceExample` proto using the features_spec

        Args:
            sequence_example_proto: tfrecord SequenceExample protobuf data

        Returns:
            TODO(ashish): note - "features" is not a Features object.  It's a {feat_name: tf.Tensor} mapping
            (so perhaps a bad name?)
            features: parsed features extracted from the protobuf
            labels: parsed label extracted from the protobuf
        """
        context, examples = io.parse_single_sequence_example(
            serialized=sequence_example_proto,
            context_features=context_features_spec,
            sequence_features=sequence_features_spec,
        )

        features = dict()

        # Explode context features into all records
        for feat, t in context.items():
            t = tf.expand_dims(t, axis=0)
            t = tf.tile(t, multiples=[max_num_records])

            # If feature is a string, then decode into numbers
            if feature_config.get_dict(
            )[feat]["type"] == FeatureTypeKey.STRING:
                t = io.decode_raw(
                    t,
                    out_type=tf.uint8,
                    fixed_length=feature_config.get_dict()[feat]["max_length"],
                )
                t = tf.cast(t, tf.float32)

            features[feat] = t

        # Pad sequence features to max_num_records
        for feat, t in examples.items():
            if isinstance(t, sparse.SparseTensor):
                if feat == "pos":
                    # Add mask for identifying padded records
                    mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(t)))
                    mask = tf.expand_dims(mask, axis=2)
                    mask = image.pad_to_bounding_box(
                        mask,
                        offset_height=0,
                        offset_width=0,
                        target_height=1,
                        target_width=max_num_records,
                    )
                    features["mask"] = tf.squeeze(mask)

                t = sparse.reset_shape(t, new_shape=[1, max_num_records])
                t = sparse.to_dense(t)
                t = tf.squeeze(t)

                # If feature is a string, then decode into numbers
                if feature_config.get_dict(
                )[feat]["type"] == FeatureTypeKey.STRING:
                    t = io.decode_raw(
                        t,
                        out_type=tf.uint8,
                        fixed_length=feature_config.get_dict()[feat]
                        ["max_length"],
                    )
                    t = tf.cast(t, tf.float32)
            else:
                #
                # Handle dense tensors
                #
                # if len(t.shape) == 1:
                #     t = tf.expand_dims(t, axis=0)
                # if len(t.shape) == 2:
                #     t = tf.pad(t, paddings=[[0, 0], [0, max_num_records]])
                #     t = tf.squeeze(t)
                # else:
                #     raise Exception('Invalid input : {}'.format(feat))
                raise ValueError("Invalid input : {}".format(feat))

            features[feat] = t

        labels = features.pop(feature_config.label)
        return features, labels