def saveTrainDataForSample(samples):
    """Pre-process every sample into a dense VFE tensor and stack the results.

    For each sample the lidar data is combined with ``combine_lidar_data``,
    voxelised with ``VFE_preprocessing``, given a leading axis of size 1 and
    converted from sparse to dense. The elapsed voxelisation time and the
    voxel tensor shape are printed for every sample.

    Parameters
    ----------
    samples : iterable
        Samples in whatever form ``combine_lidar_data`` accepts.

    Returns
    -------
    The result of ``tf.stack`` over the per-sample dense tensors. Earlier
    revisions built this stack and then discarded it.
    """
    # Function-scope import, hoisted out of the per-sample loop.
    import time

    # TODO make this write tensors to file.
    tensors = []
    for sample in samples:
        # pre-process data
        lidar_points = combine_lidar_data(sample, Constants.dataDir)
        start_time = time.time()
        vfe_points = VFE_preprocessing(lidar_points, Constants.voxelx, Constants.voxely,
                                       Constants.voxelz, Constants.maxPoints,
                                       Constants.nx // 2, Constants.ny // 2, Constants.nz)
        print(time.time() - start_time)
        print(vfe_points.shape)

        # Prepend an axis of size 1, then densify because keras does not
        # accept the sparse form here.
        vfe_points = sparse.reshape(vfe_points, (1, ) + vfe_points.shape)
        tensors.append(sparse.to_dense(vfe_points, default_value=0., validate_indices=False))

    return tf.stack(tensors)
def main2(sample):
    """Run one training step on a single sample and save the updated model.

    The sample's lidar data is voxelised and densified, its annotations are
    turned into label rows (translation, size, yaw, category number), and
    the model stored in ``models\\Epoch5.h5`` is fitted for one epoch on
    that single example before being written to ``models\\Epoch6.h5``.

    Parameters
    ----------
    sample : dict
        Sample record; its ``'anns'`` entry is read here and the whole
        record is passed to ``combine_lidar_data``.
    """
    import time

    # Set constants
    dataDir = 'C:\\Users\\pmwws\\Documents\\ML project\\3d-object-detection-for-autonomous-vehicles'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # pre-process data
    testSampleLidarPoints = combine_lidar_data(sample, dataDir)
    startTime = time.time()
    testVFEPoints = VFE_preprocessing(testSampleLidarPoints, voxelx, voxely, voxelz,
                                      maxPoints, nx // 2, ny // 2, nz)
    endTime = time.time()
    print(endTime - startTime)
    print(testVFEPoints.shape)

    # Prepend an axis of size 1, then densify because keras does not accept
    # the sparse form here.
    testVFEPoints = sparse.reshape(testVFEPoints, (1, ) + testVFEPoints.shape)
    testVFEPointsDense = sparse.to_dense(testVFEPoints, default_value=0., validate_indices=False)

    # pre-process labels
    labels = []
    for token in sample['anns']:
        ann = level5Data.get('sample_annotation', token)
        # Copy before extending: `row = ann['translation']; row += ...` would
        # grow the annotation's own list in place, so a second call on the
        # same sample would see already-extended rows.
        row = list(ann['translation'])
        row += ann['size']
        quaternion = Quaternion(ann['rotation'])
        row += [quaternion.yaw_pitch_roll[0]]

        instance = level5Data.get('instance', ann['instance_token'])
        category = level5Data.get('category', instance['category_token'])['name']
        row += [catToNum[category]]
        labels.append(row)
    labels = np.array(labels)
    outClass, outRegress = preprocessLabels(labels)
    # Prepend an axis of size 1 to both label arrays.
    outClass = np.reshape(outClass, (1, ) + outClass.shape)
    outRegress = np.reshape(outRegress, (1, ) + outRegress.shape)

    # create model
    # NOTE(review): loading, fitting and saving are all assumed to belong
    # inside the CPU device scope - confirm.
    with tf.device('/device:CPU:0'):
        model = load_model('models\\Epoch5.h5',
                           custom_objects={
                               'RepeatLayer': RepeatLayer,
                               'MaxPoolingVFELayer': MaxPoolingVFELayer
                           })

        # fit model
        history = model.fit(x=testVFEPointsDense, y=[outClass, outRegress],
                            batch_size=1, verbose=1, epochs=1)
        print(history.history)
        model.save('models\\Epoch6.h5')
def predictMain(samples, outPath, level5Data, model):
    """Run the model on every sample and save its two outputs as .npy files.

    Each sample is voxelised, given a leading axis of size 1 and densified
    before being passed to ``model.predict``. The two predicted arrays are
    written to ``<outPath>\\sample<i>_label.npy`` and
    ``<outPath>\\sample<i>_regress.npy``, where ``i`` is the sample's index.

    Parameters
    ----------
    samples : sequence
        Samples in whatever form ``combine_lidar_data`` accepts.
    outPath : str
        Directory prefix for the output files (joined with a backslash).
    level5Data
        Dataset handle forwarded to ``combine_lidar_data``.
    model
        Object whose ``predict`` returns a (prob, regress) pair.
    """
    import time

    # Set constants
    dataDir = 'E:\\CS539 Machine Learning\\3d-object-detection-for-autonomous-vehicles'

    for i, sample in enumerate(samples):
        # pre-process data
        sampleLidarPoints = combine_lidar_data(sample, dataDir, level5Data)
        startTime = time.time()
        vfePoints = VFE_preprocessing(sampleLidarPoints, Constants.voxelx, Constants.voxely,
                                      Constants.voxelz, Constants.maxPoints,
                                      Constants.nx // 2, Constants.ny // 2, Constants.nz)
        # Prepend an axis of size 1, then densify because keras does not
        # accept the sparse form here.
        vfePoints = sparse.reshape(vfePoints, (1, ) + vfePoints.shape)
        vfePointsDense = sparse.to_dense(vfePoints, default_value=0., validate_indices=False)
        print(time.time() - startTime)
        print('finished ' + str(i))

        prob, regress = model.predict(vfePointsDense)
        np.save(outPath + '\\sample' + str(i) + '_label.npy', prob)
        np.save(outPath + '\\sample' + str(i) + '_regress.npy', regress)
def get_feature(self, feature_info, extracted_features, sequence_size):
    """
    Look up one feature among the tensors parsed from a SequenceExample

    Parameters
    ----------
    feature_info: dict
        Feature configuration; the "tfrecord_type" and "name" entries are read here
    extracted_features: tuple
        Pair of dictionaries: (context feature tensors, sequence feature tensors)
    sequence_size: int
        Forwarded to get_default_tensor to build the fallback tensor

    Returns
    -------
    tf.Tensor
        For context features, the looked-up (or default) tensor with a new
        leading axis. For sequence features, the looked-up (or default) tensor;
        sparse tensors are densified and their leading axis is squeezed away.
    """
    context_tensors, sequence_tensors = extracted_features
    fallback = self.get_default_tensor(feature_info, sequence_size)

    if feature_info["tfrecord_type"] == SequenceExampleTypeKey.CONTEXT:
        context_tensor = context_tensors.get(feature_info["name"], fallback)
        # Adjust shape
        return tf.expand_dims(context_tensor, axis=0)

    sequence_tensor = sequence_tensors.get(feature_info["name"], fallback)
    if not isinstance(sequence_tensor, sparse.SparseTensor):
        return sequence_tensor

    densified = sparse.to_dense(sparse.reset_shape(sequence_tensor))
    return tf.squeeze(densified, axis=0)
def get_feature(self, feature_info, extracted_features, sequence_size=0):
    """
    Look up one feature among the tensors parsed from a serialized TFRecord

    Parameters
    ----------
    feature_info: dict
        Feature configuration; only the "name" entry is read here
    extracted_features: dict
        Dictionary of feature tensors keyed by feature name
    sequence_size: int, optional
        Forwarded to get_default_tensor to build the fallback tensor

    Returns
    -------
    tf.Tensor
        The looked-up tensor (densified when sparse), or the default tensor
        when the feature is missing or has no elements
    """
    fallback = self.get_default_tensor(feature_info, sequence_size)
    tensor = extracted_features.get(feature_info["name"], fallback)

    if isinstance(tensor, tf.sparse.SparseTensor):
        tensor = sparse.reset_shape(tensor)
        tensor = sparse.to_dense(tensor)

    # NOTE: If a feature is in the features_spec, then it gets retrieved as an
    # empty sparse tensor. So we need to replace with default tensor
    # NOTE(review): the emptiness check is assumed to apply to every tensor,
    # not only to densified ones - confirm the intended nesting.
    if tf.size(tensor) == tf.constant(0):
        return fallback

    return tensor
def train_with_model(samples, level5Data, model_path, save_path):
    """Continue training a saved model on the given samples.

    Every sample is voxelised and densified, the results are stacked along
    a new first axis, labels are read from the ``labels3`` directory, and
    the model loaded from ``model_path`` is recompiled with SGD / MSE losses
    and fitted for one epoch before being saved to ``save_path``.

    Parameters
    ----------
    samples : sequence
        Samples in whatever form ``combine_lidar_data`` accepts.
    level5Data
        Dataset handle forwarded to ``combine_lidar_data``.
    model_path : str
        Path of the saved model to load.
    save_path : str
        Path the fitted model is written to.
    """
    labels_dir = 'labels3'

    dense_inputs = []
    for index, sample in enumerate(samples):
        # pre-process data
        lidar_points = combine_lidar_data(sample, Constants.lyft_data_dir, level5Data)
        tic = time.time()
        voxels = VFE_preprocessing(lidar_points, Constants.voxelx, Constants.voxely,
                                   Constants.voxelz, Constants.maxPoints,
                                   Constants.nx // 2, Constants.ny // 2, Constants.nz)
        # Convert to dense here because keras won't take sparse tensors
        dense_inputs.append(sparse.to_dense(voxels, default_value=0., validate_indices=False))
        toc = time.time()
        print(toc - tic)
        print('finished ' + str(index))

    # Stack the per-sample tensors along a new first axis
    trainPoints = tf.stack(dense_inputs, axis=0)

    print('loading labels')
    # get labels from file
    class_targets = np.load(labels_dir + '\\labelsClass.npy', allow_pickle=True)
    regress_targets = np.load(labels_dir + '\\regressClass.npy', allow_pickle=True)

    # load model
    custom_layers = {
        'RepeatLayer': RepeatLayer,
        'MaxPoolingVFELayer': MaxPoolingVFELayer
    }
    model = load_model(model_path, custom_objects=custom_layers)
    model.compile(optimizer=optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                  loss=['mse', 'mse'])

    # fit model
    history = model.fit(x=trainPoints, y=[class_targets, regress_targets],
                        batch_size=1, verbose=1, epochs=1, steps_per_epoch=180)
    print(history.history)
    model.save(save_path)
def train(samples, level5Data, save_path):
    """Train a freshly created model on the given samples.

    Every sample is voxelised and densified, the results are stacked along
    a new first axis, labels are read from the ``labels3`` directory, and a
    model built by ``createModel`` is compiled with SGD / MSE losses and
    fitted for one epoch before being saved to ``save_path``.

    Parameters
    ----------
    samples : sequence
        Samples in whatever form ``combine_lidar_data`` accepts.
    level5Data
        Dataset handle forwarded to ``combine_lidar_data``.
    save_path : str
        Path the fitted model is written to.
    """
    labels_dir = 'labels3'

    dense_inputs = []
    for index, sample in enumerate(samples):
        # pre-process data
        lidar_points = combine_lidar_data(sample, Constants.lyft_data_dir, level5Data)
        tic = time.time()
        voxels = VFE_preprocessing(lidar_points, Constants.voxelx, Constants.voxely,
                                   Constants.voxelz, Constants.maxPoints,
                                   Constants.nx // 2, Constants.ny // 2, Constants.nz)
        # Need to convert to dense tensors because keras doesn't allow for sparse tensors.
        dense_inputs.append(sparse.to_dense(voxels, default_value=0., validate_indices=False))
        toc = time.time()
        print(toc - tic)
        print('finished ' + str(index))

    # Stack the per-sample tensors along a new first axis
    trainPoints = tf.stack(dense_inputs, axis=0)

    print('loading labels')
    # get labels from file
    class_targets = np.load(labels_dir + '\\labelsClass.npy', allow_pickle=True)
    regress_targets = np.load(labels_dir + '\\regressClass.npy', allow_pickle=True)

    # create model
    model = createModel(Constants.nx, Constants.ny, Constants.nz, Constants.maxPoints)
    model.compile(optimizer=optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                  loss=['mse', 'mse'])

    # fit model
    history = model.fit(x=trainPoints, y=[class_targets, regress_targets],
                        batch_size=1, verbose=1, epochs=1, steps_per_epoch=180)
    print(history.history)
    model.save(save_path)
def get_feature(self, feature_info, extracted_features, sequence_size):
    """
    Look up one feature among the tensors parsed from a SequenceExample,
    densifying and padding it when it was parsed as a sparse tensor

    Parameters
    ----------
    feature_info: dict
        Feature configuration; the "tfrecord_type", "name" and optional
        "max_len" entries are read here
    extracted_features: tuple
        Pair of dictionaries: (context feature tensors, sequence feature tensors)
    sequence_size: int
        Forwarded to get_default_tensor and used as the first dimension of
        the target shape for sequence features

    Returns
    -------
    tf.Tensor
        The looked-up (or default) tensor; sparse tensors are reshaped to the
        target shape and densified with the feature's configured default value
    """
    context_tensors, sequence_tensors = extracted_features
    fallback = self.get_default_tensor(feature_info, sequence_size)

    if feature_info["tfrecord_type"] == SequenceExampleTypeKey.CONTEXT:
        tensor = context_tensors.get(feature_info["name"], fallback)
        target_shape = [feature_info.get("max_len", 1)]
    else:
        tensor = sequence_tensors.get(feature_info["name"], fallback)
        target_shape = [sequence_size, feature_info.get("max_len", 1)]

    if not isinstance(tensor, sparse.SparseTensor):
        return tensor

    # NOTE: Since we define the features as VarLenFeature in features spec,
    # the extracted feature tensors will be sparse. Here, we convert them
    # into dense tensors and also pad accordingly.
    reshaped = sparse.reset_shape(tensor, new_shape=target_shape)
    return sparse.to_dense(
        reshaped,
        default_value=self.feature_config.get_default_value(feature_info))
def testNDimension(self):
    """Decode a 3x2 batch of libsvm rows and check labels and dense features."""
    rows = [
        "1 1:3.4 2:0.5 4:0.231",
        "1 2:2.5 3:inf 5:0.503",
        "2 3:2.5 2:nan 1:0.105",
    ]
    dense_rows = [
        [0, 3.4, 0.5, 0, 0.231, 0],
        [0, 0, 2.5, np.inf, 0, 0.503],
        [0, 0.105, np.nan, 2.5, 0, 0],
    ]
    # Every row appears twice, giving the input its second dimension.
    content = [[row, row] for row in rows]
    expected_features = [[dense, dense] for dense in dense_rows]

    with self.cached_session() as sess:
        sparse_features, labels = libsvm_dataset_ops.decode_libsvm(
            content, num_features=6, label_dtype=dtypes.float64)
        features = sparse.to_dense(sparse_features, validate_indices=False)

        self.assertAllEqual(labels.get_shape().as_list(), [3, 2])

        features, labels = sess.run([features, labels])
        self.assertAllEqual(labels, [[1, 1], [1, 1], [2, 2]])
        self.assertAllClose(features, expected_features)
def testBasic(self):
    """Decode three libsvm rows and check labels and dense features."""
    content = [
        "1 1:3.4 2:0.5 4:0.231",
        "1 2:2.5 3:inf 5:0.503",
        "2 3:2.5 2:nan 1:0.105",
    ]
    expected_labels = [1, 1, 2]
    expected_features = [
        [0, 3.4, 0.5, 0, 0.231, 0],
        [0, 0, 2.5, np.inf, 0, 0.503],
        [0, 0.105, np.nan, 2.5, 0, 0],
    ]

    with self.cached_session() as sess:
        sparse_features, labels = libsvm_io.decode_libsvm(content, num_features=6)
        features = sparse.to_dense(sparse_features, validate_indices=False)

        self.assertAllEqual(labels.get_shape().as_list(), [3])

        features, labels = sess.run([features, labels])
        self.assertAllEqual(labels, expected_labels)
        self.assertAllClose(features, expected_features)
def test_dataset(self):
    """Read the sample libsvm file record by record, then expect exhaustion."""
    here = os.path.dirname(os.path.abspath(__file__))
    libsvm_file = os.path.join(here, "test_libsvm", "sample")

    dataset = libsvm_io.make_libsvm_dataset(libsvm_file, num_features=6)
    iterator = dataset.make_initializable_iterator()
    init_op = iterator.initializer
    sparse_features, labels = iterator.get_next()
    features = sparse.to_dense(sparse_features, validate_indices=False)

    # (labels, dense features) expected from each successive fetch
    expected_records = [
        ([1], [[0, 3.4, 0.5, 0, 0.231, 0]]),
        ([1], [[0, 0, 2.5, np.inf, 0, 0.503]]),
        ([2], [[0, 0.105, np.nan, 2.5, 0, 0]]),
    ]

    with self.cached_session() as sess:
        sess.run(init_op)
        for want_labels, want_features in expected_records:
            got_features, got_labels = sess.run([features, labels])
            self.assertAllEqual(got_labels, want_labels)
            self.assertAllClose(got_features, want_features)
        # A further fetch must report the end of the dataset.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run([features, labels])
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse the input `tf.SequenceExample` proto using the features_spec

    Context features get a new leading axis, sequence features are densified
    (and padded to max_sequence_size when pad_sequence is set), every feature
    goes through preprocess_feature, and a "mask" entry is added.

    Parameters
    ----------
    sequence_example_proto : string
        serialized tfrecord SequenceExample protobuf message

    Returns
    -------
    features : dict
        parsed features as `tf.Tensor` objects extracted from the protobuf,
        keyed by node name, including the "mask" entry
    labels : `tf.Tensor`
        parsed label as a `tf.Tensor` object, popped from the features dict
    """
    context_features, sequence_features = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    features_dict = dict()

    # Handle context features
    for feature_info in feature_config.get_context_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])

        default_tensor = tf.constant(
            value=feature_config.get_default_value(feature_info),
            dtype=feature_info["dtype"],
        )
        feature_tensor = context_features.get(feature_info["name"], default_tensor)

        feature_tensor = tf.expand_dims(feature_tensor, axis=0)

        # Preprocess features
        feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

        features_dict[feature_node_name] = feature_tensor

    # Define mask to identify padded sequence
    if required_fields_only and not feature_config.get_rank("serving_info")["required"]:
        # Define dummy mask if the rank field is not a required field for serving
        #
        # NOTE:
        # This masks all max_sequence_size as 1 as there is no real way to know
        # the number of sequence in the query. There is no predefined required
        # field, and hence we would need to do a full pass of all features to
        # find the record shape. This approach might be unstable if different
        # features have different shapes.
        #
        # Hence we just mask all sequence
        features_dict["mask"] = tf.constant(
            value=1, shape=[max_sequence_size], dtype=feature_config.get_rank("dtype")
        )
        sequence_size = tf.constant(max_sequence_size, dtype=tf.int64)
    else:
        # Typically used at training time, to pad/clip to a fixed number of sequence per query

        # Use rank as a reference tensor to infer shape/sequence_size in query
        reference_tensor = sequence_features.get(feature_config.get_rank(key="node_name"))

        # Add mask for identifying padded sequence
        mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(reference_tensor)))
        sequence_size = tf.cast(tf.reduce_sum(mask), tf.int64)

        if pad_sequence:
            # The image pad/crop helpers are given a 3-D tensor, so a trailing
            # axis is added around the sequence axis.
            mask = tf.expand_dims(mask, axis=-1)

            def crop_fn():
                tf.print("\n[WARN] Bad query found. Number of sequence : ", tf.shape(mask)[1])
                return image.crop_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_sequence_size,
                )

            mask = tf.cond(
                tf.shape(mask)[1] <= max_sequence_size,
                # Pad if there are missing sequence
                lambda: image.pad_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_sequence_size,
                ),
                # Crop if there are extra sequence
                crop_fn,
            )
            # Drop only the two wrapper axes. An unqualified squeeze would also
            # drop the sequence axis when max_sequence_size == 1, leaving a
            # scalar mask.
            mask = tf.squeeze(mask, axis=[0, 2])
        else:
            mask = tf.squeeze(mask, axis=0)

        # Check validity of mask
        tf.debugging.assert_greater(sequence_size, tf.constant(0, dtype=tf.int64))

        features_dict["mask"] = mask

    # From here on sequence_size is the length every sequence feature is shaped to
    sequence_size = max_sequence_size if pad_sequence else sequence_size

    # Pad sequence features to max_sequence_size
    for feature_info in feature_config.get_sequence_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])

        default_tensor = tf.fill(
            value=tf.constant(
                value=feature_config.get_default_value(feature_info),
                dtype=feature_info["dtype"],
            ),
            dims=[sequence_size],
        )
        feature_tensor = sequence_features.get(feature_info["name"], default_tensor)

        if isinstance(feature_tensor, sparse.SparseTensor):
            feature_tensor = sparse.reset_shape(
                feature_tensor,
                new_shape=[1, sequence_size],
            )
            feature_tensor = sparse.to_dense(feature_tensor)
            feature_tensor = tf.squeeze(feature_tensor, axis=0)

        # Preprocess features
        feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

        features_dict[feature_node_name] = feature_tensor

    labels = features_dict.pop(feature_config.get_label(key="name"))

    return features_dict, labels
def generate_and_add_mask(self, extracted_features, features_dict):
    """
    Build the mask that marks real (non-padded) sequence entries and store it
    in the features dictionary under "mask"

    Parameters
    ----------
    extracted_features: tuple
        Pair of dictionaries (context features, sequence features) extracted
        from the serialized TFRecord; only the sequence features are read
    features_dict: dict
        Dictionary of tensors used as model inputs; updated in place

    Returns
    -------
    features_dict: dict
        The same dictionary, now holding the mask tensor
    sequence_size: tf.Tensor
        int64 scalar: max_sequence_size for the dummy and padded masks,
        otherwise the sum of the mask
    """
    # Only the sequence features are read; unpacking still requires a pair.
    _, sequence_features = extracted_features

    use_dummy_mask = (
        self.required_fields_only
        and not self.feature_config.get_rank("serving_info")["required"]
    )

    if use_dummy_mask:
        # Define dummy mask if the rank field is not a required field for serving
        #
        # NOTE:
        # This masks all max_sequence_size as 1 as there is no real way to know
        # the number of sequence in the query. There is no predefined required
        # field, and hence we would need to do a full pass of all features to
        # find the record shape. This approach might be unstable if different
        # features have different shapes.
        #
        # Hence we just mask all sequence
        mask = tf.constant(
            value=1,
            shape=[self.max_sequence_size],
            dtype=self.feature_config.get_rank("dtype"),
        )
        sequence_size = tf.constant(self.max_sequence_size, dtype=tf.int64)
    else:
        # Typically used at training time, to pad/clip to a fixed number of sequence per query

        # Use rank as a reference tensor to infer shape/sequence_size in query
        rank_node_name = self.feature_config.get_rank(key="node_name")
        reference_tensor = sequence_features.get(rank_node_name)

        # Add mask for identifying padded sequence
        dense_reference = sparse.to_dense(sparse.reset_shape(reference_tensor))
        unpadded_mask = tf.squeeze(tf.ones_like(dense_reference), axis=0)

        if self.pad_sequence:

            def pad_fn():
                missing = self.max_sequence_size - tf.shape(unpadded_mask)[0]
                return tf.pad(unpadded_mask, [[0, missing]])

            def crop_fn():
                # NOTE: We currently ignore these cases as there is no clear
                # way to select max_sequence_size from all the sequence features
                tf.print("\n[WARN] Bad query found. Number of sequence : ",
                         tf.shape(unpadded_mask)[0])
                return unpadded_mask

            mask = tf.cond(
                tf.shape(unpadded_mask)[0] <= self.max_sequence_size,
                # Pad if there are missing sequence
                pad_fn,
                # Crop if there are extra sequence
                crop_fn,
            )
            sequence_size = tf.constant(self.max_sequence_size, dtype=tf.int64)
        else:
            mask = unpadded_mask
            sequence_size = tf.cast(tf.reduce_sum(mask), tf.int64)

    # Check validity of mask
    tf.debugging.assert_greater(sequence_size, tf.constant(0, dtype=tf.int64))

    # Update features dictionary with the computed mask tensor
    features_dict["mask"] = mask

    return features_dict, sequence_size
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse the input `tf.SequenceExample` proto using the features_spec

    Context features are tiled max_num_records times, sequence features are
    densified and padded to max_num_records, string features are decoded into
    float32 byte values, and a "mask" entry is derived from the rank feature.

    Args:
        sequence_example_proto: tfrecord SequenceExample protobuf data

    Returns:
        features: parsed features extracted from the protobuf, keyed by
            node name, including the "mask" entry
        labels: parsed label extracted from the protobuf

    Raises:
        ValueError: if a sequence feature was not parsed as a sparse tensor
    """
    context_features, sequence_features = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    features_dict = dict()

    # Explode context features into all records
    for feature_info in feature_config.get_context_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        feature_layer_info = feature_info.get("feature_layer_info")

        feature_tensor = context_features.get(feature_node_name)

        feature_tensor = tf.expand_dims(feature_tensor, axis=0)
        feature_tensor = tf.tile(feature_tensor, multiples=[max_num_records])

        # If feature is a string, then decode into numbers
        if feature_layer_info["type"] == FeatureTypeKey.STRING:
            feature_tensor = io.decode_raw(
                feature_tensor,
                out_type=tf.uint8,
                fixed_length=feature_layer_info["max_length"],
            )
            feature_tensor = tf.cast(feature_tensor, tf.float32)

        features_dict[feature_node_name] = feature_tensor

    # Pad sequence features to max_num_records
    for feature_info in feature_config.get_sequence_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        feature_layer_info = feature_info["feature_layer_info"]

        feature_tensor = sequence_features.get(feature_node_name)

        if not isinstance(feature_tensor, sparse.SparseTensor):
            # The message previously referenced an undefined name, so this
            # path raised NameError instead of the intended ValueError.
            raise ValueError("Invalid input : {}".format(feature_node_name))

        if feature_node_name == feature_config.get_rank(key="node_name"):
            # Add mask for identifying padded records
            mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(feature_tensor)))
            # The image pad/crop helpers are given a 3-D tensor, so a trailing
            # axis is added around the record axis.
            mask = tf.expand_dims(mask, axis=2)

            def crop_fn():
                tf.print("\n[WARN] Bad query found. Number of records : ",
                         tf.shape(mask)[1])
                return image.crop_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_num_records,
                )

            mask = tf.cond(
                # `<=` so that a query with exactly max_num_records records is
                # not reported as a bad query.
                tf.shape(mask)[1] <= max_num_records,
                # Pad if there are missing records
                lambda: image.pad_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_num_records,
                ),
                # Crop if there are extra records
                crop_fn,
            )
            # Drop only the two wrapper axes; an unqualified squeeze would
            # also drop the record axis when max_num_records == 1.
            mask = tf.squeeze(mask, axis=[0, 2])

            # Check validity of mask
            tf.debugging.assert_greater(
                tf.cast(tf.reduce_sum(mask), tf.float32), tf.constant(0.0))

            features_dict["mask"] = mask

        feature_tensor = sparse.reset_shape(
            feature_tensor, new_shape=[1, max_num_records])
        feature_tensor = sparse.to_dense(feature_tensor)
        # Remove the leading axis of size 1 set by reset_shape above
        feature_tensor = tf.squeeze(feature_tensor, axis=0)

        # If feature is a string, then decode into numbers
        if feature_layer_info["type"] == FeatureTypeKey.STRING:
            feature_tensor = io.decode_raw(
                feature_tensor,
                out_type=tf.uint8,
                fixed_length=feature_layer_info["max_length"],
            )
            feature_tensor = tf.cast(feature_tensor, tf.float32)

        features_dict[feature_node_name] = feature_tensor

    labels = features_dict.pop(feature_config.get_label(key="name"))

    # Check if label is one-hot and correctly masked
    tf.debugging.assert_equal(tf.cast(tf.reduce_sum(labels), tf.float32),
                              tf.constant(1.0))

    return features_dict, labels
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse a serialized SequenceExample proto using the features_spec

    Args:
        sequence_example_proto: tfrecord SequenceExample protobuf data

    Returns:
        TODO(ashish): note - "features" is not a Features object. It's a
        {feat_name: tf.Tensor} mapping (so perhaps a bad name?)
        features: parsed features extracted from the protobuf, plus a "mask"
            entry derived from the "pos" feature
        labels: parsed label, popped from the features mapping

    Raises:
        ValueError: if a sequence feature was not parsed as a sparse tensor
    """
    context, examples = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    def _decode_if_string(name, tensor):
        # If feature is a string, then decode into numbers
        if feature_config.get_dict()[name]["type"] != FeatureTypeKey.STRING:
            return tensor
        decoded = io.decode_raw(
            tensor,
            out_type=tf.uint8,
            fixed_length=feature_config.get_dict()[name]["max_length"],
        )
        return tf.cast(decoded, tf.float32)

    features = dict()

    # Explode context features into all records
    for name, tensor in context.items():
        tiled = tf.tile(tf.expand_dims(tensor, axis=0), multiples=[max_num_records])
        features[name] = _decode_if_string(name, tiled)

    # Pad sequence features to max_num_records
    for name, tensor in examples.items():
        if not isinstance(tensor, sparse.SparseTensor):
            # Dense sequence tensors are not handled; an earlier padding
            # attempt for them was left disabled.
            raise ValueError("Invalid input : {}".format(name))

        if name == "pos":
            # Add mask for identifying padded records
            ones = tf.ones_like(sparse.to_dense(sparse.reset_shape(tensor)))
            padded_ones = image.pad_to_bounding_box(
                tf.expand_dims(ones, axis=2),
                offset_height=0,
                offset_width=0,
                target_height=1,
                target_width=max_num_records,
            )
            features["mask"] = tf.squeeze(padded_ones)

        resized = sparse.reset_shape(tensor, new_shape=[1, max_num_records])
        dense = tf.squeeze(sparse.to_dense(resized))
        features[name] = _decode_if_string(name, dense)

    labels = features.pop(feature_config.label)

    return features, labels