def parse_sequence_example(serialized):
    """Parse one serialized SequenceExample into a flat tuple of tensors.

    Args:
        serialized: A single serialized SequenceExample, handed directly to
            ``tf.parse_single_sequence_example``.

    Returns:
        A 16-tuple, in this order: chunk_id, words, words_len, chars,
        chars_len, begin_span, end_span, spans_len, cand_entities,
        cand_entities_scores, cand_entities_labels, cand_entities_len,
        ground_truth, ground_truth_len, begin_gm, end_gm. The four
        variable-length features (chars and the three cand_entities*
        entries) are converted from sparse to dense before being returned.
    """

    def int64_per_step():
        # An empty per-step shape yields a vector; using [1] instead would
        # probably produce a matrix with a single column.
        return tf.FixedLenSequenceFeature([], dtype=tf.int64)

    sequence_features = {
        "words": int64_per_step(),
        "chars": tf.VarLenFeature(tf.int64),
        "chars_len": int64_per_step(),
        "begin_span": int64_per_step(),
        "end_span": int64_per_step(),
        "cand_entities": tf.VarLenFeature(tf.int64),
        "cand_entities_scores": tf.VarLenFeature(tf.float32),
        "cand_entities_labels": tf.VarLenFeature(tf.int64),
        "cand_entities_len": int64_per_step(),
        "ground_truth": int64_per_step(),
        # begin_gm / end_gm are always part of the spec.
        "begin_gm": int64_per_step(),
        "end_gm": int64_per_step(),
    }
    context_features = {
        "chunk_id": tf.FixedLenFeature([], dtype=tf.string),
        "words_len": tf.FixedLenFeature([], dtype=tf.int64),
        "spans_len": tf.FixedLenFeature([], dtype=tf.int64),
        "ground_truth_len": tf.FixedLenFeature([], dtype=tf.int64),
    }

    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features=context_features,
        sequence_features=sequence_features)

    # Densify the variable-length features in the order they are returned.
    chars = tf.sparse_tensor_to_dense(sequence["chars"])
    cand_entities = tf.sparse_tensor_to_dense(sequence["cand_entities"])
    cand_scores = tf.sparse_tensor_to_dense(sequence["cand_entities_scores"])
    cand_labels = tf.sparse_tensor_to_dense(sequence["cand_entities_labels"])

    return (
        context["chunk_id"],
        sequence["words"],
        context["words_len"],
        chars,
        sequence["chars_len"],
        sequence["begin_span"],
        sequence["end_span"],
        context["spans_len"],
        cand_entities,
        cand_scores,
        cand_labels,
        sequence["cand_entities_len"],
        sequence["ground_truth"],
        context["ground_truth_len"],
        sequence["begin_gm"],
        sequence["end_gm"],
    )
def _transform_fn(self, example):
    """Parse serialized examples according to ``self._feature_configs``.

    Args:
        example: Serialized examples, passed to ``tf.parse_example``.

    Returns:
        The dict returned by ``tf.parse_example``, with every value wrapped
        in ``tf.identity`` so that the resulting op is named after its
        feature key.
    """

    def spec_for(config):
        if config.size == -1:
            # A size of -1 marks a multi-valued feature.
            return tf.FixedLenSequenceFeature(
                shape=[],
                dtype=config.dtype,
                allow_missing=True,
                default_value=config.default_value,
            )
        return tf.FixedLenFeature(
            shape=[config.size],
            dtype=config.dtype,
            default_value=config.default_value)

    feature_map = {
        config.name: spec_for(config) for config in self._feature_configs
    }
    features = tf.parse_example(example, feature_map)
    # Snapshot the keys so the dict can be updated in place while looping.
    for key in list(features):
        features[key] = tf.identity(features[key], name=key)
    return features
def just_read():
    """Read one record from ``a.tfrecord`` and print its decoded contents.

    Builds a queue-based reader for a SequenceExample TFRecord file, parses
    the ``video_id`` and ``labels`` context features plus the ``rgb`` and
    ``audio`` feature lists, and prints the evaluated values of the first
    record.

    The queue-runner threads are stopped and joined in a ``finally`` clause,
    so they are cleaned up even if evaluating the tensors raises.
    """
    feature_names = ["rgb", "audio"]
    filename_queue = tf.train.string_input_producer(
        ["a.tfrecord"], shuffle=False, num_epochs=1)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            "video_id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64)
        },
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in feature_names
        })

    sparse_labels = contexts["labels"].values
    video_id = contexts["video_id"]
    rgb = features["rgb"]
    audio = features["audio"]
    # Each step is a byte string: decode to uint8, cast to float32 and
    # reshape to one row per step.
    rgb = tf.reshape(
        tf.cast(tf.decode_raw(rgb, tf.uint8), tf.float32), [-1, 1024])
    audio = tf.reshape(
        tf.cast(tf.decode_raw(audio, tf.uint8), tf.float32), [-1, 128])

    with tf.Session() as sess:
        # With num_epochs set, the input producer keeps its epoch counter in
        # a local variable, which has to be initialised before reading.
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            video_id_v, sparse_labels_v, rgb_v, audio_v = sess.run(
                [video_id, sparse_labels, rgb, audio])
            print(video_id_v)
            print(sparse_labels_v)
            print(rgb_v)
            print(audio_v)
        finally:
            # Always shut the reader threads down, even on failure.
            coord.request_stop()
            coord.join(threads)
def _parse_sequence_example(proto):
    """Parse a serialized SequenceExample into (one-hot label, tokens).

    Args:
        proto: A single serialized SequenceExample.

    Returns:
        A pair ``(one_hot_category, tokens)``: the ``category_three`` context
        feature one-hot encoded with depth ``FLAGS.num_classes``, and the
        int64 ``tokens`` feature list.
    """
    # All three category features are required by the spec even though only
    # category_three is used below.
    category_keys = ("category_one", "category_two", "category_three")
    context_spec = {
        key: tf.FixedLenFeature([], dtype=tf.int64) for key in category_keys
    }
    sequence_spec = {
        "tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    label = context_parsed['category_three']
    return tf.one_hot(label, FLAGS.num_classes), sequence_parsed['tokens']
def extract_fn(data_record):
    """Parse one serialized SequenceExample into a dict of dense tensors.

    Args:
        data_record: A single serialized SequenceExample.

    Returns:
        A dict with the keys ``answers``, ``answer_spans``,
        ``answer_passages``, ``passage_token_ids`` and ``is_selected``.
        The two variable-length features are densified, and
        ``answer_passages`` is given a leading axis of size 1.
    """
    context_spec = {
        'answers': tf.VarLenFeature(dtype=tf.string),
        # The explicit length 2 is needed; per the original author, parsing
        # raises an error without it.
        'answer_spans': tf.FixedLenFeature([2], dtype=tf.int64),
        'answer_passages': tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_spec = {
        'passage_token_ids': tf.VarLenFeature(dtype=tf.int64),
        'is_selected': tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }

    # Feed the serialized record and both specs to the parser.
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=data_record,
        context_features=context_spec,
        sequence_features=sequence_spec)

    answers = tf.sparse_tensor_to_dense(
        context_parsed["answers"], default_value="0")
    passage_token_ids = tf.sparse_tensor_to_dense(
        sequence_parsed["passage_token_ids"], default_value=0)
    # answer_passages is parsed as a scalar (shape ()), so without this
    # expansion it would have no axis at all.
    answer_passages = tf.expand_dims(
        tf.convert_to_tensor(context_parsed['answer_passages']), 0)

    return {
        'answers': answers,
        'answer_spans': context_parsed['answer_spans'],
        'answer_passages': answer_passages,
        'passage_token_ids': passage_token_ids,
        'is_selected': sequence_parsed['is_selected'],
    }
def _generate_feats_and_label_batch(filename_queue, batch_size):
    """Read, parse and bucket-batch utterances from a filename queue.

    Args:
        filename_queue: Queue of filenames to read records from.
        batch_size: Number of utterances per batch.

    Returns:
        feats: The batched ``feats`` feature lists. Each example contributes
            a float32 sequence with 161 values per step; sequences are
            padded within a batch (``dynamic_pad=True``).
        labels: The batched ``labels`` context feature, cast to int32.
        seq_len: The batched sequence lengths (int32).
    """
    # Pull one serialized record off the queue.
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64),
    }
    sequence_spec = {
        # Every step carries a 161-dimensional feature vector.
        "feats": tf.FixedLenSequenceFeature([161], dtype=tf.float32),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Group examples of similar length into the same batch; buckets are
    # 100 steps wide, with boundaries from 100 up to 2400.
    lengths = tf.cast(context_parsed['seq_len'], tf.int32)
    boundaries = list(range(100, 2500, 100))
    seq_len, (feats, labels) = tf.contrib.training.bucket_by_sequence_length(
        input_length=lengths,
        tensors=[sequence_parsed['feats'], context_parsed['labels']],
        batch_size=batch_size,
        bucket_boundaries=boundaries,
        allow_smaller_final_batch=True,
        num_threads=1,
        dynamic_pad=True)

    return feats, tf.cast(labels, tf.int32), seq_len
def parse_sequence_example(serialized, sample_feature):
    """Parse a serialized tensorflow.SequenceExample into a raw sample.

    Args:
        serialized: A scalar string Tensor, a single serialized
            SequenceExample.
        sample_feature: Name of the SequenceExample feature list that holds
            the sample.

    Returns:
        The parsed ``sample_feature`` feature list, a string Tensor.
    """
    # No context features were written when the SequenceExample was built,
    # so only a sequence spec is supplied.
    _, sequence = tf.parse_single_sequence_example(
        serialized,
        sequence_features={
            sample_feature: tf.FixedLenSequenceFeature([], dtype=tf.string)
        })
    # Look the result up under the key it was requested with. A hard-coded
    # 'sample' key raises KeyError for any other feature name.
    return sequence[sample_feature]
def valid_filequeue_reader(self, filename_queue):
    """Read and parse a fixed-size batch of validation SequenceExamples.

    Args:
        filename_queue: Queue of filenames handed to ``self.valid_reader``.

    Returns:
        A 4-tuple ``(audio, seq_lengths, correctness, label_tensor)``:
        the stacked ``audio`` sequences, the flattened int32 ``seq_len``
        values, the flattened ``correctness`` values, and the ``label``
        features concatenated row-wise and densified with -1 as filler.
    """
    batch_size = self.config.batch_size
    _, values = self.valid_reader.read_up_to(filename_queue, batch_size)

    context_features = {
        "seq_len": tf.FixedLenFeature([1], dtype=tf.int64),
        "correctness": tf.FixedLenFeature([1], dtype=tf.int64),
        "label": tf.VarLenFeature(dtype=tf.int64),
    }
    audio_features = {
        "audio": tf.FixedLenSequenceFeature(
            [self.last_dim], dtype=tf.float32),
    }

    audio_list, len_list, correct_list, label_list = [], [], [], []
    # NOTE(review): this indexes exactly batch_size records; read_up_to can
    # presumably return fewer -- confirm how callers handle that.
    for i in range(batch_size):
        context, sequence = tf.parse_single_sequence_example(
            serialized=values[i],
            context_features=context_features,
            sequence_features=audio_features)
        # Turn each sparse label into a single row so rows can be
        # concatenated along axis 0 afterwards.
        label_list.append(tf.sparse_reshape(context['label'], [1, -1]))
        audio_list.append(sequence['audio'])
        len_list.append(context['seq_len'])
        correct_list.append(context['correctness'])

    merged_labels = tf.sparse_concat(
        0, label_list, expand_nonconcat_dim=True)
    label_tensor = tf.sparse_tensor_to_dense(merged_labels, -1)

    flat_lengths = tf.reshape(
        tf.stack(len_list), (-1,), name='seq_lengths')
    seq_lengths = tf.cast(flat_lengths, tf.int32)
    correctness = tf.reshape(
        tf.stack(correct_list), (-1,), name='correctness')

    audio_batch = tf.stack(audio_list, name='input_audio')
    return audio_batch, seq_lengths, correctness, label_tensor
def _parse_example(serialized_example):
    """Parse one serialized SequenceExample into context and sequence dicts.

    Args:
        serialized_example: A single serialized SequenceExample.

    Returns:
        A pair ``(ctx_parsed, seq_parsed)`` of dicts: the context features
        ``seq_len`` and ``labels``, and the sequence feature ``feats``.
    """
    context_spec = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64),
    }
    sequence_spec = {
        # Each step is declared as a 64-dimensional float vector.
        "feats": tf.FixedLenSequenceFeature([64, ], dtype=tf.float32),
    }
    ctx_parsed, seq_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)
    return ctx_parsed, seq_parsed
def get_input_receiver(params):
    """Build a serving input receiver function for spectrogram examples.

    Args:
        params: Object exposing ``spectrogram_bins``, the number of values
            in each step of the ``sgram`` feature.

    Returns:
        A zero-argument function that creates the serving placeholder and
        returns a ``tf.estimator.export.ServingInputReceiver``.
    """
    sgram_spec = tf.FixedLenSequenceFeature(
        [params.spectrogram_bins], tf.float32, allow_missing=True)
    feature_spec = {'sgram': sgram_spec}

    def serving_input_receiver_fn():
        """An input receiver that expects a serialized tf.Example."""
        serialized_tf_example = tf.placeholder(
            dtype=tf.string, shape=[1], name='input_tensor')
        receiver_tensors = {'input': serialized_tf_example}
        parsed = tf.parse_example(serialized_tf_example, feature_spec)
        return tf.estimator.export.ServingInputReceiver(
            parsed, receiver_tensors)

    return serving_input_receiver_fn
def _decode_prediction_example(self, ex_serial):
    """Parse one serialized prediction SequenceExample.

    Args:
        ex_serial: A single serialized SequenceExample.

    Returns:
        A pair ``(context, sequence)`` of dicts keyed by the
        ``ExampleString`` constants: three string context features and one
        int64 context feature, plus the float32 input sequence.
    """
    scalar_string = tf.FixedLenFeature([], dtype=tf.string)
    context_spec = {
        ExampleString.token: scalar_string,
        ExampleString.sequent_length: tf.FixedLenFeature([], dtype=tf.int64),
        ExampleString.input_end_date: scalar_string,
        ExampleString.input_start_date: scalar_string,
    }
    sequence_spec = {
        ExampleString.input_sequence:
            tf.FixedLenSequenceFeature([], dtype=tf.float32),
    }
    context, sequence = tf.parse_single_sequence_example(
        serialized=ex_serial,
        context_features=context_spec,
        sequence_features=sequence_spec)
    return context, sequence
def testSequenceExampleListWithWrongShapeFails(self):
    """A step with three values is rejected when the spec expects shape (2,)."""
    # The second step has three values, one more than the spec allows.
    steps = [int64_feature([2, 3]), int64_feature([2, 3, 4])]
    original = sequence_example(
        feature_lists=feature_lists({"a": feature_list(steps)}))
    serialized = original.SerializeToString()

    kwargs = {
        "example_name": "in1",
        "serialized": tf.convert_to_tensor(serialized),
        "sequence_features": {
            "a": tf.FixedLenSequenceFeature((2,), tf.int64),
        },
    }
    expected_message = (
        r"Name: in1, Key: a, Index: 1."
        r" Number of int64 values != expected."
        r" values size: 3 but output shape: \[2\]")
    self._test(kwargs, expected_err=(tf.OpError, expected_message))
def testSequenceExampleListWithWrongSparseDataTypeFails(self):
    """A float step inside an int64 feature list makes parsing fail."""
    # The first two steps are int64; the third (index 2) is a float list.
    steps = [
        int64_feature([3, 4]),
        int64_feature([1, 2]),
        float_feature([2.0, 3.0]),
    ]
    original = sequence_example(
        feature_lists=feature_lists({"a": feature_list(steps)}))
    serialized = original.SerializeToString()

    kwargs = {
        "example_name": "in1",
        "serialized": tf.convert_to_tensor(serialized),
        "sequence_features": {
            "a": tf.FixedLenSequenceFeature((2,), tf.int64),
        },
    }
    expected_message = (
        "Name: in1, Feature list: a, Index: 2."
        " Data types don't match. Expected type: int64"
        " Feature is: float_list")
    self._test(kwargs, expected_err=(tf.OpError, expected_message))
def get_features_and_labels(feature_names, input_tfrecord_data_path,
                            num_classes):
    """Build tensors for the video id, raw features and labels of a record.

    :param feature_names: comma-separated names of the sequence features to
        parse; surrounding whitespace around each name is stripped
    :param input_tfrecord_data_path: glob pattern locating the tfrecord files
    :param num_classes: length of the dense label vector
    :return: ``(video_id, features, labels)`` where ``features`` is the dict
        of parsed string feature lists and ``labels`` is an int32 vector of
        length ``num_classes`` with 1 at every label index
    :raises IOError: if the glob pattern matches no files
    """
    list_of_feature_names = [
        name.strip() for name in feature_names.split(',')
    ]

    # Locate the input tfrecord files under the given path.
    files = gfile.Glob(input_tfrecord_data_path)
    if not files:
        raise IOError("Unable to find training files. tfrecord_data_path='" +
                      input_tfrecord_data_path + "'.")
    logging.info("Number of training files: %s.", str(len(files)))
    files.reverse()

    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.string)
        for name in list_of_feature_names
    }
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    context_video_id = contexts["video_id"]
    # Ground-truth labels: scatter 1s at the label indices.
    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (num_classes, ), 1,
        validate_indices=False)
    labels = tf.cast(dense_labels, tf.int32)
    return context_video_id, features, labels
def input_pipeline(tfrecords_dir, file_pattern):
    """Build batched rgb/audio tensors from the first 90% of matching files.

    Args:
        tfrecords_dir: Directory that contains the tfrecord files.
        file_pattern: Glob pattern, relative to ``tfrecords_dir``.

    Returns:
        A 3-tuple ``(video_batch, audio_batch, train_file_num)``: the two
        batches after ``resize_axis`` to ``FLAGS.max_frames`` along axis 1,
        and the number of files that were used.
    """
    matches = glob.glob(os.path.join(tfrecords_dir, file_pattern))
    # Only the leading 90% of the matched files are read here.
    train_file_num = int(len(matches) * 0.9)
    tfrecords_list = matches[:train_file_num]

    filename_queue = tf.train.string_input_producer(
        tfrecords_list, num_epochs=FLAGS.num_epochs)
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_features = {"id": tf.FixedLenFeature([], tf.string)}
    sequence_features = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.string)
        for name in ("rgb", "audio")
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    # Each step is a byte string: decode, cast to float32, one row per step.
    rgb_bytes = tf.decode_raw(sequence_parsed['rgb'], tf.uint8)
    video_decoded_features = tf.reshape(
        tf.cast(rgb_bytes, tf.float32), [-1, 1024])
    audio_bytes = tf.decode_raw(sequence_parsed['audio'], tf.uint8)
    audio_decoded_features = tf.reshape(
        tf.cast(audio_bytes, tf.float32), [-1, 128])

    video_batch_data, audio_batch_data = tf.train.batch(
        tensors=[video_decoded_features, audio_decoded_features],
        batch_size=FLAGS.batch_size,
        num_threads=10,
        capacity=4 * FLAGS.batch_size,
        allow_smaller_final_batch=True,
        dynamic_pad=True)

    video_batch_data = resize_axis(
        tensor=video_batch_data, axis=1, new_size=FLAGS.max_frames)
    audio_batch_data = resize_axis(
        tensor=audio_batch_data, axis=1, new_size=FLAGS.max_frames)
    return video_batch_data, audio_batch_data, train_file_num
def _parse_data(example_proto):
    """Parse a serialized SequenceExample holding an image and its caption.

    Args:
        example_proto: A single serialized SequenceExample.

    Returns:
        A 4-tuple ``(image_data, caption, image_id, caption_id)`` where
        ``image_data`` is the raw bytes decoded as uint8 and reshaped to
        [224, 224, 3].
    """
    context_spec = {
        'caption_id': tf.FixedLenFeature([], dtype=tf.int64),
        'image_id': tf.FixedLenFeature([], dtype=tf.int64),
        'image_data': tf.FixedLenFeature([], dtype=tf.string),
    }
    sequence_spec = {
        'image_caption': tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context, sequence = tf.parse_single_sequence_example(
        example_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    pixels = tf.decode_raw(context['image_data'], tf.uint8)
    image_data = tf.reshape(pixels, [224, 224, 3])
    return (image_data, sequence['image_caption'], context['image_id'],
            context['caption_id'])
def maybe_map_bfloat(value):
    """Map a bfloat16 feature spec to its float32 equivalent.

    Args:
        value: A feature spec. Anything for which ``is_bfloat_feature``
            returns false is passed through untouched.

    Returns:
        ``value`` itself when it is not a bfloat feature; otherwise a new
        spec of the same kind with dtype ``tf.float32``. Specs that are
        neither ``FixedLenFeature`` nor ``VarLenFeature`` are treated as
        ``FixedLenSequenceFeature``.
    """
    if not is_bfloat_feature(value):
        return value

    if isinstance(value, tf.FixedLenFeature):
        return tf.FixedLenFeature(
            value.shape, tf.float32, default_value=value.default_value)

    if isinstance(value, tf.VarLenFeature):
        # VarLenFeature carries only a dtype. It has no shape or
        # default_value, so reading or passing them would raise.
        return tf.VarLenFeature(tf.float32)

    # Carry allow_missing over so the rebuilt spec accepts the same inputs
    # as the one it replaces. Fall back to the constructor default for
    # objects that do not expose it.
    return tf.FixedLenSequenceFeature(
        value.shape,
        tf.float32,
        allow_missing=getattr(value, "allow_missing", False),
        default_value=value.default_value)
def testSequenceExampleListWithWrongDataTypeFails(self):
    """A float feature list parsed against an int64 spec makes parsing fail."""
    steps = [float_feature([2, 3])]
    original = sequence_example(
        feature_lists=feature_lists({"a": feature_list(steps)}))
    serialized = original.SerializeToString()

    kwargs = {
        "example_name": "in1",
        "serialized": tf.convert_to_tensor(serialized),
        "sequence_features": {
            "a": tf.FixedLenSequenceFeature((2,), tf.int64),
        },
    }
    expected_message = (
        "Feature list: a, Index: 0. Data types don't match."
        " Expected type: int64")
    self._test(kwargs, expected_err=(tf.OpError, expected_message))
def read_and_decode(example):
    """Decode a SequenceExample into (movie_id, embedding matrix).

    Per the original author's note, this decodes tfrecord files produced
    with VGGish / AudioSet.

    Args:
        example: A single serialized SequenceExample.

    Returns:
        A pair ``(movie_id, shaped_feature)``; ``shaped_feature`` holds the
        ``audio_embedding`` bytes divided by 255, cast to float32 and
        reshaped to rows of 128 values.
    """
    context_spec = {'movie_id': tf.FixedLenFeature([], tf.string)}
    sequence_spec = {
        'audio_embedding': tf.FixedLenSequenceFeature([], tf.string),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    raw_bytes = tf.decode_raw(sequence_parsed['audio_embedding'], tf.uint8)
    # Scale the byte values by the uint8 maximum.
    normalized_feature = tf.divide(raw_bytes, tf.constant(255, tf.uint8))
    as_float = tf.cast(normalized_feature, tf.float32)
    shaped_feature = tf.reshape(as_float, [-1, 128])
    return context_parsed['movie_id'], shaped_feature
def decode(ex):
    """Parse a serialized SequenceExample into (text, sentiment, tokens).

    Args:
        ex: A single serialized SequenceExample.

    Returns:
        A tuple ``(text, sentiment, tokens)``; ``text`` and ``sentiment``
        are the scalar context features reshaped to shape [1], and
        ``tokens`` is the int64 feature list.
    """
    context_spec = {
        "sentiment": tf.FixedLenFeature([], dtype=tf.float32),
        "text": tf.FixedLenFeature([], dtype=tf.string),
    }
    sequence_spec = {
        "tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=ex,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Give both scalars a single-element axis.
    sentiment = tf.reshape(context_parsed["sentiment"], [1])
    text = tf.reshape(context_parsed["text"], [1])
    return (text, sentiment, sequence_parsed["tokens"])
def _parse_function(example_proto):
    """Parse one SequenceExample into a (one-hot label, token ids) pair.

    ``class_count``, ``random_crop``, ``crop_size``, ``start_index``,
    ``end_index`` and ``vocab_cap`` are read from the enclosing scope.

    Args:
        example_proto: A single serialized SequenceExample.

    Returns:
        A pair ``(y, x)``: the first ``y`` value one-hot encoded (int64,
        depth ``class_count``), and the ``x`` sequence, either randomly
        cropped or sliced, with values clipped to ``[0, vocab_cap]``.
    """
    context_spec = {"y": tf.FixedLenFeature([1], dtype=tf.int64)}
    sequence_spec = {'x': tf.FixedLenSequenceFeature([], tf.int64)}
    context_parsed, seq_parsed = tf.parse_single_sequence_example(
        serialized=example_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    label = context_parsed["y"][0]
    y = tf.one_hot(label, class_count, dtype=tf.int64)

    x = seq_parsed["x"]
    if not random_crop:
        x = x[start_index:end_index]
    else:
        # Crop crop_size steps, or the whole sequence when it is not longer
        # than crop_size.
        crop_fits = tf.less(crop_size, tf.shape(x)[0])
        full_length = tf.shape(x)[0]
        random_window = tf.where(crop_fits, x=crop_size, y=full_length)
        x = tf.random_crop(x, [random_window])

    x = tf.clip_by_value(x, 0, vocab_cap)
    return y, x
def parse_examples_batch(examples_batch):
    """Parse a batch of serialized examples into feature and label dicts.

    Args:
        examples_batch: Serialized examples, passed to ``tf.parse_example``.

    Returns:
        A pair ``(features, labels)``: ``features`` holds ``id``, ``text``
        and ``text_length``; ``labels`` holds ``classes``.
    """
    spec = {
        'id': tf.FixedLenFeature([], dtype=tf.string),
        'text': tf.FixedLenSequenceFeature(
            [], dtype=tf.int64, allow_missing=True),
        'text_length': tf.FixedLenFeature([], dtype=tf.int64),
        'classes': tf.FixedLenFeature(
            [len(utils.CLASSES)], dtype=tf.int64),
    }
    example_fields = tf.parse_example(examples_batch, features=spec)

    features = {}
    for key in ('id', 'text', 'text_length'):
        features[key] = example_fields[key]
    labels = {'classes': example_fields['classes']}
    return features, labels
def test_tfrecord_decode_v2():
    """Build tensors that decode one record of a hard-coded tfrecord file.

    :return: image, size, labels, bboxes -- the encoded image string, the
        3-element size, the first column of the first ``bbox_num`` rows of
        ``bboxes``, and the full ``bboxes`` sequence (5 values per row)
    """
    record_path = (
        '/home/autel/libs/ssd-tensorflow-ljanyst/pascal-voc/trainval/'
        'VOCdevkit/VOC2007/tfrecords/1.tfrecords')
    file_queue = tf.train.string_input_producer([record_path])
    _, example = tf.TFRecordReader().read(file_queue)

    context_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'size': tf.FixedLenFeature([3], dtype=tf.int64),
        'bbox_num': tf.FixedLenFeature([1], dtype=tf.int64),
    }
    sequence_spec = {
        'bboxes': tf.FixedLenSequenceFeature([5], dtype=tf.int64),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    image = context_parsed['image']
    size = context_parsed['size']
    # bbox_num is parsed with shape [1]; squeeze it down to a scalar.
    box_count = tf.squeeze(context_parsed['bbox_num'])
    bboxes = sequence_parsed['bboxes']
    # Column 0 of the first box_count rows.
    row_count = tf.cast(box_count, dtype=tf.int32)
    labels = tf.slice(bboxes, [0, 0], [row_count, 1])
    return image, size, labels, bboxes
def parse_eval_example(serialized):
    """Parse a tensorflow.SequenceExample into image data and captions.

    Args:
        serialized: A scalar string Tensor; a single serialized
            SequenceExample.

    Returns:
        img: The ``image/data`` bytes decoded as float32 and reshaped to
            [100, 2048].
        image_id: The int64 ``image/image_id`` context feature.
        filename: The string ``image/filename`` context feature.
        caption: The string ``iamge/raw_caption`` feature list.
        bounding_box: The ``iamge/bounding_box`` bytes decoded as float32
            and reshaped to [100, 4].
    """
    # NOTE(review): the "iamge/..." keys are misspelled but presumably match
    # what the writer stored -- confirm before renaming them.
    context_spec = {
        "image/image_id": tf.FixedLenFeature([], dtype=tf.int64),
        "image/filename": tf.FixedLenFeature([], dtype=tf.string),
        "image/data": tf.FixedLenFeature([], dtype=tf.string),
        "iamge/bounding_box": tf.FixedLenFeature([], dtype=tf.string),
    }
    sequence_spec = {
        "iamge/raw_caption": tf.FixedLenSequenceFeature(
            [], dtype=tf.string),
    }
    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features=context_spec,
        sequence_features=sequence_spec)

    image_id = context['image/image_id']
    filename = context['image/filename']

    img = tf.reshape(
        tf.decode_raw(context["image/data"], tf.float32), [100, 2048])
    bounding_box = tf.reshape(
        tf.decode_raw(context["iamge/bounding_box"], tf.float32), [100, 4])

    caption = sequence["iamge/raw_caption"]
    return img, image_id, filename, caption, bounding_box
def input_fn(data_dir, batch_size, is_training=None, prep_style='minimal',
             num_parallel_reads=0, params=None):
    """Build the record dataset for training or evaluation.

    Args:
        data_dir: Directory handed to ``get_filenames``.
        batch_size: Batch size forwarded to ``process_record_dataset``.
        is_training: Truthy to shuffle the file list; also forwarded to the
            filename lookup and to record parsing.
        prep_style: Forwarded to ``parse_record``.
        num_parallel_reads: When >= 1, passed to ``tf.data.TFRecordDataset``
            for each file; otherwise the dataset uses its default.
        params: Mapping that must contain ``'file_prefix'``; also forwarded
            to ``parse_record``.

    Returns:
        Whatever ``process_record_dataset`` returns.
    """
    feature_map = {
        'data': tf.FixedLenSequenceFeature(
            [], allow_missing=True, dtype=tf.int64, default_value=0),
        'label': tf.FixedLenFeature([5], dtype=tf.int64),
    }

    filenames = get_filenames(
        data_dir, is_training, fmt='tfrecords',
        prefix=params['file_prefix'])
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    if is_training:
        # Shuffle at file level with a buffer covering every file.
        dataset = dataset.shuffle(buffer_size=len(filenames))

    if num_parallel_reads >= 1:
        def open_records(path):
            return tf.data.TFRecordDataset(
                path, num_parallel_reads=num_parallel_reads)
    else:
        open_records = tf.data.TFRecordDataset
    dataset = dataset.flat_map(open_records)

    def parse_record_fn(raw_record, is_training):
        return parse_record(
            raw_record,
            is_training=is_training,
            feature_map=feature_map,
            prep_style=prep_style,
            params=params)

    return process_record_dataset(
        dataset, is_training, batch_size, SHUFFLE_BUFFER, parse_record_fn)
def prepare_reader(self, filename_queue, max_quantized_value=2,
                   min_quantized_value=-2):
    """Create tensors that read and decode one record from the queue.

    Args:
        filename_queue: Queue of tfrecord filenames.
        max_quantized_value: Upper bound handed to ``Dequantize``.
        min_quantized_value: Lower bound handed to ``Dequantize``.

    Returns:
        A 4-tuple ``(video_id, video_matrix, labels, num_frames)``.
        ``labels`` is a boolean vector of length ``self.num_classes``.
        When ``self.sequence_data`` is falsy, ``video_matrix`` is the
        context feature as parsed and ``num_frames`` is the constant -1.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    context_features = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    if self.sequence_data:
        # The feature is stored per frame as a byte string.
        sequence_features = {
            self.feature_name:
                tf.FixedLenSequenceFeature([], dtype=tf.string),
        }
    else:
        # The feature is stored as a single float vector in the context.
        sequence_features = None
        context_features[self.feature_name] = tf.FixedLenFeature(
            self.feature_size, tf.float32)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (self.num_classes,), 1)
    labels = tf.cast(dense_labels, tf.bool)

    if self.sequence_data:
        frame_bytes = tf.decode_raw(features[self.feature_name], tf.uint8)
        decoded_features = tf.reshape(
            tf.cast(frame_bytes, tf.float32), [-1, self.feature_size])
        num_frames = tf.minimum(
            tf.shape(decoded_features)[0], self.max_frames)
        video_matrix = Dequantize(
            decoded_features, max_quantized_value, min_quantized_value)
    else:
        video_matrix = contexts[self.feature_name]
        num_frames = tf.constant(-1)

    # No padding or truncation to max_frames is applied here.
    return contexts["video_id"], video_matrix, labels, num_frames
def _parse_function(self, sequence_example_proto):
    """Parse a SequenceExample in the AutoDL/TensorFlow format.

    Args:
        sequence_example_proto: A serialized SequenceExample using a dense,
            sparse or compressed input representation.

    Returns:
        A tuple ``(contexts, features)``. ``contexts`` holds ``id``,
        ``label_index`` and ``label_score``. ``features`` is keyed by
        ``autodl_dataset._feature_key(i, suffix)`` for each bundle index
        ``i``: suffix ``dense_input`` for dense data, ``compressed`` for
        compressed data, or ``sparse_col_index``, ``sparse_row_index`` and
        ``sparse_value`` for sparse data.
    """
    autodl_dataset = self.get_autodl_dataset(subset='train')
    metadata = autodl_dataset.metadata_
    key_for = autodl_dataset._feature_key

    sequence_features = {}
    for i in range(metadata.get_bundle_size()):
        if metadata.is_sparse(i):
            sequence_features[key_for(i, "sparse_col_index")] = (
                tf.VarLenFeature(tf.int64))
            sequence_features[key_for(i, "sparse_row_index")] = (
                tf.VarLenFeature(tf.int64))
            sequence_features[key_for(i, "sparse_value")] = (
                tf.VarLenFeature(tf.float32))
        elif metadata.is_compressed(i):
            sequence_features[key_for(i, "compressed")] = (
                tf.VarLenFeature(tf.string))
        else:
            sequence_features[key_for(i, "dense_input")] = (
                tf.FixedLenSequenceFeature(
                    metadata.get_tensor_size(i), dtype=tf.float32))

    context_features = {
        "id": tf.FixedLenFeature([], tf.int64),
        "label_index": tf.VarLenFeature(tf.int64),
        "label_score": tf.VarLenFeature(tf.float32),
    }
    contexts, features = tf.parse_single_sequence_example(
        sequence_example_proto,
        context_features=context_features,
        sequence_features=sequence_features)
    return contexts, features
def testSequenceExampleWithMissingFeatureListFails(self):
    """Requesting a feature list the example does not contain fails."""
    original = sequence_example(feature_lists=feature_lists({}))
    serialized = original.SerializeToString()

    # The failure is expected because no default was registered, i.e.
    # feature_list_dense_defaults = {"a": None} was not added.
    kwargs = {
        "example_name": "in1",
        "serialized": tf.convert_to_tensor(serialized),
        "sequence_features": {
            "a": tf.FixedLenSequenceFeature((2, ), tf.int64),
        },
    }
    expected_message = (
        "Name: in1, Feature list 'a' is required but could not be found."
        " Did you mean to include it in"
        " feature_list_dense_missing_assumed_empty or"
        " feature_list_dense_defaults?")
    self._test(kwargs, expected_err=(tf.OpError, expected_message))
def _parse_tfexample(serialized_example):
    '''Parse a serialized tf.train.SequenceExample into tensors.

    Context features: label, task (both scalar int64).
    Sequence features: sentence (int64).

    Returns a tuple ``(task, label, sentence)``.
    '''
    scalar_int64 = tf.FixedLenFeature([], tf.int64)
    context_spec = {'label': scalar_int64, 'task': scalar_int64}
    sequence_spec = {
        'sentence': tf.FixedLenSequenceFeature([], tf.int64),
    }
    context_dict, sequence_dict = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)
    return (context_dict['task'], context_dict['label'],
            sequence_dict['sentence'])
def parse(ex):
    '''
    Tell TF how to turn a serialized example back into tensors.

    :param ex: a single serialized SequenceExample
    :return: A dictionary of tensors: {seq: the "tokens" sequence,
        length: the "length" context feature}
    '''
    context_spec = {"length": tf.FixedLenFeature([], dtype=tf.int64)}
    sequence_spec = {
        "tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    # Parsing yields one dict for the context and one for the sequence.
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=ex,
        context_features=context_spec,
        sequence_features=sequence_spec)

    tokens = sequence_parsed["tokens"]
    length = context_parsed["length"]
    return {"seq": tokens, "length": length}