def _test(self, kwargs, expected_context_values=None,
          expected_feat_list_values=None, expected_err=None):
    """Parse a single SequenceExample with `kwargs` and verify the outcome.

    Args:
      kwargs: Keyword arguments forwarded verbatim to
        `tf.parse_single_sequence_example`.
      expected_context_values: Expected context outputs handed to
        `_compare_output_to_expected`; a falsy value is replaced by `{}`.
      expected_feat_list_values: Same as above, for the feature-list outputs.
      expected_err: Falsy on the success path. Otherwise an indexable pair
        whose two items are passed to `assertRaisesWithPredicateMatch`.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
        if expected_err:
            with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                     expected_err[1]):
                c_out, fl_out = tf.parse_single_sequence_example(**kwargs)
                if c_out:
                    sess.run(flatten_values_tensors_or_sparse(c_out.values()))
                if fl_out:
                    sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
            # The shape checks below read `context_out`, which only exists on
            # the success path, so the error path stops here.
            return

        # Returns dicts w/ Tensors and SparseTensors.
        context_out, feat_list_out = tf.parse_single_sequence_example(**kwargs)
        context_result = (
            sess.run(flatten_values_tensors_or_sparse(context_out.values()))
            if context_out else [])
        feat_list_result = (
            sess.run(flatten_values_tensors_or_sparse(feat_list_out.values()))
            if feat_list_out else [])

        # Check values.
        _compare_output_to_expected(
            self, context_out, expected_context_values, context_result)
        _compare_output_to_expected(
            self, feat_list_out, expected_feat_list_values, feat_list_result)

        # Check static shapes of the context outputs against their specs.
        if "context_features" in kwargs:
            for key, spec in kwargs["context_features"].items():
                parsed = context_out[key]
                if isinstance(spec, tf.FixedLenFeature) and spec.shape is not None:
                    self.assertEqual(
                        tuple(parsed.get_shape().as_list()), spec.shape)
                elif isinstance(spec, tf.VarLenFeature):
                    self.assertEqual(
                        tuple(parsed.indices.get_shape().as_list()), (None, 1))
                    self.assertEqual(
                        tuple(parsed.values.get_shape().as_list()), (None,))
                    self.assertEqual(
                        tuple(parsed.dense_shape.get_shape().as_list()), (1,))
def parse_example_queue(example_queue, config):
    """Parse one serialized SequenceExample into two sequences and a length.

    Args:
      example_queue: Serialized SequenceExample, passed as the first argument
        to `tf.parse_single_sequence_example`.
      config: Object exposing `length_name`, `context_feature_name` and
        `tag_feature_name`; these are the feature keys that get parsed.

    Returns:
      A 3-tuple `(context_seq, tag_seq, length)`: the two int64 sequence
      features keyed by `config.context_feature_name` and
      `config.tag_feature_name`, followed by the scalar int64 context
      feature keyed by `config.length_name`.
    """
    context_spec = {
        config.length_name: tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_spec = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
        for name in (config.context_feature_name, config.tag_feature_name)
    }

    parsed_context, parsed_sequences = tf.parse_single_sequence_example(
        example_queue,
        context_features=context_spec,
        sequence_features=sequence_spec)

    input_seq = parsed_sequences[config.context_feature_name]
    tag_seq = parsed_sequences[config.tag_feature_name]
    length = parsed_context[config.length_name]
    return (input_seq, tag_seq, length)
def parse_sequence_example(serialized, image_feature, caption_feature):
    """Split a serialized SequenceExample into its image and caption.

    Args:
      serialized: A single serialized SequenceExample.
      image_feature: Key of the context feature holding the image; it is
        parsed as a scalar string.
      caption_feature: Key of the feature list holding the caption; it is
        parsed as a sequence of int64 scalars.

    Returns:
      encoded_image: The string tensor stored under `image_feature`
        (presumably an encoded JPEG -- not verified here).
      caption: The int64 tensor stored under `caption_feature`.
    """
    image_spec = {image_feature: tf.FixedLenFeature([], dtype=tf.string)}
    caption_spec = {
        caption_feature: tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features=image_spec,
        sequence_features=caption_spec)
    return context[image_feature], sequence[caption_feature]
def decode(self, serialized_example, items=None):
    """Decode a serialized SequenceExample into the requested items.

    Args:
      serialized_example: A serialized SequenceExample tensor.
      items: Keys of `self._items_to_handlers` to decode. When falsy
        (`None` or empty), every key of `self._items_to_handlers` is decoded.

    Returns:
      A list with one entry per item, each produced by that item's
      handler via `handler.tensors_to_item`.
    """
    context, feature_list = tf.parse_single_sequence_example(
        serialized_example,
        self._keys_to_context_features,
        self._keys_to_sequence_features)

    # Dense context features are reshaped once to their declared shape.
    for key in self._keys_to_context_features:
        spec = self._keys_to_context_features[key]
        if isinstance(spec, tf.FixedLenFeature):
            context[key] = tf.reshape(context[key], spec.shape)

    if not items:
        items = self._items_to_handlers.keys()

    outputs = []
    for item in items:
        handler = self._items_to_handlers[item]
        keys_to_tensors = {}
        for key in handler.keys:
            # Context features take precedence over feature lists.
            source = context if key in context else feature_list
            keys_to_tensors[key] = source[key]
        outputs.append(handler.tensors_to_item(keys_to_tensors))
    return outputs
def input_sequence_example(file_list, hparams):
    """Read and parse one SequenceExample from a set of TFRecord files.

    Args:
      file_list: List of TFRecord file paths fed to
        `tf.train.string_input_producer`.
      hparams: Object exposing `one_hot_length`, the width of each
        'inputs' step.

    Returns:
      seq_key: The key returned by `TFRecordReader.read`.
      context: The context dict returned by the parser (no context features
        are requested here).
      sequence: Dict with 'inputs' (float32, `one_hot_length` wide per step)
        and 'labels' (int64 scalar per step).
    """
    file_queue = tf.train.string_input_producer(file_list)
    seq_key, serialized_example = tf.TFRecordReader().read(file_queue)

    inputs_spec = tf.FixedLenSequenceFeature(
        shape=[hparams.one_hot_length], dtype=tf.float32)
    labels_spec = tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)

    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        sequence_features={'inputs': inputs_spec, 'labels': labels_spec})
    return seq_key, context, sequence
def prepare_serialized_examples(self, serialized_example,
                                max_quantized_value=2, min_quantized_value=-2):
    """Parse one serialized SequenceExample into a batch-of-one of video data.

    Reads the "id" and "labels" context features plus one string sequence
    feature per entry of `self.feature_names`, converts each sequence feature
    with `self.get_video_matrix`, and concatenates the results along axis 1.

    Args:
      serialized_example: A serialized SequenceExample.
      max_quantized_value: Forwarded to `self.get_video_matrix`.
      min_quantized_value: Forwarded to `self.get_video_matrix`.

    Returns:
      A 4-tuple `(batch_video_ids, batch_video_matrix, batch_labels,
      batch_frames)`; each element is the corresponding per-example tensor
      with a new leading dimension added by `tf.expand_dims(..., 0)`.

    Raises:
      AssertionError: If `self.feature_names` is empty or its length differs
        from `self.feature_sizes`.
    """
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={
            "id": tf.FixedLenFeature([], tf.string),
            "labels": tf.VarLenFeature(tf.int64),
        },
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # Read ground truth labels as a dense boolean vector of num_classes.
    labels = tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool)

    # Loads (potentially) different types of features and concatenates them.
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"
    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    num_frames = None  # frame count of the video, taken from the first feature
    feature_matrices = []
    for feature_name, feature_size in zip(self.feature_names,
                                          self.feature_sizes):
        feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
            features[feature_name], feature_size, self.max_frames,
            max_quantized_value, min_quantized_value)
        if num_frames is None:
            num_frames = num_frames_in_this_feature
        else:
            # A bare tf.assert_equal op is never executed in graph mode
            # unless something depends on it, so tie it to num_frames.
            frames_match = tf.assert_equal(num_frames,
                                           num_frames_in_this_feature)
            with tf.control_dependencies([frames_match]):
                num_frames = tf.identity(num_frames)
        feature_matrices.append(feature_matrix)

    # Cap the number of frames at self.max_frames.
    num_frames = tf.minimum(num_frames, self.max_frames)

    # Concatenate different features.
    video_matrix = tf.concat(feature_matrices, 1)

    # Convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def read_record(self, record):
    """Parse one TFRecord into the tensors that are pushed through the queue.

    Args:
      record: A serialized SequenceExample, parsed with
        `self.CONTEXT_FEATURES` and `self.SEQUENCE_FEATURES`.

    Returns:
      queue_values: Dict with the 'image', 'bboxes', 'filename' and
        'scale_factor' tensors.
      queue_dtypes: List of dtypes, in the same order as `queue_names`.
      queue_names: List of the names of the queued tensors.
    """
    # Variable length features (bboxes in an image) are stored as sequence
    # features.
    context_example, sequence_example = tf.parse_single_sequence_example(
        record,
        context_features=self.CONTEXT_FEATURES,
        sequence_features=self.SEQUENCE_FEATURES)

    # Decode the image and give it an explicit (height, width, 3) shape.
    decoded = tf.image.decode_image(context_example['image_raw'], channels=3)
    image = tf.cast(decoded, tf.float32)
    height = tf.cast(context_example['height'], tf.int32)
    width = tf.cast(context_example['width'], tf.int32)
    image = tf.reshape(image, tf.stack([height, width, 3]))

    # Same conversion order as before: label, xmin, xmax, ymin, ymax.
    label, xmin, xmax, ymin, ymax = [
        self._sparse_to_tensor(sequence_example[field])
        for field in ('label', 'xmin', 'xmax', 'ymin', 'ymax')
    ]

    # Stack parsed tensors to define bounding boxes of shape (num_boxes, 5).
    bboxes = tf.stack([xmin, ymin, xmax, ymax, label], axis=1)

    image, bboxes, preprocessing_details = self.preprocess(image, bboxes)

    filename = tf.cast(context_example['filename'], tf.string)

    # TODO: Send additional metadata through the queue (scale_factor,
    # applied_augmentations)
    queue_names = ['image', 'bboxes', 'filename', 'scale_factor']
    queue_dtypes = [tf.float32, tf.int32, tf.string, tf.float32]
    queue_values = dict(zip(
        queue_names,
        [image, bboxes, filename, preprocessing_details['scale_factor']]))

    return queue_values, queue_dtypes, queue_names
def read_and_decode_single_example(self, max_num_steps, from_filename=None,
                                   from_example=None, num_epochs=None):
    """Parse one verse SequenceExample, from a file or an in-memory proto.

    Args:
      max_num_steps: Not referenced in this function body.
      from_filename: Path of a TFRecord file to read from. Takes precedence
        over `from_example` when truthy.
      from_example: Object with `SerializeToString()`, used when
        `from_filename` is falsy.
      num_epochs: Forwarded to `tf.train.string_input_producer`.

    Returns:
      to_batch: Dict of tensors. Sequence features are passed through from
        `self.cast_tensors`; every remaining context feature except
        'verse_length' is tiled `verse_length[0]` times along a new leading
        axis.
      init_op_local: The op from `tf.initialize_local_variables()`.
    """
    assert from_filename is not None or from_example is not None

    if from_filename:
        filename_queue = tf.train.string_input_producer(
            [from_filename], num_epochs=num_epochs)
        _, ex = tf.TFRecordReader().read(filename_queue)
    else:
        ex = from_example.SerializeToString()

    context_features = {}
    for shape_key in ("phones.shape", "stresses.shape", "chars.shape"):
        context_features[shape_key] = tf.FixedLenFeature([2], dtype=tf.int64)
    context_features["verse_length"] = tf.FixedLenFeature([1], dtype=tf.int64)
    for r in xrange(self.max_nrps):
        context_features["rapper" + str(r)] = tf.FixedLenFeature(
            [self.len_rapper_vector], dtype=tf.int64)

    sequence_keys = (
        "phones", "stresses", "chars", "labels",
        "phones.lengths", "stresses.lengths", "chars.lengths",
    )
    sequence_features = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
        for name in sequence_keys
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=ex,
        context_features=context_features,
        sequence_features=sequence_features)

    casted_tensors = self.cast_tensors(context_parsed, sequence_parsed)

    to_batch = {}
    for name, tensor in casted_tensors.iteritems():
        if name in sequence_features:
            to_batch[name] = tensor

    verse_length = casted_tensors.pop('verse_length')
    context_names = [
        name for name in casted_tensors if name not in sequence_features
    ]
    for name in context_names:
        # Repeat the per-verse value once per step of the verse.
        multiples = tf.pack([verse_length[0], 1])
        expanded = tf.expand_dims(casted_tensors[name], 0)
        to_batch[name] = tf.tile(expanded, multiples)

    init_op_local = tf.initialize_local_variables()
    return to_batch, init_op_local
def batches_from_queue(filename_queue, batch_size, return_segments=False):
    """Read SequenceExamples from a filename queue and batch them with padding.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.
      batch_size: Number of examples per batch.
      return_segments: When truthy, also parse and batch the int64
        'segments' sequence feature.

    Returns:
      The result of `tf.train.batch` over, in order: track_id, length,
      features, labels, and (only if `return_segments`) segments.
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    sequence_features = {
        "features": tf.FixedLenSequenceFeature([84], dtype=tf.float32),
        "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    if return_segments:
        sequence_features['segments'] = tf.FixedLenSequenceFeature(
            [], dtype=tf.int64)

    context_features = {
        "length": tf.FixedLenFeature([], dtype=tf.int64),
        "track_id": tf.FixedLenFeature(
            [], dtype=tf.string, default_value='unknown'),
    }
    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    min_after_dequeue = 10000
    capacity = min_after_dequeue + 3 * batch_size

    to_batch = [
        context['track_id'],
        context['length'],
        sequence['features'],
        sequence['labels'],
    ]
    if return_segments:
        to_batch.append(sequence['segments'])

    return tf.train.batch(
        to_batch,
        batch_size=batch_size,
        capacity=capacity,
        dynamic_pad=True)
def example_parser(self, filename_queue):
    """Read one record and return its three int64 sequence features.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.

    Returns:
      A 3-tuple `(labels, char_list, sent_len)` of parsed sequence tensors.
    """
    _, record_string = tf.TFRecordReader().read(filename_queue)

    names = ('labels', 'char_list', 'sent_len')
    features = {
        name: tf.FixedLenSequenceFeature([], tf.int64) for name in names
    }
    _, example = tf.parse_single_sequence_example(
        serialized=record_string, sequence_features=features)

    return example['labels'], example['char_list'], example['sent_len']
def get_padded_batch(file_list, batch_size, input_size,
                     num_enqueuing_threads=4):
    """Read SequenceExamples from TFRecords and return zero-padded batches.

    Variable-length examples are handled by a `tf.PaddingFIFOQueue`, which
    pads every batch to its longest sequence.

    Args:
      file_list: List of TFRecord file paths containing SequenceExamples.
      batch_size: Number of SequenceExamples per dequeued batch.
      input_size: Width of each input vector.
      num_enqueuing_threads: Number of enqueue ops handed to the
        `QueueRunner`.

    Returns:
      The result of `queue.dequeue_many(batch_size)`: the batched inputs
      (float32, `[batch_size, num_steps, input_size]`), labels (int64,
      `[batch_size, num_steps]`) and unpadded lengths (int32,
      `[batch_size]`).
    """
    file_queue = tf.train.string_input_producer(file_list)
    _, serialized_example = tf.TFRecordReader().read(file_queue)

    _, sequence = tf.parse_single_sequence_example(
        serialized_example,
        sequence_features={
            'inputs': tf.FixedLenSequenceFeature(
                shape=[input_size], dtype=tf.float32),
            'labels': tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64),
        })

    inputs = sequence['inputs']
    # Number of steps in this example before any padding.
    length = tf.shape(inputs)[0]

    queue = tf.PaddingFIFOQueue(
        capacity=1000,
        dtypes=[tf.float32, tf.int64, tf.int32],
        shapes=[(None, input_size), (None,), ()])

    # The same enqueue op is repeated once per enqueuing thread.
    enqueue_op = queue.enqueue([inputs, sequence['labels'], length])
    enqueue_ops = [enqueue_op] * num_enqueuing_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))

    return queue.dequeue_many(batch_size)
def _read_single_sequence_example(file_list, tokens_shape=None):
    """Read one record from `file_list` and parse it as a SequenceExample.

    Args:
      file_list: List of TFRecord file paths.
      tokens_shape: Per-step shape of the token-id feature; a falsy value
        means scalar (`[]`).

    Returns:
      A 3-tuple `(seq_key, ctx, sequence)`: the reader key, the parsed
      context dict, and the parsed sequence dict holding the token-id,
      label and weight features named by `data_utils.SequenceWrapper`.
    """
    tf.logging.info('Constructing TFRecordReader from files: %s', file_list)
    file_queue = tf.train.string_input_producer(file_list)
    seq_key, serialized_record = tf.TFRecordReader().read(file_queue)

    wrapper = data_utils.SequenceWrapper
    sequence_spec = {
        wrapper.F_TOKEN_ID: tf.FixedLenSequenceFeature(
            tokens_shape or [], dtype=tf.int64),
        wrapper.F_LABEL: tf.FixedLenSequenceFeature([], dtype=tf.int64),
        wrapper.F_WEIGHT: tf.FixedLenSequenceFeature([], dtype=tf.float32),
    }
    ctx, sequence = tf.parse_single_sequence_example(
        serialized_record, sequence_features=sequence_spec)
    return seq_key, ctx, sequence
def _process_tf_record_proto(serialized_proto):
    """Parse a serialized SequenceExample into a dict of image/caption tensors.

    Args:
      serialized_proto: A serialized SequenceExample.

    Returns:
      Dict with keys "image_id", "caption", "image_features",
      "object_features" and "parts_of_speech", each mapped to the parsed
      tensor of the corresponding "image/..." feature.
    """
    int_steps = tf.FixedLenSequenceFeature([], dtype=tf.int64)
    float_steps = tf.FixedLenSequenceFeature([], dtype=tf.float32)

    context, sequence = tf.parse_single_sequence_example(
        serialized_proto,
        context_features={
            "image/image_id": tf.FixedLenFeature([], dtype=tf.int64),
        },
        sequence_features={
            "image/caption_ids": int_steps,
            "image/parts_of_speech_ids": int_steps,
            "image/image_features": float_steps,
            "image/object_features": float_steps,
        })

    return {
        "image_id": context["image/image_id"],
        "caption": sequence["image/caption_ids"],
        "image_features": sequence["image/image_features"],
        "object_features": sequence["image/object_features"],
        "parts_of_speech": sequence["image/parts_of_speech_ids"],
    }
def get_instance(self, proto):
    """Parse a serialized SequenceExample into a character instance.

    Args:
      proto: A serialized SequenceExample with an int64 "cint_len" context
        feature and an int64 "cint" sequence feature.

    Returns:
      Dict with "char" (the parsed "cint" sequence) and "char_len" (the
      parsed "cint_len" scalar).
    """
    context, sequence = tf.parse_single_sequence_example(
        serialized=proto,
        context_features={"cint_len": tf.FixedLenFeature([], tf.int64)},
        sequence_features={
            "cint": tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64),
        })
    return {"char": sequence["cint"], "char_len": context["cint_len"]}
def read_record(filename_queue):
    """Read one record and return its float 'features' and 'labels' sequences.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.

    Returns:
      A pair `(feature, label)` of float32 sequence tensors.
    """
    _, record_string = tf.TFRecordReader().read(filename_queue)

    # The 'length' context feature is still requested from the parser even
    # though its parsed value is not returned.
    context_spec = {'length': tf.FixedLenFeature([], tf.float32)}
    sequence_spec = {
        name: tf.FixedLenSequenceFeature([], tf.float32)
        for name in ('features', 'labels')
    }
    _, example = tf.parse_single_sequence_example(
        record_string,
        context_features=context_spec,
        sequence_features=sequence_spec)

    return example['features'], example['labels']
def _single_example_parser(self, serialized_example):
    """Parse a serialized SequenceExample into its sequences and label.

    Args:
      serialized_example: A serialized SequenceExample.

    Returns:
      A pair `(sequences, labels)`: the full parsed sequence dict (keys
      "sequence" and "chars") and the scalar int64 "label" context feature.
    """
    int_steps = tf.FixedLenSequenceFeature([], dtype=tf.int64)
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features={"label": tf.FixedLenFeature([], dtype=tf.int64)},
        sequence_features={"sequence": int_steps, "chars": int_steps})
    return sequence_parsed, context_parsed['label']
def parse(ex):
    """Turn a serialized SequenceExample back into a dict of tensors.

    Args:
      ex: A serialized SequenceExample.

    Returns:
      Dict with "sentence" (int64 sequence), "user" and "label" (int64
      scalars).
    """
    scalar_int = tf.FixedLenFeature([], dtype=tf.int64)
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=ex,
        context_features={"label": scalar_int, "user": scalar_int},
        sequence_features={
            "sentence": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        })

    result = {"sentence": sequence_parsed["sentence"]}
    for name in ("user", "label"):
        result[name] = context_parsed[name]
    return result
def _parse_single_example(self, example_proto):
    """Parse one serialized SequenceExample into an image and its caption.

    Args:
      example_proto: A serialized SequenceExample with a string "image"
        context feature and a string "caption" sequence feature.

    Returns:
      A 3-tuple `(image, caption, caption_size)`. When `self.precompute` is
      truthy the image bytes are decoded as raw float32 values; otherwise
      they are decoded as a 3-channel JPEG and passed through
      `self._vgg_preprocess`. The caption is cut to at most 50 entries.
    """
    context, sequence = tf.parse_single_sequence_example(
        example_proto,
        context_features={
            "image": tf.FixedLenFeature([], dtype=tf.string),
        },
        sequence_features={
            "caption": tf.FixedLenSequenceFeature([], dtype=tf.string),
        })

    if self.precompute:
        image = tf.decode_raw(context['image'], out_type=tf.float32)
    else:
        decoded = tf.image.decode_jpeg(context["image"], channels=3)
        image = self._vgg_preprocess(decoded)

    # max_len allowed is 50
    caption = tf.cast(sequence["caption"], tf.string)[:50]
    return image, caption, tf.size(caption)
def _parse_function(example_proto):
    """Parse one serialized SequenceExample into audio features and labels.

    Args:
      example_proto: A serialized SequenceExample with "video_id" and
        "labels" context features and an "audio_embedding" sequence feature.

    Returns:
      A pair `(decoded_features, labels)`: the embedding bytes decoded as
      uint8, cast to float32 and reshaped to `[-1, 128]`, and a boolean
      vector of length 527 that is true at the label indices.
    """
    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {
        'audio_embedding': tf.FixedLenSequenceFeature([10], dtype=tf.string),
    }
    contexts, features = tf.parse_single_sequence_example(
        example_proto,
        context_features=context_spec,
        sequence_features=sequence_spec)

    raw_bytes = tf.decode_raw(features['audio_embedding'], tf.uint8)
    decoded_features = tf.reshape(tf.cast(raw_bytes, tf.float32), [-1, 128])

    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (527,), 1, validate_indices=False)
    labels = tf.cast(dense_labels, tf.bool)

    return decoded_features, labels
def deserialize_fasta_sequence(example):
    """Parse a serialized SequenceExample describing one protein sequence.

    Args:
      example: A serialized SequenceExample with 'protein_length' and 'id'
        context features and a 'primary' sequence feature.

    Returns:
      Dict with 'id' (string), 'primary' (first column of the parsed
      sequence, as int32) and 'protein_length' (first element of the parsed
      length, as int32).
    """
    context_spec = {
        'protein_length': tf.FixedLenFeature([1], tf.int64),
        'id': tf.FixedLenFeature([], tf.string),
    }
    sequence_spec = {
        'primary': tf.FixedLenSequenceFeature([1], tf.int64),
    }
    parsed_context, parsed_sequence = tf.parse_single_sequence_example(
        example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    protein_id = parsed_context['id']
    primary = tf.to_int32(parsed_sequence['primary'][:, 0])
    protein_length = tf.to_int32(parsed_context['protein_length'][0])
    return {
        'id': protein_id,
        'primary': primary,
        'protein_length': protein_length,
    }
def parse_example(example):
    """Parse a serialized SequenceExample into an id, an outcome and frames.

    Args:
      example: A serialized SequenceExample with 'ptid' (string) and
        'primout' (int64) context features and a 'frames' string sequence.

    Returns:
      A 3-tuple `(ptid, primout, frames)`. `ptid` and `primout` get a new
      leading axis; `frames` is decoded from raw float64 bytes, cast to
      float32 and reshaped to `[-1, 224, 224, 3]`.
    """
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        example,
        context_features={
            'ptid': tf.FixedLenFeature([], tf.string),
            'primout': tf.FixedLenFeature([], tf.int64),
        },
        sequence_features={
            'frames': tf.FixedLenSequenceFeature([], tf.string),
        })

    ptid = tf.cast(context_parsed['ptid'], tf.string)
    primout = tf.cast(context_parsed['primout'], tf.int64)

    raw_frames = tf.decode_raw(sequence_parsed['frames'], tf.float64)
    frames = tf.reshape(tf.cast(raw_frames, tf.float32), [-1, 224, 224, 3])

    return (tf.expand_dims(ptid, axis=0),
            tf.expand_dims(primout, axis=0),
            frames)
def _parse_tfexample(serialized_example):
    """Parse a serialized tf.train.SequenceExample into tensors.

    Context features: 'label', 'task' (int64 scalars).
    Sequence features: 'sentence' (int64 per step).

    Args:
      serialized_example: A serialized SequenceExample.

    Returns:
      A 3-tuple `(task, label, sentence)`.
    """
    scalar_int = tf.FixedLenFeature([], tf.int64)
    context_dict, sequence_dict = tf.parse_single_sequence_example(
        serialized_example,
        context_features={'label': scalar_int, 'task': scalar_int},
        sequence_features={
            'sentence': tf.FixedLenSequenceFeature([], tf.int64),
        })
    return (context_dict['task'],
            context_dict['label'],
            sequence_dict['sentence'])
def _decode_record(record, name_to_features):
    """Decode a record into a dict of tensors with an explicit leading size.

    Args:
      record: A serialized SequenceExample.
      name_to_features: Sequence-feature specs passed to
        `tf.parse_single_sequence_example`.

    Returns:
      The parsed sequence dict, with int64 tensors cast to int32 and every
      tensor reshaped so its first dimension is `fewshot_batch` (a name
      taken from the enclosing scope).
    """
    _, example = tf.parse_single_sequence_example(
        record, sequence_features=name_to_features)

    for name in list(example.keys()):
        tensor = example[name]
        # tf.Example only supports tf.int64, but the TPU only supports
        # tf.int32, so all int64 tensors are cast to int32.
        if tensor.dtype == tf.int64:
            tensor = tf.to_int32(tensor)
        # Sequence examples come with a dynamic/unknown leading dimension;
        # replace it with the explicit fewshot "batch" size.
        dynamic_shape = tf.shape(example[name])
        target_shape = tf.concat([[fewshot_batch], dynamic_shape[1:]], 0)
        example[name] = tf.reshape(tensor, target_shape)

    return example
def read_record(filename_queue):
    """Read one record and return its feature and label sequences.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.

    Returns:
      A pair `(feature, label)`: the parsed 'feature_list' and
      'feature_list_labels' float32 sequence features.
    """
    _, record_string = tf.TFRecordReader().read(filename_queue)

    sequence_spec = {
        'feature_list': tf.FixedLenSequenceFeature(16, tf.float32),
        'feature_list_labels': tf.FixedLenSequenceFeature(16, tf.float32),
    }
    # No context features are requested (second positional argument).
    _, example = tf.parse_single_sequence_example(
        record_string, None, sequence_features=sequence_spec)

    return example['feature_list'], example['feature_list_labels']
def tfrecord_parse_sparse_fn(example_proto):
    """Parse a serialized SequenceExample with variable-length features.

    Args:
      example_proto: A serialized SequenceExample.

    Returns:
      A 4-tuple `(feature, target, feat_len, target_len)`: the "feature"
      sequence and "target" context feature (both parsed as `VarLenFeature`)
      followed by the int64 scalars "feat_len" and "target_len".
    """
    scalar_int = tf.FixedLenFeature([], dtype=tf.int64)
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=example_proto,
        context_features={
            "feat_len": scalar_int,
            "target_len": scalar_int,
            "target": tf.VarLenFeature(dtype=tf.int64),
        },
        sequence_features={
            "feature": tf.VarLenFeature(dtype=tf.float32),
        })

    return (sequence_parsed["feature"],
            context_parsed["target"],
            context_parsed["feat_len"],
            context_parsed["target_len"])
def train_data_parser(self, serialized_example):
    """Parse one training SequenceExample.

    Args:
      serialized_example: A serialized SequenceExample.

    Returns:
      A 3-tuple `(sequence_length, input, output)`: the int64 scalar
      "sequence_length" and the float32 "input" / "output" sequences, whose
      per-step widths are `self.__input_size` and `self.__output_size`.
    """
    context_spec = {
        "sequence_length": tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_spec = {
        "input": tf.FixedLenSequenceFeature(
            [self.__input_size], dtype=tf.float32),
        "output": tf.FixedLenSequenceFeature(
            [self.__output_size], dtype=tf.float32),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    return (context_parsed["sequence_length"],
            sequence_parsed["input"],
            sequence_parsed["output"])
def feature_parser(example):
    """Parse a serialized SequenceExample into a movie id and audio features.

    Args:
      example: A serialized SequenceExample with a 'movie_id' string context
        feature and an 'audio_embedding' string sequence feature.

    Returns:
      A pair `(movie_id, shaped_feature)`; the embedding bytes are decoded
      as uint8, divided by 255, cast to float32 and reshaped to `[-1, 128]`.
    """
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        example,
        context_features={'movie_id': tf.FixedLenFeature([], tf.string)},
        sequence_features={
            'audio_embedding': tf.FixedLenSequenceFeature([], tf.string),
        })

    embedding_bytes = tf.decode_raw(
        sequence_parsed['audio_embedding'], tf.uint8)
    normalized_feature = tf.divide(embedding_bytes, tf.constant(255, tf.uint8))
    as_float = tf.cast(normalized_feature, tf.float32)
    shaped_feature = tf.reshape(as_float, [-1, 128])

    return context_parsed['movie_id'], shaped_feature
def prepare_reader(self, filename_queue, max_quantized_value=2,
                   min_quantized_value=-2):
    """Read one record and return its video id, features, labels and frames.

    The feature named `self.feature_name` is read as a string sequence
    feature when `self.sequence_data` is truthy, and otherwise as a float32
    context feature of shape `self.feature_size`.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.
      max_quantized_value: Forwarded to `Dequantize` (sequence data only).
      min_quantized_value: Forwarded to `Dequantize` (sequence data only).

    Returns:
      A 4-tuple `(video_id, video_matrix, labels, num_frames)`. For
      non-sequence data `num_frames` is the constant -1.
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_features = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_features = None
    if self.sequence_data:
        sequence_features = {
            self.feature_name: tf.FixedLenSequenceFeature(
                [], dtype=tf.string),
        }
    else:
        context_features[self.feature_name] = tf.FixedLenFeature(
            self.feature_size, tf.float32)

    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (self.num_classes, ), 1)
    labels = tf.cast(dense_labels, tf.bool)

    if self.sequence_data:
        raw_bytes = tf.decode_raw(features[self.feature_name], tf.uint8)
        decoded_features = tf.reshape(
            tf.cast(raw_bytes, tf.float32), [-1, self.feature_size])
        num_frames = tf.minimum(
            tf.shape(decoded_features)[0], self.max_frames)
        video_matrix = Dequantize(
            decoded_features, max_quantized_value, min_quantized_value)
    else:
        video_matrix = contexts[self.feature_name]
        num_frames = tf.constant(-1)

    # Padding/truncating to 'max_frames' frames is currently disabled:
    # video_matrix = resize_axis(video_matrix, 0, self.max_frames)
    return contexts["video_id"], video_matrix, labels, num_frames
def parse_sequence_example(serialized):
    """Parse a serialized SequenceExample of words, spans and candidates.

    Args:
      serialized: A serialized SequenceExample.

    Returns:
      A 15-tuple, in order: chunk_id, words (cast to int32), words_len,
      begin_span, end_span, spans_len, cand_entities, cand_entities_ids,
      cand_entities_scores, cand_entities_labels (these four converted from
      sparse to dense), cand_entities_len, ground_truth, ground_truth_len,
      begin_gm, end_gm.
    """
    def int_steps():
        # One int64 scalar per sequence step.
        return tf.FixedLenSequenceFeature([], dtype=tf.int64)

    # NOTE: the "mask_ids", "segs_ids", "chars" and "chars_len" features are
    # intentionally not parsed; they were disabled in the original spec.
    sequence_features = {
        "words": int_steps(),
        "begin_span": int_steps(),
        "end_span": int_steps(),
        "cand_entities": tf.VarLenFeature(tf.int64),
        "cand_entities_ids": tf.VarLenFeature(tf.int64),
        "cand_entities_scores": tf.VarLenFeature(tf.float32),
        "cand_entities_labels": tf.VarLenFeature(tf.int64),
        "cand_entities_len": int_steps(),
        "ground_truth": int_steps(),
        "begin_gm": int_steps(),
        "end_gm": int_steps(),
    }
    context_features = {
        "chunk_id": tf.FixedLenFeature([], dtype=tf.string),
        "words_len": tf.FixedLenFeature([], dtype=tf.int64),
        "spans_len": tf.FixedLenFeature([], dtype=tf.int64),
        "ground_truth_len": tf.FixedLenFeature([], dtype=tf.int64),
    }

    context, sequence = tf.parse_single_sequence_example(
        serialized,
        context_features=context_features,
        sequence_features=sequence_features)

    words = tf.cast(sequence["words"], dtype=tf.int32)
    cand_entities = tf.sparse_tensor_to_dense(sequence["cand_entities"])
    cand_entities_ids = tf.sparse_tensor_to_dense(
        sequence["cand_entities_ids"])
    cand_entities_scores = tf.sparse_tensor_to_dense(
        sequence["cand_entities_scores"])
    cand_entities_labels = tf.sparse_tensor_to_dense(
        sequence["cand_entities_labels"])

    return (
        context["chunk_id"],
        words,
        context["words_len"],
        sequence["begin_span"],
        sequence["end_span"],
        context["spans_len"],
        cand_entities,
        cand_entities_ids,
        cand_entities_scores,
        cand_entities_labels,
        sequence["cand_entities_len"],
        sequence["ground_truth"],
        context["ground_truth_len"],
        sequence["begin_gm"],
        sequence["end_gm"],
    )
def parse_tf_example(example, features):
    """Parse a serialized SequenceExample into selected features and a label.

    Args:
      example: A serialized SequenceExample.
      features: Container of feature names to return; only names among
        'tokens', 'word_tokens', 'uncased_word_tokens', 'length' and 'image'
        can be selected.

    Returns:
      A pair `(returned_features, one_hot)`: a dict restricted to the
      requested feature names, and the float32 one-hot encoding of
      `label - 1` over `NUM_CLASSES` classes.
    """
    scalar_int = tf.FixedLenFeature([], dtype=tf.int64)
    context_features = {
        'label': scalar_int,
        'length': scalar_int,
        'image': tf.FixedLenFeature([], dtype=tf.string),
    }
    sequence_features = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.int64)
        for name in ('tokens', 'word_tokens', 'uncased_word_tokens')
    }

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        context_features=context_features,
        sequence_features=sequence_features,
        serialized=example)

    decoded_image = tf.image.decode_jpeg(context_parsed['image'])
    resized = tf.image.resize_images(decoded_image, IMAGE_DIMS[:-1])
    resized_image = tf.cast(tf.round(resized), tf.uint8)

    # Stored labels are shifted down by one before one-hot encoding.
    adjusted_label = context_parsed['label'] - 1
    one_hot = tf.one_hot(adjusted_label, NUM_CLASSES, dtype=tf.float32)

    # Both word-token sequences are shifted up by one.
    word_tokens = (
        sequence_parsed['word_tokens'] + tf.constant(1, dtype=tf.int64))
    uncased_word_tokens = (
        sequence_parsed['uncased_word_tokens']
        + tf.constant(1, dtype=tf.int64))

    all_features = {
        'tokens': sequence_parsed['tokens'],
        'word_tokens': word_tokens,
        'uncased_word_tokens': uncased_word_tokens,
        'length': context_parsed['length'],
        'image': resized_image,
    }

    # Not returning features we don't need saves computation time.
    # In pure TF code it wouldn't matter, but Keras must force evaluation
    # at some point.
    returned_features = {}
    for name, tensor in all_features.items():
        if name in features:
            returned_features[name] = tensor

    return (returned_features, one_hot)
def _generate_feats_and_label_batch(filename_queue, batch_size):
    """Construct a bucketed batch of spectral features and transcriptions.

    Args:
      filename_queue: Queue of filenames to read data from.
      batch_size: Number of utterances per batch.

    Returns:
      feats: Batched "feats" sequence feature (13 float32 values per step).
      labels: Batched "labels" context feature, cast to int32.
      seq_len: Batched sequence lengths, as returned by
        `bucket_by_sequence_length`.
    """
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "seq_len": tf.FixedLenFeature([], dtype=tf.int64),
        "labels": tf.VarLenFeature(dtype=tf.int64),
    }
    # mfcc features are 13 dimensional
    sequence_spec = {
        "feats": tf.FixedLenSequenceFeature([13, ], dtype=tf.float32),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # Bucket by utterance length (boundaries 100, 200, ..., 1800) and batch.
    input_length = tf.cast(context_parsed['seq_len'], tf.int32)
    boundaries = list(range(100, 1900, 100))
    seq_len, batched = tf.contrib.training.bucket_by_sequence_length(
        input_length=input_length,
        tensors=[sequence_parsed['feats'], context_parsed['labels']],
        batch_size=batch_size,
        bucket_boundaries=boundaries,
        allow_smaller_final_batch=True,
        num_threads=16,
        dynamic_pad=True)
    feats, labels = batched

    return feats, tf.cast(labels, tf.int32), seq_len
def main(_):
    """Write 30 serialized records to `FLAGS.record_file`, then read them back.

    The write phase evaluates the tensor from
    `construct_tracks_from_sequence(load_sequence())` 30 times and writes
    each value as a record. The read phase parses records from the same
    file and prints the parsed context and sequence 30 times.

    Args:
      _: Unused positional argument.
    """
    # WRITE PHASE
    print('WRITE PHASE')
    record_writer = tf.python_io.TFRecordWriter(FLAGS.record_file)
    try:
        sequence = load_sequence()
        track = construct_tracks_from_sequence(sequence)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for _step in range(30):
                    record_writer.write(sess.run(track))
            finally:
                # Always stop the queue-runner threads, even on failure.
                coord.request_stop()
                coord.join(threads)
    finally:
        # Close the writer so written records are flushed to disk.
        record_writer.close()

    # READ PHASE
    print('READ PHASE')
    record_reader = tf.TFRecordReader()
    record_queue = tf.train.string_input_producer([FLAGS.record_file])
    _, value = record_reader.read(record_queue)

    context, sequence_example = tf.parse_single_sequence_example(
        value,
        context_features={
            'sequence': tf.FixedLenFeature([], tf.string),
            'length': tf.FixedLenFeature([], tf.int64),
        },
        sequence_features={
            'frame': tf.FixedLenSequenceFeature([], tf.int64),
            # Currently not parsed:
            # 'bndbox': tf.FixedLenSequenceFeature([4], tf.int64),
            # 'occluded': tf.FixedLenSequenceFeature([], tf.int64),
            # 'generated': tf.FixedLenSequenceFeature([], tf.int64)
        })

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for _step in range(30):
                print(sess.run([context, sequence_example]))
        finally:
            coord.request_stop()
            coord.join(threads)
def read_and_decode(filename_queue):
    """Read one record and decode its raw-byte sequence features.

    Args:
      filename_queue: Queue of TFRecord filenames for `tf.TFRecordReader`.

    Returns:
      A pair `(decoded_data, context_parsed)`. In `decoded_data`,
      'xyz_coords', 'pixel_coords' and 'rotation' are decoded from float64
      bytes and cast to float32; 'material', 'size', 'color' and 'shape' are
      decoded from int64 bytes and cast to int32; 'question' is passed
      through unchanged.

    Raises:
      AttributeError: If the parsed sequence dict contains any other key.
    """
    print('Reading and Decoding')
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    scalar_int = tf.FixedLenFeature([], dtype=tf.int64)
    context_features = {
        'num_pair': scalar_int,
        "answer": scalar_int,
        "question_word_len": scalar_int,
    }

    float_keys = ('xyz_coords', 'pixel_coords', 'rotation')
    int_keys = ('material', 'size', 'color', 'shape')
    sequence_features = {
        name: tf.FixedLenSequenceFeature([], dtype=tf.string)
        for name in float_keys + int_keys
    }
    sequence_features["question"] = tf.FixedLenSequenceFeature(
        [], dtype=tf.int64)

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        context_features=context_features,
        serialized=serialized_example,
        sequence_features=sequence_features)

    decoded_data = dict()
    for key in sequence_parsed:
        parsed = sequence_parsed[key]
        if key == 'question':
            decoded_data[key] = parsed
            continue
        if key in float_keys:
            stored_dtype, target_dtype = tf.float64, tf.float32
        elif key in int_keys:
            stored_dtype, target_dtype = tf.int64, tf.int32
        else:
            raise AttributeError
        decoded_data[key] = tf.cast(
            tf.decode_raw(parsed, stored_dtype), target_dtype, name=key)

    return decoded_data, context_parsed
def t1():
    """Build one SequenceExample in memory, parse it and print the parsed values.

    The example carries the context features 'locale' (bytes) and 'age'
    (int64) and a 'movie_rating' feature list with `times` identical steps of
    three floats.  Results are evaluated with ``tf.contrib.learn.run_n``.
    """
    print("t1" + "=" * 20)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    def make_example(locale, age, score, times):
        """Return a serialized SequenceExample repeating `score` `times` times."""
        # BytesList only accepts bytes; a text string (Python 3 str) would
        # raise TypeError, so encode it first.  No-op for byte strings.
        if not isinstance(locale, bytes):
            locale = locale.encode("utf-8")
        context = tf.train.Features(
            feature={
                "locale": tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[locale])),
                "age": tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[age])),
            })
        rating_steps = [
            tf.train.Feature(float_list=tf.train.FloatList(value=score))
            for _ in range(times)
        ]
        feature_lists = tf.train.FeatureLists(
            feature_list={
                "movie_rating": tf.train.FeatureList(feature=rating_steps),
            })
        example = tf.train.SequenceExample(
            context=context, feature_lists=feature_lists)
        return example.SerializeToString()

    context_features = {
        "locale": tf.FixedLenFeature([], dtype=tf.string),
        "age": tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_features = {
        "movie_rating": tf.FixedLenSequenceFeature(
            [3], dtype=tf.float32, allow_missing=True),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        make_example(locale='china', age=24, score=[1.0, 3.5, 4.0], times=2),
        context_features=context_features,
        sequence_features=sequence_features)
    print(tf.contrib.learn.run_n(context_parsed))
    print(tf.contrib.learn.run_n(sequence_parsed))
def eval_input_pipeline(tfrecords_dir, file_pattern):
    """Build the batched evaluation input pipeline over the tail of the file list.

    Args:
        tfrecords_dir: Directory that is globbed for TFRecord files.
        file_pattern: Glob pattern joined onto `tfrecords_dir`.

    Returns:
        A 4-tuple ``(video_ids, video_batch, audio_batch, num_eval_files)``.
        Both feature batches are resized along axis 1 to ``FLAGS.max_frames``.
    """
    all_files = glob.glob(os.path.join(tfrecords_dir, file_pattern))
    # NOTE(review): the slice starts at index int(0.9 * N) + 1, so the file at
    # index int(0.9 * N) is not part of this list - confirm against the
    # training split.
    first_eval_index = int(len(all_files) * 0.9) + 1
    tfrecords_list = all_files[first_eval_index:]

    filename_queue = tf.train.string_input_producer(
        tfrecords_list, num_epochs=FLAGS.num_epochs)
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features={"id": tf.FixedLenFeature([], tf.string)},
        sequence_features={
            "rgb": tf.FixedLenSequenceFeature([], dtype=tf.string),
            "audio": tf.FixedLenSequenceFeature([], dtype=tf.string),
        })

    def _decode(feature_name, width):
        # Raw bytes -> float64 -> float32, reshaped to rows of `width` values.
        as_float64 = tf.decode_raw(sequence_parsed[feature_name], tf.float64)
        return tf.reshape(tf.cast(as_float64, tf.float32), [-1, width])

    video_decoded_features = _decode('rgb', 1024)
    audio_decoded_features = _decode('audio', 128)

    vvid, video_batch_data, audio_batch_data = tf.train.batch(
        tensors=[context_parsed['id'], video_decoded_features,
                 audio_decoded_features],
        batch_size=FLAGS.batch_size,
        num_threads=10,
        capacity=4 * FLAGS.batch_size,
        allow_smaller_final_batch=True,
        dynamic_pad=True)

    video_batch_data = resize_axis(
        tensor=video_batch_data, axis=1, new_size=FLAGS.max_frames)
    audio_batch_data = resize_axis(
        tensor=audio_batch_data, axis=1, new_size=FLAGS.max_frames)
    return vvid, video_batch_data, audio_batch_data, len(tfrecords_list)
def _decode_train_example(self, ex_serial):
    """Parse one serialized training SequenceExample.

    Args:
        ex_serial: Serialized SequenceExample passed as ``serialized``.

    Returns:
        ``(context_parsed, sequence_parsed)``: the parsed context dict and the
        parsed feature-list dict, keyed by the ``ExampleString`` names.
    """
    # The sequence length is int64; every other context entry is a string.
    context_spec = {
        ExampleString.sequent_length: tf.FixedLenFeature([], dtype=tf.int64),
    }
    string_keys = (
        ExampleString.input_start_date,
        ExampleString.input_end_date,
        ExampleString.target__start_date,
        ExampleString.target_end_date,
        ExampleString.token,
    )
    for string_key in string_keys:
        context_spec[string_key] = tf.FixedLenFeature([], dtype=tf.string)

    sequence_spec = {}
    for float_key in (ExampleString.input_sequence,
                      ExampleString.target_sequence):
        sequence_spec[float_key] = tf.FixedLenSequenceFeature(
            [], dtype=tf.float32)

    parsed_context, parsed_sequence = tf.parse_single_sequence_example(
        serialized=ex_serial,
        context_features=context_spec,
        sequence_features=sequence_spec)
    return parsed_context, parsed_sequence
def parse_sequence_example(filename_queue):
    """Read a TFRecord from `filename_queue` and split it into its parts.

    Args:
        filename_queue: Queue handed to ``tf.TFRecordReader().read``.

    Returns:
        ``(context_parsed, sequence_data)``: the parsed int64 context scalars
        and a dict of the six feature lists decoded from raw bytes to uint8.
    """
    context_names = (
        "length", "length_t2", "img_h", "img_c", "pnt_h",
        "pnt_c", "pre_act", "act", "pos_act", "state",
    )
    sequence_names = (
        "image_raw", "points", "audio_raw",
        "image_raw_t2", "points_t2", "audio_raw_t2",
    )

    _, example = tf.TFRecordReader().read(filename_queue)

    context_features = {}
    for name in context_names:
        context_features[name] = tf.FixedLenFeature([], dtype=tf.int64)
    sequence_features = {}
    for name in sequence_names:
        sequence_features[name] = tf.FixedLenSequenceFeature(
            [], dtype=tf.string)

    # Parse the example
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=example,
        context_features=context_features,
        sequence_features=sequence_features)

    # Every feature list holds raw byte strings; decode each one to uint8.
    sequence_data = {}
    for name in sequence_names:
        sequence_data[name] = tf.decode_raw(sequence_parsed[name], tf.uint8)
    return context_parsed, sequence_data
def parse(example_proto):
    """Parse a serialized SequenceExample into ``(seq_length, chars, tags)``.

    Args:
        example_proto: Serialized SequenceExample passed as ``serialized``.

    Returns:
        The int64 'seq_length' context value followed by the int64 'chars' and
        'tags' feature lists.
    """
    int64_sequence = tf.FixedLenSequenceFeature([], dtype=tf.int64)
    ctx, seq = tf.parse_single_sequence_example(
        serialized=example_proto,
        context_features={
            "seq_length": tf.FixedLenFeature([], dtype=tf.int64),
        },
        sequence_features={"chars": int64_sequence, "tags": int64_sequence})
    return ctx["seq_length"], seq["chars"], seq["tags"]
def test_parse_single_sequence_example(self):
    """Check that a two-step example parses to the expected context and features.

    The example comes from ``make_example([1, 2])``.  The test expects a
    'length' context value of 2 and a 'mod_2' feature list equal to [1, 0].
    """
    serialized = self.__class__.make_example([1, 2]).SerializeToString()
    context_features = {
        'length': tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_features = {
        'mod_2': tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context_output, feature_list_output = tf.parse_single_sequence_example(
        serialized,
        context_features=context_features,
        sequence_features=sequence_features)
    # Use the session as a context manager so it is closed after the run
    # instead of being leaked by the test.
    with tf.Session() as sess:
        context, feature_list = sess.run(
            [context_output, feature_list_output])
    self.assertDictEqual(context, {'length': 2})
    np.testing.assert_equal(feature_list, {'mod_2': np.array([1, 0])})
def _parse_dataset(example_proto):
    """Parse one serialized SequenceExample of a dataset.

    Args:
        example_proto: Serialized SequenceExample.

    Returns:
        ``(inputs, masks, length, [label])``: the two int64 feature lists, the
        'length' context scalar, and the 'label' context scalar wrapped in a
        one-element list.
    """
    scalar_int64 = tf.FixedLenFeature(shape=[], dtype=tf.int64)
    sequence_int64 = tf.FixedLenSequenceFeature(shape=[], dtype=tf.int64)

    # parse each sequence example
    ctx, seq = tf.parse_single_sequence_example(
        example_proto,
        context_features={'label': scalar_int64, 'length': scalar_int64},
        sequence_features={'inputs': sequence_int64, 'masks': sequence_int64})

    # The label is returned as a raw index (no one-hot encoding is applied here).
    label = [ctx['label']]
    return seq['inputs'], seq['masks'], ctx['length'], label
def parse_tfrecord(sereialized_example):
    """Parse one serialized SequenceExample into its five tensors.

    Args:
        sereialized_example: Serialized SequenceExample.

    Returns:
        ``(label, lexical, wordnet, position, sentence)``.  'lexical' has shape
        [6] and 'wordnet' shape [2]; each step of 'sentence' has shape [3] and
        each step of 'position' shape [2].  All values are int64.
    """
    context_spec = {
        "lexical": tf.FixedLenFeature([6], tf.int64),
        "label": tf.FixedLenFeature([], tf.int64),
        "wordnet": tf.FixedLenFeature([2], tf.int64),
    }
    sequence_spec = {
        "sentence": tf.FixedLenSequenceFeature([3], tf.int64),
        "position": tf.FixedLenSequenceFeature([2], tf.int64),
    }
    ctx, seq = tf.parse_single_sequence_example(
        sereialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)
    return (ctx["label"], ctx["lexical"], ctx["wordnet"],
            seq["position"], seq["sentence"])
def parse_sequence_example(serialized, sample_feature):
    """Parse a serialized tensorflow.SequenceExample into a raw sample.

    Args:
        serialized: A scalar string Tensor, a single serialized
            SequenceExample.
        sample_feature: Name of the feature list that was set when the
            SequenceExample was serialized.

    Returns:
        The string feature-list tensor stored under `sample_feature`.
    """
    # No context features are parsed: only the feature list is requested.
    _, sequence = tf.parse_single_sequence_example(
        serialized,
        sequence_features={
            sample_feature: tf.FixedLenSequenceFeature([], dtype=tf.string),
        })
    # Look the result up under the requested name.  The hard-coded 'sample'
    # key raised KeyError for every other feature name.
    return sequence[sample_feature]
def get_features_and_labels(feature_names, input_tfrecord_data_path, num_classes):
    """
    Utility function to get the features and labels from the multiclass
    samples' tfrecords.

    :param feature_names: comma-separated names of the feature lists to parse
    :param input_tfrecord_data_path: glob pattern of the input tfrecord files
    :param num_classes: length of the dense label vector
    :return: (video id tensor, dict of parsed feature lists, int32 label tensor)
    """
    list_of_feature_names = [name.strip() for name in feature_names.split(',')]

    # now read the input tfrecord files from the given path
    files = gfile.Glob(input_tfrecord_data_path)
    if not files:
        raise IOError("Unable to find training files. tfrecord_data_path='" +
                      input_tfrecord_data_path + "'.")
    logging.info("Number of training files: %s.", str(len(files)))
    files.reverse()

    filename_queue = tf.train.string_input_producer(
        files, num_epochs=1, shuffle=False)
    _, serialized_example = tf.TFRecordReader().read(filename_queue)

    context_spec = {
        "video_id": tf.FixedLenFeature([], tf.string),
        "labels": tf.VarLenFeature(tf.int64),
    }
    sequence_spec = {}
    for feature_name in list_of_feature_names:
        sequence_spec[feature_name] = tf.FixedLenSequenceFeature(
            [], dtype=tf.string)
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features=context_spec,
        sequence_features=sequence_spec)

    # read ground truth labels: the sparse label values are used as indices
    # into a dense vector of length num_classes that holds 1 at each of them
    dense_labels = tf.sparse_to_dense(
        contexts["labels"].values, (num_classes, ), 1, validate_indices=False)
    labels = tf.cast(dense_labels, tf.int32)
    return contexts["video_id"], features, labels
def parse_example(serialized):
    """Parse a serialized example into ``(video, label)``.

    Args:
        serialized: Serialized SequenceExample; only its context is used.

    Returns:
        ``video``: the 'train/video' floats densified and reshaped to
        [-1, 224, 224, 3]; ``label``: the int64 'train/label' scalar.
    """
    # The video is stored as a variable-length float feature, not as an
    # encoded image string.
    context_spec = {
        'train/label': tf.FixedLenFeature((), tf.int64),
        'train/video': tf.VarLenFeature(dtype=tf.float32),
    }
    parsed_context, _ = tf.parse_single_sequence_example(
        serialized=serialized,
        context_features=context_spec,
        sequence_features={})

    dense_video = tf.sparse.to_dense(parsed_context['train/video'])
    video = tf.reshape(dense_video, shape=[-1, 224, 224, 3])
    return video, parsed_context['train/label']
def _test(self, kwargs, expected_context_values=None, expected_feat_list_values=None, expected_err_re=None):
    """Run ``tf.parse_single_sequence_example(**kwargs)`` and verify the result.

    Args:
        kwargs: Keyword arguments forwarded to the parser.  The optional keys
            'context_dense_keys', 'context_sparse_keys',
            'context_dense_shapes', 'feature_list_dense_keys' and
            'feature_list_dense_shapes' also drive the static shape checks.
        expected_context_values: Expected context outputs (default: empty).
        expected_feat_list_values: Expected feature-list outputs (default:
            empty).
        expected_err_re: If given, evaluating the context outputs must raise
            an op error matching this regex; no values are compared then.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
        # Pull out some keys to check shape inference
        dense_ctx_keys = kwargs.get("context_dense_keys", [])
        sparse_ctx_keys = kwargs.get("context_sparse_keys", [])
        dense_ctx_shapes = kwargs.get("context_dense_shapes", [])
        dense_fl_keys = kwargs.get("feature_list_dense_keys", [])
        dense_fl_shapes = kwargs.get("feature_list_dense_shapes", [])

        # Returns dict w/ Tensors and SparseTensors
        context_out, feat_list_out = tf.parse_single_sequence_example(**kwargs)

        # Check shapes; if serialized is a Tensor we need its size to
        # properly check.
        if dense_ctx_shapes:
            self.assertEqual(len(dense_ctx_keys), len(dense_ctx_shapes))
            for key, shape in zip(dense_ctx_keys, dense_ctx_shapes):
                self.assertEqual(
                    tuple(context_out[key].get_shape().as_list()), shape)

        for key in sparse_ctx_keys:
            sparse_out = context_out[key]
            self.assertEqual(
                tuple(sparse_out.indices.get_shape().as_list()), (None, 1))
            self.assertEqual(
                tuple(sparse_out.values.get_shape().as_list()), (None,))
            self.assertEqual(
                tuple(sparse_out.shape.get_shape().as_list()), (1,))

        if dense_fl_shapes:
            self.assertEqual(len(dense_fl_keys), len(dense_fl_shapes))
            for key, shape in zip(dense_fl_keys, dense_fl_shapes):
                # Feature lists gain a leading dimension of unknown size.
                self.assertEqual(
                    tuple(feat_list_out[key].get_shape().as_list()),
                    (None,) + shape)

        # Check values
        context_result = flatten_values_tensors_or_sparse(context_out.values())
        feature_list_result = flatten_values_tensors_or_sparse(
            feat_list_out.values())

        if expected_err_re is not None:
            with self.assertRaisesOpError(expected_err_re):
                sess.run(context_result)
        else:
            tf_context_result = sess.run(context_result)
            tf_feat_list_result = sess.run(feature_list_result)
            _compare_output_to_expected(
                self, context_out, expected_context_values, tf_context_result)
            _compare_output_to_expected(
                self, feat_list_out, expected_feat_list_values,
                tf_feat_list_result)
def parse_sequence_example(serialized_example, num_views):
    """Parses a serialized sequence example into views, sequence length data.

    Args:
        serialized_example: Serialized SequenceExample.
        num_views: Number of feature lists to parse, named 'view0' through
            'view<num_views - 1>'.

    Returns:
        ``(context_parse, views, seq_len)``: the parsed context ('task', 'len'),
        the view feature lists stacked into one tensor, and the dynamic length
        of the first dimension of a view.

    Raises:
        AssertionError: If the views do not share one static first dimension.
    """
    context_features = {
        'task': tf.FixedLenFeature(shape=[], dtype=tf.string),
        'len': tf.FixedLenFeature(shape=[], dtype=tf.int64),
    }
    view_names = ['view%d' % i for i in range(num_views)]
    sequence_features = {
        name: tf.FixedLenSequenceFeature(shape=[], dtype=tf.string)
        for name in view_names
    }
    context_parse, sequence_parse = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    views = tf.stack([sequence_parse[name] for name in view_names])
    lens = [
        sequence_parse[name].get_shape().as_list()[0] for name in view_names
    ]
    assert len(set(lens)) == 1
    # The original read the comprehension variable `v` here, which is a
    # NameError on Python 3.  The last view is what Python 2 used.
    seq_len = tf.shape(sequence_parse[view_names[-1]])[0]
    return context_parse, views, seq_len
def input_pipeline(filename, batch_size, epochs=None):
    """Build a dynamically padded batch pipeline over one TFRecord file.

    Args:
        filename: File name inside ``<cwd>/sequence_classification_data``.
        batch_size: Number of examples per batch.
        epochs: Passed as ``num_epochs`` to the filename queue.

    Returns:
        ``(batch_lengths, batch_sequences, batch_labels)``, where the lengths
        are the unpadded first dimension of each example's 'inputs'.
    """
    record_path = os.path.join(
        os.getcwd(), 'sequence_classification_data', filename)
    file_queue = tf.train.string_input_producer(
        [record_path], num_epochs=epochs)
    _, serialized_example = tf.TFRecordReader().read(file_queue)

    _, sequence = tf.parse_single_sequence_example(
        serialized=serialized_example,
        sequence_features={
            "inputs": tf.FixedLenSequenceFeature(
                [FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
            "label": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        })

    inputs = sequence["inputs"]
    # Record the true length before dynamic padding hides it.
    actual_length = tf.shape(inputs)[0]
    batch_lengths, batch_sequences, batch_labels = tf.train.batch(
        [actual_length, inputs, sequence["label"]],
        batch_size=batch_size,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")
    return batch_lengths, batch_sequences, batch_labels
def example_parser(self, filename_queue):
    """Read one record and return its six int64 feature lists.

    Args:
        filename_queue: Queue handed to ``tf.TFRecordReader().read``.

    Returns:
        ``(labels, tokens, shapes, chars, seq_len, tok_len)``, all parsed as
        feature lists; no context features are parsed.
    """
    feature_names = (
        'labels', 'tokens', 'shapes', 'chars', 'seq_len', 'tok_len')

    _, record_string = tf.TFRecordReader().read(filename_queue)
    features = {}
    for name in feature_names:
        features[name] = tf.FixedLenSequenceFeature([], tf.int64)

    _, example = tf.parse_single_sequence_example(
        serialized=record_string, sequence_features=features)
    return tuple(example[name] for name in feature_names)
def _assign_queue(self, proto_text):
    """Route `proto_text` through a shuffle queue and parse what is dequeued.

    Args:
        proto_text: object to be enqueued and managed by parallel threads.

    Returns:
        The ``(context, feature_lists)`` pair produced by parsing the dequeued
        example with the ``LENGTHS`` and ``SEQUENCES`` feature specs.
    """
    with tf.variable_scope('shuffle_queue'):
        shuffle_queue = tf.RandomShuffleQueue(
            capacity=self.capacity,
            min_after_dequeue=10*self.batch_size,
            dtypes=tf.string,
            shapes=[()])
        enqueue_op = shuffle_queue.enqueue(proto_text)
        example_dq = shuffle_queue.dequeue()
        # Four copies of the enqueue op are registered with the queue runner.
        runner = tf.train.QueueRunner(shuffle_queue, [enqueue_op] * 4)
        tf.train.add_queue_runner(runner)

        _sequence_lengths, _sequences = tf.parse_single_sequence_example(
            serialized=example_dq,
            context_features=LENGTHS,
            sequence_features=SEQUENCES)
    return _sequence_lengths, _sequences
def decode(self, serialized_example, items=None):
    """Decodes the given serialized sequence example.

    Args:
        serialized_example: a serialized example tensor.
        items: the list of items to decode. These must be a subset of the item
            keys in self._items_to_handlers. If `items` is left as None (or is
            empty), all of the items in self._items_to_handlers are decoded.

    Returns:
        the decoded items, a list with one handler result per item.
    """
    context, sequence = tf.parse_single_sequence_example(
        serialized_example,
        self._context_keys_to_features,
        self._sequence_keys_to_features)

    # Merge context and sequence features (sequence wins on a key clash).
    example = dict(context)
    example.update(sequence)
    all_features = dict(self._context_keys_to_features)
    all_features.update(self._sequence_keys_to_features)

    # Reshape non-sparse elements just once:
    for key, spec in all_features.items():
        if not isinstance(spec, tf.FixedLenFeature):
            continue
        example[key] = tf.reshape(example[key], spec.shape)

    if not items:
        items = self._items_to_handlers.keys()

    def _decode_item(item):
        handler = self._items_to_handlers[item]
        return handler.tensors_to_item(
            {key: example[key] for key in handler.keys})

    return [_decode_item(item) for item in items]
def ReadInput(data_filepattern, shuffle, params):
    """Read the tf.SequenceExample tfrecord files.

    Args:
        data_filepattern: tf.SequenceExample tfrecord filepattern.
        shuffle: Whether to shuffle the examples.
        params: parameter dict; reads 'image_size', 'seq_len', 'batch_size'
            and 'norm_scale'.

    Returns:
        image sequence batch [batch_size, seq_len, image_size, image_size,
        channel], divided by params['norm_scale'].
    """
    image_size = params['image_size']
    batch_size = params['batch_size']

    filenames = tf.gfile.Glob(data_filepattern)
    filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle)
    _, example = tf.TFRecordReader().read(filename_queue)

    # Each step is one flattened image_size x image_size x 3 float image.
    feature_spec = {
        'moving_objs': tf.FixedLenSequenceFeature(
            shape=[image_size * image_size * 3], dtype=tf.float32),
    }
    _, features = tf.parse_single_sequence_example(
        example, sequence_features=feature_spec)
    moving_objs = tf.reshape(
        features['moving_objs'],
        [params['seq_len'], image_size, image_size, 3])

    if not shuffle:
        examples = tf.train.batch(
            [moving_objs],
            batch_size=batch_size,
            num_threads=16,
            capacity=batch_size)
    else:
        examples = tf.train.shuffle_batch(
            [moving_objs],
            batch_size=batch_size,
            num_threads=64,
            capacity=batch_size * 100,
            min_after_dequeue=batch_size * 4)
    return examples / params['norm_scale']
def get_padded_batch(file_list, batch_size, input_size, label_shape=None, num_enqueuing_threads=4, shuffle=False):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
        file_list: A list of paths to TFRecord files containing
            SequenceExamples.
        batch_size: The number of SequenceExamples to include in each batch.
        input_size: The size of each input vector. The returned batch of inputs
            will have a shape [batch_size, num_steps, input_size].
        label_shape: Shape for labels. If not specified, will use [].
        num_enqueuing_threads: The number of threads to use for enqueuing
            SequenceExamples. When shuffling, the rounded-up half of them is
            given to the shuffle stage and the rest to batching.
        shuffle: Whether to shuffle the batches.

    Returns:
        inputs: A tensor of shape [batch_size, num_steps, input_size] of
            floats32s.
        labels: A tensor of shape [batch_size, num_steps] of int64s.
        lengths: A tensor of shape [batch_size] of int32s. The lengths of each
            SequenceExample before padding.

    Raises:
        ValueError: If `shuffle` is True and `num_enqueuing_threads` is less
            than 2.
    """
    file_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    sequence_features = {
        'inputs': tf.FixedLenSequenceFeature(
            shape=[input_size], dtype=tf.float32),
        'labels': tf.FixedLenSequenceFeature(
            shape=label_shape or [], dtype=tf.int64),
    }
    _, sequence = tf.parse_single_sequence_example(
        serialized_example, sequence_features=sequence_features)

    # Unpadded number of steps, captured before dynamic padding.
    length = tf.shape(sequence['inputs'])[0]
    input_tensors = [sequence['inputs'], sequence['labels'], length]

    if shuffle:
        if num_enqueuing_threads < 2:
            raise ValueError(
                '`num_enqueuing_threads` must be at least 2 when shuffling.')
        # Round the half up.  The original applied ceil() to the undivided
        # count, which is a no-op, so odd counts were rounded down.
        shuffle_threads = int(math.ceil(num_enqueuing_threads / 2.))

        # Since there may be fewer records than SHUFFLE_MIN_AFTER_DEQUEUE, take
        # the minimum of that number and the number of records.
        min_after_dequeue = count_records(
            file_list, stop_at=SHUFFLE_MIN_AFTER_DEQUEUE)
        input_tensors = _shuffle_inputs(
            input_tensors,
            capacity=QUEUE_CAPACITY,
            min_after_dequeue=min_after_dequeue,
            num_threads=shuffle_threads)

        num_enqueuing_threads -= shuffle_threads

    tf.logging.info(input_tensors)
    return tf.train.batch(
        input_tensors,
        batch_size=batch_size,
        capacity=QUEUE_CAPACITY,
        num_threads=num_enqueuing_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=False)
def get_padded_batch(file_list, batch_size, num_enqueuing_threads=4, shuffle=False):
    """Reads batches of SequenceExamples from TFRecords and pads them.

    Can deal with variable length SequenceExamples by padding each batch to the
    length of the longest sequence with zeros.

    Args:
        file_list: A list of paths to TFRecord files containing
            SequenceExamples.
        batch_size: The number of SequenceExamples to include in each batch.
        num_enqueuing_threads: The number of threads to use for enqueuing
            SequenceExamples. When shuffling, the rounded-up half of them is
            given to the shuffle stage and the rest to batching.
        shuffle: Whether to shuffle the batches.

    Returns:
        labels: A tensor of shape [batch_size] of int64s.
        frames: A tensor of shape [batch_size, num_steps] of int64s, where
            num_steps is the longest sequence in the batch.

    Raises:
        ValueError: If `shuffle` is True and `num_enqueuing_threads` is less
            than 2.
    """
    file_queue = tf.train.string_input_producer(file_list)
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)

    context_features = {
        "label": tf.FixedLenFeature([], dtype=tf.int64),
    }
    sequence_features = {
        "frame": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)

    input_tensors = [context_parsed['label'], sequence_parsed['frame']]

    if shuffle:
        if num_enqueuing_threads < 2:
            raise ValueError(
                '`num_enqueuing_threads` must be at least 2 when shuffling.')
        # Round the half up.  The original applied ceil() to the undivided
        # count, which is a no-op, so odd counts were rounded down.
        shuffle_threads = int(math.ceil(num_enqueuing_threads / 2.))

        # Since there may be fewer records than SHUFFLE_MIN_AFTER_DEQUEUE, take
        # the minimum of that number and the number of records.
        min_after_dequeue = count_records(
            file_list, stop_at=SHUFFLE_MIN_AFTER_DEQUEUE)
        input_tensors = _shuffle_inputs(
            input_tensors,
            capacity=QUEUE_CAPACITY,
            min_after_dequeue=min_after_dequeue,
            num_threads=shuffle_threads)

        num_enqueuing_threads -= shuffle_threads

    tf.logging.info(input_tensors)
    return tf.train.batch(
        input_tensors,
        batch_size=batch_size,
        capacity=QUEUE_CAPACITY,
        num_threads=num_enqueuing_threads,
        dynamic_pad=True,
        allow_smaller_final_batch=False)
def run():
    """Write seven toy SequenceExamples to a TFRecord file, then batch them back.

    Part 1 serializes variable-length sequences of 5-float vectors, each with
    one int64 context label.  Part 2 reads the file for one epoch, batches the
    examples with dynamic padding and prints two batches.
    """
    FEATURE_SIZE_PER_TIMESTEP = 5
    BATCH_SIZE = 4

    ### 1: serialize/write part
    tf.reset_default_graph()

    # One entry per example; every time step has FEATURE_SIZE_PER_TIMESTEP
    # floats.
    sequences = [
        [[1.0, 1.0, 1.0, 1.0, 1.0],
         [2.0, 3.0, 4.0, 5.0, 6.0],
         [3.0, 2.0, 1.0, 0.0, -1.0]],
        [[4.0, 3.0, 1.0, 2.0, 5.0],
         [5.0, 5.0, 5.0, 5.0, 5.0],
         [1.0, 2.0, 3.0, 4.0, 5.0]],
        [[1.0, 0.0, 0.0, 0.0, 1.0],
         [2.0, 2.0, 2.0, 2.0, 2.0]],
        [[0.0, 0.0, 0.0, 0.0, 0.0],
         [2.0, 1.0, 0.0, -1.0, -2.0],
         [4.0, 8.0, 12.0, 16.0, 20.0],
         [7.0, 7.0, 7.0, 0.0, 1.0]],
        [[9.0, 9.0, 9.0, 9.0, 9.0],
         [8.0, 8.0, 1.0, 1.0, 1.0]],
        [[5.0, 4.0, 3.0, 2.0, 1.0],
         [4.0, 4.0, 8.0, 8.0, 8.0],
         [3.0, 3.0, 3.0, 6.0, 6.0],
         [2.0, 2.0, 2.0, 2.0, 1.0],
         [1.0, 1.0, 1.0, 1.0, 1.0]],
        [[3.0, 0.0, 3.0, 0.0, 3.0],
         [6.0, 8.0, 3.0, 1.0, 1.0],
         [9.0, 9.0, 9.0, 9.0, 8.0]],
    ]
    label_sequences = [2, 0, 1, 0, 0, 0, 1]

    def make_sequence_example(inputs, label):
        """Build a SequenceExample from a list of float vectors and one label."""
        label_feature = tf.train.Feature(
            int64_list=tf.train.Int64List(value=[label]))
        steps = []
        for input_ in inputs:
            steps.append(
                tf.train.Feature(float_list=tf.train.FloatList(value=input_)))
        return tf.train.SequenceExample(
            context=tf.train.Features(feature={'label': label_feature}),
            feature_lists=tf.train.FeatureLists(
                feature_list={'inputs': tf.train.FeatureList(feature=steps)}))

    # Write all examples into a TFRecords file
    data_dir = os.path.join(os.getcwd(), 'sequence_classification_data')
    tf.gfile.MakeDirs(data_dir)
    output_file = os.path.join(data_dir, 'Sequence_classification2.tfr')
    writer = tf.python_io.TFRecordWriter(output_file)
    for sequence, label_sequence in zip(sequences, label_sequences):
        serialized = make_sequence_example(
            sequence, label_sequence).SerializeToString()
        writer.write(serialized)
    writer.close()

    ## 2: deserialize/read part
    tf.reset_default_graph()
    file_list = [os.path.join(os.getcwd(), 'sequence_classification_data',
                              'Sequence_classification2.tfr')]
    print(file_list)
    file_queue = tf.train.string_input_producer(file_list, num_epochs=1)
    _, serialized_example = tf.TFRecordReader().read(file_queue)

    # Define how to parse the example, then parse it
    context, sequence = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features={
            "label": tf.FixedLenFeature([], dtype=tf.int64),
        },
        sequence_features={
            "inputs": tf.FixedLenSequenceFeature(
                [FEATURE_SIZE_PER_TIMESTEP], dtype=tf.float32),
        })
    actual_length = tf.shape(sequence["inputs"])[0]

    # Batch the variable length tensor with dynamic padding
    batch_lengths, batch_sequences, batch_labels = tf.train.batch(
        [actual_length, sequence["inputs"], context["label"]],
        batch_size=BATCH_SIZE,
        dynamic_pad=True,
        allow_smaller_final_batch=True,
        name="input_batching")

    with tf.Session() as sess:
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer()))
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        fetches = [batch_lengths, batch_sequences, batch_labels]
        try:
            for _batch_index in range(2):
                lens, seqs, lbls = sess.run(fetches)
                print('actual_lengths =', lens)
                print('batch_size=%d, time_steps=%d' % (seqs.shape[0], seqs.shape[1]))
                print('sequences = ', seqs)
                print('labels = ', lbls)
        except tf.errors.OutOfRangeError as e:
            print('Done')
            print(e.error_code, e.message)
        finally:
            coord.request_stop()
}) sequence_example = tf.train.SequenceExample( context=context, feature_lists=feature_lists) writer.write(sequence_example.SerializeToString()) # Serialize To String writer.close() ## 2. Simple read one image ======================================================= filename_queue = tf.train.string_input_producer(["train.cat_caption"]) reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) # return the file and the name of file # features, sequence_features = tf.parse_single_example(serialized_example, # see parse_single_sequence_example for sequence example features, sequence_features = tf.parse_single_sequence_example(serialized_example, context_features={ 'image/img_raw' : tf.FixedLenFeature([], tf.string), }, sequence_features={ "image/caption": tf.FixedLenSequenceFeature([], dtype=tf.string), "image/caption_ids": tf.FixedLenSequenceFeature([], dtype=tf.int64), } ) c = tf.contrib.learn.run_n(features, n=1, feed_dict=None) from PIL import Image im = Image.frombytes('RGB', (299, 299), c[0]['image/img_raw']) tl.visualize.frame(np.asarray(im), second=1, saveable=False, name='frame', fig_idx=1236) c = tf.contrib.learn.run_n(sequence_features, n=1, feed_dict=None) print(c[0]) ## 3. Prefetch serialized SequenceExample protos ================================== def distort_image(image, thread_id): """Perform random distortions on an image.