def decode(self, serialized_example, items=None):
    """Decode a serialized TF-SequenceExample into the requested items.

    Args:
      serialized_example: a serialized TF-SequenceExample tensor.
      items: the item names to decode. Each must be a key of
        `self._items_to_handlers`. When `items` is None (or otherwise falsy),
        every item in `self._items_to_handlers` is decoded.

    Returns:
      A list with one decoded item per requested name, in the order the
      names were iterated.
    """
    parsed_context, parsed_sequences = (
        parsing_ops.parse_single_sequence_example(
            serialized_example,
            self._keys_to_context_features,
            self._keys_to_sequence_features))

    # Fixed-length context features are reshaped once, up front, to the
    # shape declared in their feature spec.
    for feature_key in self._keys_to_context_features:
        feature_spec = self._keys_to_context_features[feature_key]
        if isinstance(feature_spec, parsing_ops.FixedLenFeature):
            parsed_context[feature_key] = array_ops.reshape(
                parsed_context[feature_key], feature_spec.shape)

    if not items:
        items = self._items_to_handlers.keys()

    def _lookup(tensor_key):
        # Context features take precedence; anything else must be a
        # sequence feature.
        if tensor_key in parsed_context:
            return parsed_context[tensor_key]
        return parsed_sequences[tensor_key]

    def _decode_item(item_name):
        item_handler = self._items_to_handlers[item_name]
        tensors_by_key = {
            tensor_key: _lookup(tensor_key)
            for tensor_key in item_handler.keys
        }
        return item_handler.tensors_to_item(tensors_by_key)

    return [_decode_item(item_name) for item_name in items]
def _parse_example(example):
    """Parse one serialized SequenceExample into a single merged dict.

    The parsing specs are built from `ctx_cols` (context) and `seq_cols`
    (sequence), both taken from the enclosing scope. The parsed sequence
    features are merged into the parsed context dict, which is returned; on
    a key collision the sequence feature overwrites the context feature.
    """
    context_spec = fc.make_parse_example_spec_v2(ctx_cols)
    sequence_spec = fc.make_parse_example_spec_v2(seq_cols)
    merged, sequence_part = parsing_ops.parse_single_sequence_example(
        example,
        context_features=context_spec,
        sequence_features=sequence_spec)
    merged.update(sequence_part)
    return merged
def _test_parsed_sequence_example(
        self, col_name, col_fn, col_arg, shape, values):
    """Check that a sequence FeatureColumn parses a SequenceExample correctly.

    Builds two fixed context columns ('int_ctx' and 'float_ctx') plus the
    sequence column under test, parses the example produced by
    `_make_sequence_example()` with specs from `make_parse_example_spec_v2`,
    and asserts on the evaluated results.

    Args:
      col_name: string, name of the feature column; also the key used to
        look the parsed feature up in the sequence results.
      col_fn: function that creates the feature column, called as
        `col_fn(col_name, col_arg)`.
      col_arg: second argument forwarded to `col_fn`.
      shape: expected `dense_shape` (as a list) of the parsed sequence
        feature.
      values: expected values (as a list) at positions [0, 2, 6] of the
        parsed sequence feature's `values`.
    """
    sequence_example = _make_sequence_example()
    context_columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
    ]
    sequence_columns = [col_fn(col_name, col_arg)]

    serialized = sequence_example.SerializeToString()
    context_spec = fc.make_parse_example_spec_v2(context_columns)
    sequence_spec = fc.make_parse_example_spec_v2(sequence_columns)
    parsed_context, parsed_sequence = (
        parsing_ops.parse_single_sequence_example(
            serialized,
            context_features=context_spec,
            sequence_features=sequence_spec))

    with self.cached_session() as sess:
        context_out, sequence_out = sess.run(
            [parsed_context, parsed_sequence])

        # Sequence feature under test.
        parsed_column = sequence_out[col_name]
        self.assertEqual(list(parsed_column.dense_shape), shape)
        self.assertEqual(list(parsed_column.values[[0, 2, 6]]), values)

        # Fixed context features.
        int_ctx = context_out['int_ctx']
        self.assertEqual(list(int_ctx.dense_shape), [1])
        self.assertEqual(int_ctx.values[0], 5)
        float_ctx = context_out['float_ctx']
        self.assertEqual(list(float_ctx.shape), [1])
        self.assertAlmostEqual(float_ctx[0], 123.6, places=1)
def _test_parsed_sequence_example(
        self, col_name, col_fn, col_arg, shape, values):
    """Check that a sequence FeatureColumn parses a SequenceExample correctly.

    Builds two fixed context columns ('int_ctx' and 'float_ctx') plus the
    sequence column under test, parses the example produced by
    `_make_sequence_example()` with specs from `make_parse_example_spec`,
    and asserts on the evaluated results.

    Args:
      col_name: string, name of the feature column; also the key used to
        look the parsed feature up in the sequence results.
      col_fn: function that creates the feature column, called as
        `col_fn(col_name, col_arg)`.
      col_arg: second argument forwarded to `col_fn`.
      shape: expected `dense_shape` (as a list) of the parsed sequence
        feature.
      values: expected values (as a list) at positions [0, 2, 6] of the
        parsed sequence feature's `values`.
    """
    sequence_example = _make_sequence_example()
    context_columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
    ]
    sequence_columns = [col_fn(col_name, col_arg)]

    serialized = sequence_example.SerializeToString()
    context_spec = fc.make_parse_example_spec(context_columns)
    sequence_spec = fc.make_parse_example_spec(sequence_columns)
    parsed_context, parsed_sequence = (
        parsing_ops.parse_single_sequence_example(
            serialized,
            context_features=context_spec,
            sequence_features=sequence_spec))

    with self.cached_session() as sess:
        context_out, sequence_out = sess.run(
            [parsed_context, parsed_sequence])

        # Sequence feature under test.
        parsed_column = sequence_out[col_name]
        self.assertEqual(list(parsed_column.dense_shape), shape)
        self.assertEqual(list(parsed_column.values[[0, 2, 6]]), values)

        # Fixed context features.
        int_ctx = context_out['int_ctx']
        self.assertEqual(list(int_ctx.dense_shape), [1])
        self.assertEqual(int_ctx.values[0], 5)
        float_ctx = context_out['float_ctx']
        self.assertEqual(list(float_ctx.shape), [1])
        self.assertAlmostEqual(float_ctx[0], 123.6, places=1)
def _test(self, kwargs, expected_context_values=None,
          expected_feat_list_values=None, expected_err=None):
    """Run `parse_single_sequence_example(**kwargs)` and verify the outcome.

    Args:
      kwargs: dict of keyword arguments forwarded to
        `parsing_ops.parse_single_sequence_example`.
      expected_context_values: optional dict of expected context outputs;
        defaults to an empty dict.
      expected_feat_list_values: optional dict of expected feature-list
        outputs; defaults to an empty dict.
      expected_err: optional pair `(exception_type, predicate_or_message)`
        passed to `assertRaisesWithPredicateMatch`. When given, parsing (or
        evaluating the parsed tensors) is expected to raise, and no value or
        shape checks are performed.
    """
    expected_context_values = expected_context_values or {}
    expected_feat_list_values = expected_feat_list_values or {}

    with self.test_session() as sess:
        if expected_err:
            with self.assertRaisesWithPredicateMatch(expected_err[0],
                                                     expected_err[1]):
                c_out, fl_out = parsing_ops.parse_single_sequence_example(
                    **kwargs)
                if c_out:
                    sess.run(flatten_values_tensors_or_sparse(c_out.values()))
                if fl_out:
                    sess.run(
                        flatten_values_tensors_or_sparse(fl_out.values()))
            # The checks below read `context_out`, which only exists on the
            # success path; stop here so the error path can never hit an
            # unbound name.
            return

        # Returns dicts w/ Tensors and SparseTensors.
        context_out, feat_list_out = (
            parsing_ops.parse_single_sequence_example(**kwargs))
        context_result = sess.run(
            flatten_values_tensors_or_sparse(
                context_out.values())) if context_out else []
        feat_list_result = sess.run(
            flatten_values_tensors_or_sparse(
                feat_list_out.values())) if feat_list_out else []

        # Check values.
        _compare_output_to_expected(self, context_out,
                                    expected_context_values, context_result)
        _compare_output_to_expected(self, feat_list_out,
                                    expected_feat_list_values,
                                    feat_list_result)

        # Check static shapes of the context outputs against their specs.
        if "context_features" in kwargs:
            for k, f in kwargs["context_features"].items():
                if (isinstance(f, parsing_ops.FixedLenFeature)
                        and f.shape is not None):
                    self.assertEqual(
                        tuple(context_out[k].get_shape().as_list()), f.shape)
                elif isinstance(f, parsing_ops.VarLenFeature):
                    self.assertEqual(
                        tuple(context_out[k].indices.get_shape().as_list()),
                        (None, 1))
                    self.assertEqual(
                        tuple(context_out[k].values.get_shape().as_list()),
                        (None,))
                    self.assertEqual(
                        tuple(
                            context_out[k].dense_shape.get_shape().as_list()),
                        (1,))
def parse_feature_columns_from_sequence_examples(
        serialized,
        context_feature_columns,
        sequence_feature_columns,
        name=None,
        example_name=None):
    """Parse one tf.SequenceExample into tensors for the given feature columns.

    Args:
      serialized: A scalar (0-D Tensor) of type string holding a single
        serialized `SequenceExample` proto.
      context_feature_columns: An iterable of feature columns describing the
        context features. All items should be instances of classes derived
        from `_FeatureColumn`. `None` is treated as an empty list.
      sequence_feature_columns: An iterable of feature columns describing the
        sequence features. All items should be instances of classes derived
        from `_FeatureColumn`. `None` is treated as an empty list.
      name: A name for this operation (optional).
      example_name: A scalar (0-D Tensor) of type string (optional), the
        name of the serialized proto.

    Returns:
      A tuple `(context_features, sequence_features)`:
      * context_features: a dict mapping `FeatureColumns` from
        `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
      * sequence_features: a dict mapping `FeatureColumns` from
        `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.

    Raises:
      ValueError: if `serialized` cannot be reshaped to a scalar.
    """
    # A sequence example can only be parsed one at a time, so the input is
    # forced to a scalar; anything that cannot be reshaped is rejected.
    try:
        scalar_serialized = array_ops.reshape(serialized, [])
    except ValueError as reshape_error:
        raise ValueError(
            'serialized must contain as single sequence example. Batching must be '
            'done after parsing for sequence examples. Error: {}'.format(
                reshape_error))

    context_columns = (
        [] if context_feature_columns is None else context_feature_columns)
    sequence_columns = (
        [] if sequence_feature_columns is None else sequence_feature_columns)

    check_feature_columns(context_columns)
    context_spec = fc.create_feature_spec_for_parsing(context_columns)

    check_feature_columns(sequence_columns)
    sequence_spec = fc._create_sequence_feature_spec_for_parsing(  # pylint: disable=protected-access
        sequence_columns, allow_missing_by_default=False)

    return parsing_ops.parse_single_sequence_example(
        scalar_serialized, context_spec, sequence_spec, example_name, name)
def serialize_map(replay_example_str):
    """Parse each example string to `tf.Tensor`.

    The sequence features of `replay_example_str` are parsed with the
    `replay_features` spec (defined outside this function) and passed through
    `convert_and_fix_dtypes`. Parsing is gated on an assertion that the input
    is not the empty string.

    Args:
      replay_example_str: the serialized sequence example to parse.

    Returns:
      The result of `convert_and_fix_dtypes` applied to the parsed sequence
      features.

    Raises:
      OutOfRangeError: if building the parse ops raises
        `InvalidArgumentError`.
    """
    try:
        # NOTE(review): `!=` on a Tensor is only element-wise when Tensor
        # equality is enabled (TF2 behaviour) -- confirm this assertion does
        # what is intended under the TF version in use.
        assert_op = control_flow_ops.Assert(replay_example_str != "",
                                            [replay_example_str])
        with ops.control_dependencies([assert_op]):
            _, replay = parsing_ops.parse_single_sequence_example(
                replay_example_str, sequence_features=replay_features)
    except errors_impl.InvalidArgumentError as e:
        # OutOfRangeError requires (node_def, op, message); constructing it
        # with no arguments raises TypeError instead of the intended error.
        raise errors_impl.OutOfRangeError(e.node_def, e.op, e.message)

    return convert_and_fix_dtypes(replay)
def main():
    """Read "traineh.tfrecord" and print per-video ids, labels and shapes.

    Each record is parsed as a SequenceExample with context features
    'video_id' and 'labels' and a string sequence feature 'inc3'. Every
    'inc3' frame is decoded as raw uint8 bytes, the frames are stacked into
    one 2-D array, and the video id, labels and stacked shape are printed.
    The loop ends when the single-epoch input queue is exhausted.
    """
    reader = tf.TFRecordReader
    data_sources = ["traineh.tfrecord"]
    _, data = parallel_reader.parallel_read(
        data_sources,
        reader_class=reader,
        num_epochs=1,
        num_readers=1,
        shuffle=False,
        capacity=256,
        min_after_dequeue=1)

    context_features, sequence_features = (
        parsing_ops.parse_single_sequence_example(
            data,
            context_features={
                'video_id': tf.VarLenFeature(tf.string),
                'labels': tf.VarLenFeature(tf.int64),
            },
            sequence_features={
                'inc3': tf.FixedLenSequenceFeature(1, tf.string)
            },
            example_name=""))

    with tf.Session() as sess:
        sess.run(tf.initialize_local_variables())
        sess.run(tf.initialize_all_variables())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while not coord.should_stop():
                # Fetch context and sequence features in ONE run call. Each
                # run that depends on `data` dequeues a fresh record, so
                # fetching them separately would pair the id/labels of one
                # video with the frames of the next.
                meta, frames = sess.run(
                    [context_features, sequence_features])
                vid = meta['video_id'].values[0]
                labels = meta['labels'].values
                inc3_fea = frames['inc3']

                # Each row holds one bytes object; decode it as uint8 and
                # keep a leading axis so the frames stack row-wise.
                frame_feas = np.vstack([
                    np.frombuffer(r[0], dtype=np.uint8)[None, :]
                    for r in inc3_fea
                ])
                print(vid, labels)
                print(frame_feas.shape)
                # Do something here
        except tf.errors.OutOfRangeError:
            print('Finished extracting.')
        finally:
            coord.request_stop()
            coord.join(threads)
def _convert_Youtube8M_tfrecord_to_numpy(tfrecord_filename, num_classes=4716,
                                         target_nframe=300):
    """Parse every SequenceExample in a tfrecord source into NumPy features.

    Each record is parsed with context features 'video_id' and 'labels' and
    string sequence features 'rgb' and 'audio'. Per record, the frame bytes
    are decoded as uint8, stacked, passed through `_dequantize(..., 2, -2)`
    and `_frame_padding(...)`, and collected together with a multi-hot label
    vector.

    Args:
      tfrecord_filename: passed as `data_sources` to
        `parallel_reader.parallel_read`.
      num_classes: length of the multi-hot label vector (default 4716).
      target_nframe: frame count handed to `_frame_padding` (default 300).

    Returns:
      A tuple `(total_rgb_feat, total_audio_feat, total_label)` of three
      lists with one entry per parsed record.
    """
    reader = tf.TFRecordReader
    # Build the file queue and example queue for the tfrecord source.
    _, data = parallel_reader.parallel_read(
        data_sources=tfrecord_filename,
        reader_class=reader,
        num_epochs=1,
        num_readers=1,
        shuffle=False,
        capacity=256,
        min_after_dequeue=1)

    context_feat, seq_feat = parsing_ops.parse_single_sequence_example(
        data,
        context_features={
            'video_id': tf.VarLenFeature(tf.string),
            'labels': tf.VarLenFeature(tf.int64)
        },
        sequence_features={
            'rgb': tf.FixedLenSequenceFeature([], tf.string),
            'audio': tf.FixedLenSequenceFeature([], tf.string)
        },
        example_name=" ")

    total_rgb_feat = []
    total_audio_feat = []
    total_label = []

    with tf.Session() as sess:
        # Initialize variables (the epoch counter is a local variable).
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        # Coordinator managing the QueueRunner threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                video_context, video_features = sess.run(
                    (context_feat, seq_feat))

                # Multi-hot label vector built in NumPy. Creating a
                # tf.sparse_to_dense op here would add a new node to the
                # graph on every iteration.
                label_ids = video_context['labels'].values
                labels = np.zeros(num_classes, dtype=np.int32)
                labels[label_ids] = 1

                hex_rgb_feat = video_features['rgb']
                hex_audio_feat = video_features['audio']

                # Decode each frame's raw bytes to uint8; audio frames are
                # paired with rgb frames by index.
                rgb_feat = []
                audio_feat = []
                for ii, rgb_bytes in enumerate(hex_rgb_feat):
                    rgb_feat.append(np.frombuffer(rgb_bytes, dtype=np.uint8))
                    audio_feat.append(
                        np.frombuffer(hex_audio_feat[ii], dtype=np.uint8))

                # Stack per-frame vectors into one 2-D array per modality.
                rgb_feat = np.vstack(rgb_feat)
                audio_feat = np.vstack(audio_feat)

                # Dequantize the rgb and audio features.
                rgb_feat = _dequantize(rgb_feat, 2, -2)
                audio_feat = _dequantize(audio_feat, 2, -2)

                # Pad or crop to a fixed number of frames.
                rgb_feat = _frame_padding(input_feat=rgb_feat,
                                          padding_value=0,
                                          target_nframe=target_nframe)
                audio_feat = _frame_padding(input_feat=audio_feat,
                                            padding_value=0,
                                            target_nframe=target_nframe)

                total_rgb_feat.append(rgb_feat)
                total_audio_feat.append(audio_feat)
                total_label.append(labels)
        except tf.errors.OutOfRangeError:
            print('!All video features have been exported...')
        finally:
            coord.request_stop()
            coord.join(threads=threads)

    # The `with` block has already closed the session.
    return total_rgb_feat, total_audio_feat, total_label