def testDenseDefaultNoShapeShouldFail(self):
    """A FixedLenFeature given `None` as its shape is expected to raise ValueError."""
    protos = [
        example(features=features({"a": float_feature([1, 1, 3])})),
    ]
    serialized_protos = [proto.SerializeToString() for proto in protos]
    serialized_tensor = ops.convert_to_tensor(serialized_protos)

    # The first positional argument of FixedLenFeature (None here) is the
    # missing shape named by the expected error message.
    shapeless_spec = {"a": parsing_ops.FixedLenFeature(None, dtypes.float32)}

    self._test(
        serialized_tensor,
        shapeless_spec,
        expected_err=(ValueError, "Missing shape for feature a"))
def testReadWithEquivalentDataset(self):
    """Read the test TFRecord files through a hand-built pipeline and compare batches."""
    feature_spec = {
        "file": parsing_ops.FixedLenFeature([], dtypes.int64),
        "record": parsing_ops.FixedLenFeature([], dtypes.int64),
    }

    # Pipeline: parse every record, repeat 10 times, batch in pairs.
    raw_records = core_readers.TFRecordDataset(self.test_filenames)
    parsed_records = raw_records.map(
        lambda serialized: parsing_ops.parse_single_example(
            serialized, feature_spec))
    dataset = parsed_records.repeat(10).batch(2)

    iterator = dataset.make_initializable_iterator()
    init_op = iterator.initializer
    next_element = iterator.get_next()

    with self.cached_session() as sess:
        sess.run(init_op)
        expected_batches = self._next_expected_batch(
            range(self._num_files), 2, 10)
        for expected in expected_batches:
            # Only the first and fifth entries of each expected tuple are
            # compared against the parsed output.
            file_batch, _, _, _, record_batch, _ = expected
            actual_batch = sess.run(next_element)
            self.assertAllEqual(file_batch, actual_batch["file"])
            self.assertAllEqual(record_batch, actual_batch["record"])
        # Once the expected batches are consumed the iterator must be done.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def _assert_single_feature_column(self, expected_shape, expected_dtype,
                                  feature_columns):
    """Assert `feature_columns` holds one unnamed column with the expected config.

    Args:
      expected_shape: shape the column's FixedLenFeature must carry.
      expected_dtype: dtype the column's FixedLenFeature must carry.
      feature_columns: indexable collection of feature columns to check.
    """
    self.assertEqual(1, len(feature_columns))
    only_column = feature_columns[0]
    self.assertEqual('', only_column.name)

    # The config is keyed by the column name, which is the empty string.
    expected_config = {
        '': parsing_ops.FixedLenFeature(
            shape=expected_shape, dtype=expected_dtype),
    }
    self.assertEqual(expected_config, only_column.config)
def my_input_fn():
    """Parse the two text-format example protos and return one batch of features."""
    feature_to_type = {
        "example_0.age": parsing_ops.FixedLenFeature([1], dtypes.int64),
        "example_1.age": parsing_ops.FixedLenFeature([1], dtypes.int64),
        "example_0.weight": parsing_ops.FixedLenFeature([1], dtypes.int64),
        "example_1.weight": parsing_ops.FixedLenFeature([1], dtypes.int64),
        "example_0.label": parsing_ops.FixedLenFeature([1], dtypes.float32),
        "example_1.label": parsing_ops.FixedLenFeature([1], dtypes.float32),
    }

    # Turn each text-format proto into its serialized binary form.
    serialized_examples = []
    for proto_text in (EXAMPLE_1_PROTO, EXAMPLE_2_PROTO):
        proto = example_pb2.Example()
        text_format.Merge(proto_text, proto)
        serialized_examples.append(proto.SerializeToString())

    features_tensor = parsing_ops.parse_example(
        serialized_examples, feature_to_type)

    # Create the dataset.
    sliced = dataset_ops.Dataset.from_tensor_slices(features_tensor)
    dataset = sliced.batch(2)
    return dataset.make_one_shot_iterator().get_next()
def testSerializedContainingDenseWithDefaults(self):
    """Parse examples one at a time; absent dense features take their defaults."""
    protos = [
        example(features=features({"a": float_feature([1, 1])})),
        example(features=features({"b": bytes_feature([b"b1"])})),
        example(features=features({"b": feature()})),
    ]

    def _dense_a(values):
        return np.array(values, dtype=np.float32).reshape(1, 2, 1)

    def _dense_b(text):
        return np.array(text, dtype=bytes).reshape(1, 1, 1, 1)

    # One expected dict per proto above; [3, -3] and "tmp_str" are the
    # default values declared in the feature spec below.
    expected_outputs = [
        {"a": _dense_a([1, 1]), "b": _dense_b("tmp_str")},
        {"a": _dense_a([3, -3]), "b": _dense_b("b1")},
        {"a": _dense_a([3, -3]), "b": _dense_b("tmp_str")},
    ]

    for proto, expected_output in zip(protos, expected_outputs):
        serialized = ops.convert_to_tensor(proto.SerializeToString())
        feature_spec = {
            "a": parsing_ops.FixedLenFeature(
                (1, 2, 1), dtype=dtypes.float32, default_value=[3.0, -3.0]),
            "b": parsing_ops.FixedLenFeature(
                (1, 1, 1, 1), dtype=dtypes.string, default_value="tmp_str"),
        }
        self._test(
            {"serialized": serialized, "features": feature_spec},
            expected_output)
def testEmptySerializedWithoutDefaultsShouldFail(self):
    """Parsing must fail when a dense feature without a default has no value."""
    input_features = {
        "st_a": parsing_ops.VarLenFeature(dtypes.int64),
        "a": parsing_ops.FixedLenFeature(
            (1, 3), dtypes.int64, default_value=[0, 42, 0]),
        "b": parsing_ops.FixedLenFeature(
            (3, 3), dtypes.string,
            default_value=np.random.rand(3, 3).astype(bytes)),
        # "c" is the only dense feature declared without a default_value;
        # that omission is what both checks below rely on.
        "c": parsing_ops.FixedLenFeature((2,), dtype=dtypes.float32),
    }
    missing_c_error = (
        errors_impl.OpError,
        "Name: in1, Feature: c \\(data type: float\\) is required")

    # Edge case: the key "c" is present but its feature value is empty.
    empty_c_proto = example(features=features({"c": feature()}))
    self._test(
        {
            "example_names": ["in1"],
            "serialized": [empty_c_proto.SerializeToString()],
            "features": input_features,
        },
        expected_err=missing_c_error)

    # Standard case: both the key and the value are missing.
    self._test(
        {
            "example_names": ["in1", "in2"],
            "serialized": ["", ""],
            "features": input_features,
        },
        expected_err=missing_c_error)
def testVaryingFieldsInGenerator(self):
    """Feed a generator whose sequence length varies and check each yielded row."""

    def simple_generator():
        # Yields two rows; row i carries an (i, 1) array of ones.
        for step in range(2):
            yield {"value": step, "seqlen_value": np.ones((step, 1))}

    simple_features = {
        "value": parsing_ops.FixedLenFeature(shape=[], dtype=dtypes.int32),
        "seqlen_value": parsing_ops.FixedLenSequenceFeature(
            shape=[1], dtype=dtypes.float32, allow_missing=True),
        # Never produced by the generator, so its default is what gets read.
        "empty_value": parsing_ops.FixedLenFeature(
            default_value=[-1, -2], dtype=dtypes.int32, shape=[2]),
    }
    tensors = python_input.python_input(simple_generator, simple_features)

    # Static checks: keys, dtypes and shapes.
    self.assertEqual({"value", "seqlen_value", "empty_value"},
                     set(tensors.keys()))
    value_tensor = tensors["value"]
    self.assertEqual(dtypes.int32, value_tensor.dtype)
    self.assertEqual((), value_tensor.shape)
    seqlen_tensor = tensors["seqlen_value"]
    self.assertEqual(dtypes.float32, seqlen_tensor.dtype)
    self.assertEqual([None, 1], seqlen_tensor.shape.as_list())
    empty_tensor = tensors["empty_value"]
    self.assertEqual(dtypes.int32, empty_tensor.dtype)
    self.assertEqual([2], empty_tensor.shape)

    with self.test_session() as sess:
        first = sess.run(tensors)
        self.assertAllEqual(0, first["value"])
        self.assertAllEqual(np.ones((0, 1)), first["seqlen_value"])
        self.assertAllEqual([-1, -2], first["empty_value"])

        second = sess.run(tensors)
        self.assertAllEqual(1, second["value"])
        self.assertAllEqual([[1]], second["seqlen_value"])
        self.assertAllEqual([-1, -2], second["empty_value"])

        # A third run comes after the generator's two rows are used up.
        with self.assertRaisesOpError("Iteration finished"):
            sess.run(tensors)
def testDecodeExampleWithRepeatedImages(self):
    """Decode an Example that stores the same encoded image twice under one key."""
    image_shape = (2, 3, 3)
    image_format = 'png'
    image, _ = self.GenerateImage(
        image_format=image_format, image_shape=image_shape)
    tf_encoded = self._Encoder(image, image_format)
    with self.cached_session():
        tf_string = tf_encoded.eval()

    # The encoded bytes appear twice in the bytes list.
    encoded_pair = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=[tf_string, tf_string]))
    example = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/encoded': encoded_pair,
                'image/format': self._StringFeature(image_format),
            }))
    serialized_example = example.SerializeToString()

    with self.cached_session():
        serialized_example = array_ops.reshape(serialized_example, shape=[])
        keys_to_features = {
            'image/encoded':
                parsing_ops.FixedLenFeature((2,), dtypes.string),
            'image/format':
                parsing_ops.FixedLenFeature(
                    (), dtypes.string, default_value=image_format),
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features=keys_to_features,
            items_to_handlers={
                'image': tfexample_decoder.Image(repeated=True)
            })
        [tf_image] = decoder.decode(serialized_example, ['image'])
        output_image = tf_image.eval()

    # Leading axis indexes the two copies; each must equal the source image.
    self.assertEqual(output_image.shape, (2, 2, 3, 3))
    for copy_index in range(2):
        self.assertAllEqual(
            np.squeeze(output_image[copy_index, :, :, :]), image)
def testExampleLongerThanSpec(self):
    """An example carrying two values for a feature declared with shape 1 must fail."""
    two_value_proto = example(
        features=features({"a": bytes_feature([b"a", b"b"])}))
    serialized = two_value_proto.SerializeToString()

    test_input = {
        "serialized": ops.convert_to_tensor(serialized),
        "features": {"a": parsing_ops.FixedLenFeature(1, dtypes.string)},
    }
    self._test(
        test_input,
        expected_err=(errors_impl.OpError, "Can't parse serialized Example"))
def _train_input_fn():
    """Build the training input: batched sequence examples split into (features, label)."""
    context_spec = {
        "query": parsing_ops.FixedLenFeature([FLAGS.query_size], tf.int64)
    }
    example_spec = {
        "candidates": parsing_ops.FixedLenFeature(
            [1], tf.int64, default_value=tf.constant([-1], tf.int64)),
        "relevance": parsing_ops.FixedLenFeature(
            [1], tf.int64, default_value=tf.constant([0], tf.int64)),
    }
    ds = tfr.data.read_batched_sequence_example_dataset(
        tf_records,
        batch_size,
        list_size,
        context_feature_spec=context_spec,
        example_feature_spec=example_spec,
        reader_args=['GZIP'])

    # Pop 'relevance' out of the feature dict, drop its last axis and cast it
    # to float32; the remaining dict and that tensor form the output pair.
    ds = ds.map(
        lambda f: (f, tf.cast(tf.squeeze(f.pop('relevance'), -1), tf.float32)))

    iterator = ds.make_initializable_iterator()

    def _initialize_iterator(sess):
        return sess.run(iterator.initializer)

    # The hook is handed a callable that runs the iterator's initializer.
    iterator_initializer_hook.iterator_initializer_fn = _initialize_iterator
    return iterator.get_next()
def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
    """Check the parsing spec generated for real-valued columns that carry defaults."""
    scalar_default_col = fc.real_valued_column(
        "real_valued_column1", default_value=2)
    broadcast_default_col = fc.real_valued_column(
        "real_valued_column2", 5, default_value=4)
    list_default_col = fc.real_valued_column(
        "real_valued_column3", default_value=[8])
    full_list_default_col = fc.real_valued_column(
        "real_valued_column4", 3, default_value=[1, 0, 6])
    no_dimension_col = fc.real_valued_column(
        "real_valued_column5", dimension=None, default_value=2)

    config = fc.create_feature_spec_for_parsing([
        scalar_default_col,
        broadcast_default_col,
        list_default_col,
        full_list_default_col,
        no_dimension_col,
    ])

    float32 = dtypes.float32
    expected_config = {
        "real_valued_column1":
            parsing_ops.FixedLenFeature(
                [1], dtype=float32, default_value=[2.]),
        "real_valued_column2":
            parsing_ops.FixedLenFeature(
                [5], dtype=float32, default_value=[4., 4., 4., 4., 4.]),
        "real_valued_column3":
            parsing_ops.FixedLenFeature(
                [1], dtype=float32, default_value=[8.]),
        "real_valued_column4":
            parsing_ops.FixedLenFeature(
                [3], dtype=float32, default_value=[1., 0., 6.]),
        # The column built with dimension=None is expected as a VarLenFeature.
        "real_valued_column5":
            parsing_ops.VarLenFeature(dtype=float32),
    }
    self.assertEqual(5, len(config))
    self.assertDictEqual(expected_config, config)
def testBasic(self):
    """Extract the parser configuration from a parse_example op and compare it."""
    with session.Session() as sess:
        serialized_input = array_ops.placeholder(dtypes.string, shape=[1])
        feature_spec = {
            'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
            'y': parsing_ops.VarLenFeature(dtypes.string),
        }
        parsed = parsing_ops.parse_example(serialized_input, feature_spec)

        # The op that produced the 'x' output is the one inspected.
        parse_example_op = parsed['x'].op
        config = extract_example_parser_configuration(parse_example_op, sess)

        # The expected proto is looked up by the op's type.
        expected = self.getExpectedConfig(parse_example_op.type)
        self.assertProtoEquals(expected, config)
def testEmptySerializedWithAllDefaults(self):
    """Two empty serialized strings must parse to the defaults for every feature."""
    sparse_name = "st_a"
    a_name = "a"
    b_name = "b"
    c_name = "c:has_a_tricky_name"

    a_default = [0, 42, 0]
    b_default = np.random.rand(3, 3).astype(bytes)
    c_default = np.random.rand(2).astype(np.float32)

    # Expected sparse output as an (indices, values, shape) triple.
    empty_indices = np.empty((0, 2), dtype=np.int64)
    empty_values = np.empty((0,), dtype=np.int64)  # sp_a is DT_INT64
    dense_shape = np.array([2, 0], dtype=np.int64)  # batch == 2, max_elems = 0
    expected_st_a = (empty_indices, empty_values, dense_shape)

    # Each dense default is repeated once per batch entry.
    expected_output = {
        sparse_name: expected_st_a,
        a_name: np.array(2 * [[a_default]]),
        b_name: np.array(2 * [b_default]),
        c_name: np.array(2 * [c_default]),
    }

    serialized = ops.convert_to_tensor(["", ""])
    feature_spec = {
        sparse_name:
            parsing_ops.VarLenFeature(dtypes.int64),
        a_name:
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=a_default),
        b_name:
            parsing_ops.FixedLenFeature(
                (3, 3), dtypes.string, default_value=b_default),
        c_name:
            parsing_ops.FixedLenFeature(
                (2,), dtypes.float32, default_value=c_default),
    }
    self._test(serialized, feature_spec, expected_values=expected_output)
def testSerializedContainingDense(self):
    """Parse a two-example batch whose dense features are all present."""
    aname = "a"
    bname = "b*has+a:tricky_name"

    first_proto = example(features=features({
        aname: float_feature([1, 1]),
        bname: bytes_feature([b"b0_str"]),
    }))
    second_proto = example(features=features({
        aname: float_feature([-1, -1]),
        bname: bytes_feature([b"b1"]),
    }))
    serialized = [
        proto.SerializeToString() for proto in [first_proto, second_proto]
    ]

    # Leading axis of size 2 is the batch; the rest is each feature's shape.
    expected_a = np.array([[1, 1], [-1, -1]], dtype=np.float32)
    expected_b = np.array(["b0_str", "b1"], dtype=bytes)
    expected_output = {
        aname: expected_a.reshape(2, 1, 2, 1),
        bname: expected_b.reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    test_input = {
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            aname:
                parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
            bname:
                parsing_ops.FixedLenFeature(
                    (1, 1, 1, 1), dtype=dtypes.string),
        },
    }
    self._test(test_input, expected_output)
def test_keyed_parse_json(self):
    """Read three JSON example lines through read_keyed_batch_examples, one per batch."""
    gfile.Glob = self._orig_glob
    filename = self._create_temp_file(
        '{"features": {"feature": {"age": {"int64_list": {"value": [0]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [1]}}}}}\n'
        '{"features": {"feature": {"age": {"int64_list": {"value": [2]}}}}}\n'
    )

    batch_size = 1
    queue_capacity = 5
    name = "my_batch"

    with ops.Graph().as_default() as g, self.test_session(
        graph=g) as session:
        feature_spec = {
            "age": parsing_ops.FixedLenFeature([1], dtypes_lib.int64)
        }
        # Each text line is decoded from JSON and then parsed as one Example.
        parse_fn = lambda line: parsing_ops.parse_single_example(  # pylint: disable=g-long-lambda
            parsing_ops.decode_json_example(line), feature_spec)

        keys, inputs = graph_io.read_keyed_batch_examples(
            filename,
            batch_size,
            reader=io_ops.TextLineReader,
            randomize_input=False,
            num_epochs=1,
            queue_capacity=queue_capacity,
            parse_fn=parse_fn,
            name=name)
        self.assertAllEqual((None,), keys.get_shape().as_list())
        self.assertEqual(1, len(inputs))
        self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list())

        session.run(variables.local_variables_initializer())
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(session, coord=coord)

        # Batch k is expected to hold age k under the key "<filename>:<k+1>".
        key_suffixes = (b":1", b":2", b":3")
        for expected_age, suffix in enumerate(key_suffixes):
            key, age = session.run([keys, inputs["age"]])
            self.assertAllEqual(age, [[expected_age]])
            self.assertAllEqual(key, [filename.encode("utf-8") + suffix])

        # num_epochs=1, so a fourth read must report exhaustion.
        with self.assertRaises(errors.OutOfRangeError):
            session.run(inputs)

        coord.request_stop()
        coord.join(threads)
def regressor_parse_example_spec(feature_columns,  # pylint: disable=missing-docstring
                                 label_key,
                                 label_dtype=dtypes.float32,
                                 label_default=None,
                                 label_dimension=1,
                                 weight_column=None):
    # Builds the parsing spec for `feature_columns`, then hands it to
    # `_add_label_and_weight_to_parsing_spec` together with a fixed-length
    # label spec of shape (label_dimension,) and the optional weight column.
    # Documented with comments rather than a docstring so `__doc__` is left
    # exactly as before.
    columns_spec = fc.make_parse_example_spec(feature_columns)
    fixed_len_label = parsing_ops.FixedLenFeature(
        shape=(label_dimension,),
        dtype=label_dtype,
        default_value=label_default)
    return _add_label_and_weight_to_parsing_spec(
        parsing_spec=columns_spec,
        label_key=label_key,
        label_spec=fixed_len_label,
        weight_column=weight_column)
def _labeled_to_unlabeled_features(features): """Convert a dict of lt.FixedLenFeature into a dict of tf.FixedLenFeature.""" unlabeled_features = {} for name, labeled_feature in features.items(): shape = [ax.size for ax in labeled_feature.axes] if any(size is None for size in shape): # This should be caught on the TensorFlow side, but it isn't yet: # https://github.com/tensorflow/tensorflow/issues/2874 raise ValueError('axes with unknown size are not supported') dtype = labeled_feature.dtype default_value = labeled_feature.default_value unlabeled_features[name] = parsing_ops.FixedLenFeature( shape, dtype, default_value) return unlabeled_features
def testSerializedContainingDenseWithDefaults(self):
    """Parse a three-example batch; absent dense features take their defaults."""
    protos = [
        example(features=features({"a": float_feature([1, 1])})),
        example(features=features({"b": bytes_feature([b"b1"])})),
        example(features=features({"b": feature()})),
    ]
    serialized = [proto.SerializeToString() for proto in protos]

    # One row per proto above; [3, -3] and "tmp_str" are the default values
    # declared in the feature spec below.
    expected_a = np.array([[1, 1], [3, -3], [3, -3]], dtype=np.float32)
    expected_b = np.array(["tmp_str", "b1", "tmp_str"], dtype=bytes)
    expected_output = {
        "a": expected_a.reshape(3, 1, 2, 1),
        "b": expected_b.reshape(3, 1, 1, 1, 1),
    }

    serialized_tensor = ops.convert_to_tensor(serialized)
    feature_spec = {
        "a":
            parsing_ops.FixedLenFeature(
                (1, 2, 1), dtype=dtypes.float32, default_value=[3.0, -3.0]),
        "b":
            parsing_ops.FixedLenFeature(
                (1, 1, 1, 1), dtype=dtypes.string, default_value="tmp_str"),
    }
    self._test(
        serialized_tensor,
        feature_spec,
        expected_values=expected_output,
        create_iterator_twice=True)
def testFromExamples(self):
    """Build a TensorFlowDataFrame from a TFRecord file and verify every batch."""
    num_batches = 77
    enqueue_size = 11
    batch_size = 13

    data_path = _make_test_tfrecord()
    features = {
        "fixed_len_float":
            parsing_ops.FixedLenFeature(
                shape=[2], dtype=dtypes.float32, default_value=[0.0, 0.0]),
        "var_len_int":
            parsing_ops.VarLenFeature(dtype=dtypes.int64),
    }

    tensorflow_df = df.TensorFlowDataFrame.from_examples(
        data_path,
        enqueue_size=enqueue_size,
        batch_size=batch_size,
        features=features,
        shuffle=False)

    # `test.tfrecord` is taken to contain 100 records with two features,
    # var_len_int and fixed_len_float. The helpers below encode what record n
    # is expected to hold: `range(n % 3)` for var_len_int and
    # `[float(n), 2 * float(n)]` for fixed_len_float.
    num_records = 100

    def _expected_fixed_len_float(n):
        return np.array([float(n), 2 * float(n)])

    def _expected_var_len_int(n):
        return np.arange(n % 3)

    for batch_num, batch in enumerate(tensorflow_df.run(num_batches)):
        # Record numbers in this batch, wrapping around past the last record.
        first_record = batch_num * batch_size
        record_numbers = [
            n % num_records
            for n in range(first_record, first_record + batch_size)
        ]

        fixed_len_rows = batch["fixed_len_float"]
        for row, record_number in enumerate(record_numbers):
            np.testing.assert_allclose(
                _expected_fixed_len_float(record_number), fixed_len_rows[row])

        # Each sparse entry is located by (row in batch, position in row).
        var_len_int = batch["var_len_int"]
        for entry, index_pair in enumerate(var_len_int.indices):
            actual_value = var_len_int.values[entry]
            expected_row = _expected_var_len_int(record_numbers[index_pair[0]])
            expected_value = expected_row[index_pair[1]]
            np.testing.assert_array_equal(expected_value, actual_value)
def DecodeExample(self, serialized_example, item_handler, image_format):
    """Run a TFExampleDecoder over one serialized example and return its image.

    Args:
      serialized_example: serialized TF example; reshaped to a scalar before
        decoding.
      item_handler: handler registered for the 'image' item.
      image_format: value used as the default for the 'image/format' key.

    Returns:
      The tensor the decoder produces for the 'image' item.
    """
    scalar_example = array_ops.reshape(serialized_example, shape=[])

    keys_to_features = {
        'image/encoded':
            parsing_ops.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            parsing_ops.FixedLenFeature(
                (), tf.string, default_value=image_format),
    }
    decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features=keys_to_features,
        items_to_handlers={'image': item_handler})

    # decode() returns a list; exactly one item was requested.
    [tf_image] = decoder.decode(scalar_example, ['image'])
    return tf_image
def setUp(self):
    """Create the parsing and the raw supervised input receiver functions."""
    super().setUp()

    # Receiver that parses serialized examples according to a feature spec.
    parsing_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
    self._serving_input_receiver_fn = (
        export_lib.build_parsing_serving_input_receiver_fn(parsing_spec))

    # Receiver fed directly from placeholders for the feature and the label.
    raw_features = {
        'x': array_ops.placeholder(
            dtype=dtypes.float32, shape=(2, 1), name='x'),
    }
    raw_label = array_ops.placeholder(
        dtype=dtypes.float32, shape=(1, 1), name='truth')
    self._supervised_input_receiver_fn = (
        export_lib.build_raw_supervised_input_receiver_fn(
            raw_features, raw_label))
def testFromCSVWithFeatureSpec(self):
    """Compare a CSV loaded with pandas against the same CSV loaded via a feature spec.

    The test is a no-op when pandas is not available (`HAS_PANDAS` is false).
    """
    if not HAS_PANDAS:
        return
    num_batches = 100
    batch_size = 8

    data_path = _make_test_csv_sparse()
    feature_spec = {
        "int": parsing_ops.FixedLenFeature(None, dtypes.int16, np.nan),
        "float": parsing_ops.VarLenFeature(dtypes.float16),
        "bool": parsing_ops.VarLenFeature(dtypes.bool),
        "string": parsing_ops.FixedLenFeature(None, dtypes.string, "")
    }

    pandas_df = pd.read_csv(data_path, dtype={"string": object})
    # Pandas uses NaN for empty cells in a string column, and replace()
    # cannot be used to fix them because nan != nan. Each NaN cell is
    # therefore overwritten individually. `.at[]` is the label-based scalar
    # setter; it replaces `DataFrame.set_value`, which newer pandas releases
    # no longer provide.
    s = pandas_df["string"]
    for i in range(len(s)):
        if isinstance(s[i], float) and math.isnan(s[i]):
            pandas_df.at[i, "string"] = ""

    tensorflow_df = df.TensorFlowDataFrame.from_csv_with_feature_spec(
        [data_path],
        batch_size=batch_size,
        shuffle=False,
        feature_spec=feature_spec)

    # These columns were sparse; re-densify them for comparison
    tensorflow_df["float"] = densify.Densify(np.nan)(tensorflow_df["float"])
    tensorflow_df["bool"] = densify.Densify(np.nan)(tensorflow_df["bool"])

    self._assert_pandas_equals_tensorflow(
        pandas_df,
        tensorflow_df,
        num_batches=num_batches,
        batch_size=batch_size)
def test_parse_single_example(self):
    """pfor over parse_single_example must match one batched parse_example call."""

    def _int64_feature(*values):
        int_list = feature_pb2.Int64List(value=values)
        return feature_pb2.Feature(int64_list=int_list)

    def _bytes_feature(*values):
        encoded = [v.encode("utf-8") for v in values]
        return feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=encoded))

    def _serialized_example(i):
        # Example i: one dense int, one dense string, four sparse ints and
        # i copies of "abc" as sparse strings.
        proto = example_pb2.Example(
            features=feature_pb2.Features(
                feature={
                    "dense_int": _int64_feature(i),
                    "dense_str": _bytes_feature(str(i)),
                    "sparse_int": _int64_feature(i, i * 2, i * 4, i * 8),
                    "sparse_str": _bytes_feature(*(["abc"] * i)),
                }))
        return proto.SerializeToString()

    examples = constant_op.constant(
        [_serialized_example(i) for i in range(10)])

    features = {
        "dense_int": parsing_ops.FixedLenFeature((), dtypes.int64, 0),
        "dense_str": parsing_ops.FixedLenFeature((), dtypes.string, ""),
        "sparse_int": parsing_ops.VarLenFeature(dtypes.int64),
        "sparse_str": parsing_ops.VarLenFeature(dtypes.string),
    }

    def loop_fn(i):
        single_proto = array_ops.gather(examples, i)
        return parsing_ops.parse_single_example(single_proto, features)

    pfor = pfor_control_flow_ops.pfor(loop_fn, iters=10)
    manual = parsing_ops.parse_example(examples, features)
    self.run_and_assert_equal(pfor, manual)
def testSerializedContainingDense(self):
    """Parse a two-example batch of dense features, one of them an empty string."""
    aname = "a"
    bname = "b*has+a:tricky_name"

    first_proto = example(features=features({
        aname: float_feature([1, 1]),
        bname: bytes_feature([b"b0_str"]),
    }))
    # The second example stores an empty byte string for `bname`.
    second_proto = example(features=features({
        aname: float_feature([-1, -1]),
        bname: bytes_feature([b""]),
    }))
    serialized = [
        proto.SerializeToString() for proto in [first_proto, second_proto]
    ]

    # Leading axis of size 2 is the batch; the rest is each feature's shape.
    expected_output = {
        aname:
            np.array(  # pylint: disable=too-many-function-args
                [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        bname:
            np.array(  # pylint: disable=too-many-function-args
                ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    serialized_tensor = ops.convert_to_tensor(serialized)
    feature_spec = {
        aname:
            parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
        bname:
            parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string),
    }
    self._test(
        serialized_tensor,
        feature_spec,
        expected_values=expected_output,
        create_iterator_twice=True)
def _serving_input_receiver_fn():
    """Build the ServingInputReceiver handed to export_savedmodel.

    Serialized examples arrive through a string placeholder named "input" and
    are parsed with a spec derived from the model's feature columns, each
    feature gaining a leading length dimension.
    """
    times_key = feature_keys.TrainEvalFeatures.TIMES
    values_key = feature_keys.TrainEvalFeatures.VALUES

    times_column = feature_column.numeric_column(
        key=times_key, dtype=dtypes.int64)
    values_column = feature_column.numeric_column(
        key=values_key,
        dtype=values_input_dtype,
        shape=(self._model.num_features,))
    all_columns = (
        list(self._model.exogenous_feature_columns) +
        [times_column, values_column])
    parsed_features_no_sequence = (
        feature_column.make_parse_example_spec(all_columns))

    parsed_features = {}
    for key, feature_spec in parsed_features_no_sequence.items():
        if isinstance(feature_spec, parsing_ops.FixedLenFeature):
            # Values use their own proto length; every other fixed-length
            # feature spans the filtering window plus the prediction window.
            if key == values_key:
                leading_dim = values_proto_length
            else:
                leading_dim = filtering_length + prediction_length
            parsed_features[key] = feature_spec._replace(
                shape=((leading_dim,) + feature_spec.shape))
        elif feature_spec.dtype == dtypes.string:
            # Non-fixed-length string specs become fixed-length string
            # features over the combined window.
            parsed_features[key] = parsing_ops.FixedLenFeature(
                shape=(filtering_length + prediction_length,),
                dtype=dtypes.string)
        else:  # VarLenFeature
            raise ValueError(
                "VarLenFeatures not supported, got %s for key %s" %
                (feature_spec, key))

    tfexamples = array_ops.placeholder(
        shape=[default_batch_size], dtype=dtypes.string, name="input")
    features = parsing_ops.parse_example(
        serialized=tfexamples, features=parsed_features)

    # Drop the trailing axis of the parsed times.
    features[times_key] = array_ops.squeeze(features[times_key], axis=-1)
    # Cast values to the model dtype and keep only the filtering window.
    cast_values = math_ops.cast(features[values_key], dtype=self._model.dtype)
    features[values_key] = cast_values[:, :filtering_length]

    # The batch size tensor is read off the leading axis of the times.
    batch_size_tensor = array_ops.shape(features[times_key])[0]
    features.update(
        self._model_start_state_placeholders(
            batch_size_tensor=batch_size_tensor,
            static_batch_size=default_batch_size))
    return export_lib.ServingInputReceiver(features, {"examples": tfexamples})
def testDenseNotMatchingShapeShouldFail(self):
    """A two-value feature cannot satisfy a spec whose shape is (1, 3)."""
    two_value_proto = example(
        features=features({"a": float_feature([-1, -1])}))
    serialized = two_value_proto.SerializeToString()

    test_input = {
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
        },
    }
    # TODO(mrry): Consider matching the `tf.parse_example()` error message.
    self._test(test_input, expected_err=(errors_impl.OpError, "Key: a."))
def testDenseNotMatchingShapeShouldFail(self):
    """In a batch, the example whose value count mismatches the spec must be reported."""
    matching_proto = example(
        features=features({"a": float_feature([1, 1, 3])}))
    # Only two values here, while the spec's shape is (1, 3); the expected
    # error names index 1, this example's position in the batch.
    mismatched_proto = example(
        features=features({"a": float_feature([-1, -1])}))
    serialized = [
        proto.SerializeToString()
        for proto in [matching_proto, mismatched_proto]
    ]

    serialized_tensor = ops.convert_to_tensor(serialized)
    feature_spec = {"a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)}
    self._test(
        serialized_tensor,
        feature_spec,
        expected_err=(errors_impl.InvalidArgumentError,
                      "Key: a, Index: 1. Number of float values"))
def parse_examples(example_protos):
    """Serialize `example_protos` and parse them with the target/age/gender spec.

    Args:
      example_protos: iterable of protos exposing `SerializeToString()`.

    Returns:
      The result of `parsing_ops.parse_example` for the serialized protos.
    """
    int64_sparse = parsing_ops.VarLenFeature
    features = {
        'target':
            parsing_ops.FixedLenFeature(
                shape=[1], dtype=dtypes.float32, default_value=0),
        'age_indices': int64_sparse(dtype=dtypes.int64),
        'age_values': parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'gender_indices': int64_sparse(dtype=dtypes.int64),
        'gender_values': parsing_ops.VarLenFeature(dtype=dtypes.float32),
    }
    serialized = [proto.SerializeToString() for proto in example_protos]
    return parsing_ops.parse_example(serialized, features)
def testBasic(self):
    """Compare the extracted parser configuration with the BASIC_PROTO golden."""
    golden_config = (
        example_parser_configuration_pb2.ExampleParserConfiguration())
    text_format.Parse(BASIC_PROTO, golden_config)

    with session.Session() as sess:
        serialized_input = array_ops.placeholder(dtypes.string, shape=[1])
        feature_spec = {
            'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
            'y': parsing_ops.VarLenFeature(dtypes.string),
        }
        # Called only to add the op to the graph; its outputs are not used.
        _ = parsing_ops.parse_example(serialized_input, feature_spec)

        # The op is fetched from the session graph by its name.
        parse_example_op = sess.graph.get_operation_by_name(
            'ParseExample/ParseExample')
        config = extract_example_parser_configuration(parse_example_op, sess)
        self.assertProtoEquals(golden_config, config)
def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
    """Train, evaluate, predict with and export the combined DNN + tree estimator."""
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3

    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    # The DNN's input layer feeds the tree (dnn_input_layer_to_tree) and
    # prediction uses the tree alone (predict_with_tree_only).
    estimator_kwargs = dict(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        predict_with_tree_only=True,
        dnn_to_tree_distillation_param=(0.5, None),
        tree_feature_columns=[])
    est = estimator.CoreDNNBoostedTreeCombinedEstimator(**estimator_kwargs)

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)

    # Export through a FinalExporter fed by a parsing receiver for "x".
    export_feature_spec = {
        "x": parsing_ops.FixedLenFeature([1], dtype=dtypes.float32)
    }
    serving_input_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec=export_feature_spec)
    base_exporter = exporter.FinalExporter(
        name="Servo",
        serving_input_receiver_fn=serving_input_fn,
        assets_extra=None)
    export_path = os.path.join(model_dir, "export")
    base_exporter.export(
        est,
        export_path=export_path,
        checkpoint_path=None,
        eval_result={},
        is_the_final_export=True)