def testDenseFeaturesDirectly(self):
  """Runs a plain feature dict through DenseFeatures and checks the values.

  Two non-trainable text embedding columns ("text_a" and "text_b") are
  built from `self.model` and applied to two examples; the layer output is
  compared against a fixed 2x8 matrix.
  """
  raw_inputs = {
      "text_a": ["hello world", "pair-programming"],
      "text_b": ["hello world", "oov token"],
  }
  columns = [
      hub.text_embedding_column_v2(key, self.model, trainable=False)
      for key in ("text_a", "text_b")
  ]
  embedded = tf.keras.layers.DenseFeatures(columns)(raw_inputs)
  # NOTE(review): each row looks like the text_a embedding followed by the
  # text_b embedding (4 values each) - confirm against the test model.
  expected = [[1, 2, 3, 4, 1, 2, 3, 4], [5, 5, 5, 5, 0, 0, 0, 0]]
  self.assertAllEqual(embedded, expected)
def testConfig(self):
  """Checks a column rebuilt from get_config() keeps its module_path."""
  original = hub.text_embedding_column_v2("text", self.model, trainable=True)
  column_cls = hub.feature_column_v2._TextEmbeddingColumnV2
  # Round-trip: serialize the column to a config and rebuild it from that.
  restored = column_cls.from_config(original.get_config())
  self.assertEqual(restored.module_path, original.module_path)
def testWorksWithTF2DnnClassifier(self):
  """Smoke-tests the column with tf.estimator.DNNClassifier (skipped).

  The test is skipped unconditionally on its first statement, so nothing
  below the skipTest() call currently runs.
  """
  self.skipTest("b/154115879 - needs more investigation for timeout.")

  def input_fn():
    # Two examples, each with one comment string and one upvote count.
    comments = np.array([
        ["the quick brown fox"],
        ["spam spam spam"],
    ])
    upvote_counts = np.array([
        [20],
        [1],
    ])
    targets = np.array([[1], [0]])
    return {"comment": comments, "upvotes": upvote_counts}, targets

  columns = [
      hub.text_embedding_column_v2("comment", self.model, trainable=False),
      tf.feature_column.numeric_column("upvotes"),
  ]
  classifier = tf.estimator.DNNClassifier(
      hidden_units=[10],
      feature_columns=columns,
      model_dir=self.get_temp_dir())

  # This only tests that estimator apis are working with the feature
  # column without throwing exceptions.
  classifier.train(input_fn, max_steps=1)
  classifier.evaluate(input_fn, steps=1)
  # NOTE(review): the result of predict() is never iterated here; if it is
  # lazy (a generator), prediction itself is not exercised - confirm.
  classifier.predict(input_fn)
def testMakeParseExampleSpec(self):
  """Checks the parse spec derived from a single text embedding column."""
  column = hub.text_embedding_column_v2("text", self.model, trainable=False)
  # The column is expected to parse as one fixed-length string per example.
  expected_spec = {"text": tf.io.FixedLenFeature([1], dtype=tf.string)}
  actual_spec = tf.feature_column.make_parse_example_spec([column])
  self.assertEqual(actual_spec, expected_spec)
def testLoadingDifferentFeatureColumnsFails(self):
  """Checks that weights cannot be restored across differently named columns.

  A first Keras model is built on a trainable text embedding column named
  "text_1", trained for 10 epochs and saved as a checkpoint. A second model
  with the same architecture, but with its column and input named "text_2",
  then loads that checkpoint; `assert_consumed()` on the load status is
  expected to raise an AssertionError.
  """

  def build_model(feature_name):
    # Builds and compiles: DenseFeatures(text column) -> Dense(16) -> Dense(1).
    columns = [
        hub.text_embedding_column_v2(feature_name, self.model, trainable=True),
    ]
    inputs = {
        feature_name: tf.keras.layers.Input(
            name=feature_name, shape=[None], dtype=tf.string),
    }
    x = tf.keras.layers.DenseFeatures(columns)(inputs)
    x = tf.keras.layers.Dense(16, activation="relu")(x)
    logits = tf.keras.layers.Dense(1, activation="linear")(x)
    model = tf.keras.Model(inputs=inputs, outputs=logits)
    model.compile(optimizer="rmsprop",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model

  features = [
      np.array(["hello world", "pair-programming"]),
      np.array(["hello world", "pair-programming"]),
  ]
  label = np.int64([0, 1])

  # Build the first model, train it and save its weights.
  model_1 = build_model("text_1")
  model_1.fit(x=features, y=label, epochs=10)
  checkpoint_path = os.path.join(self.get_temp_dir(), "checkpoints",
                                 "checkpoint-1")
  model_1.save_weights(checkpoint_path)

  # Build the second model with feature columns that have different names.
  model_2 = build_model("text_2")

  # Loading of checkpoints from the first model into the second model should
  # fail. assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
  # which was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(AssertionError,
                              ".*Some Python objects were not bound.*"):
    model_2.load_weights(checkpoint_path).assert_consumed()
def testDenseFeaturesInKeras(self):
  """Trains a small Keras model on a text column and checks output shape.

  The model is DenseFeatures(trainable text column) -> Dense(16, relu) ->
  Dense(1, linear), fitted for 10 epochs on two examples. Predictions for
  the two input strings must have shape [2, 1].
  """
  texts = np.array(["hello world", "pair-programming"])
  targets = np.int64([0, 1])

  columns = [
      hub.text_embedding_column_v2("text", self.model, trainable=True),
  ]
  text_input = tf.keras.layers.Input(
      name="text", shape=[None], dtype=tf.string)
  model_inputs = {"text": text_input}

  hidden = tf.keras.layers.DenseFeatures(columns)(model_inputs)
  hidden = tf.keras.layers.Dense(16, activation="relu")(hidden)
  outputs = tf.keras.layers.Dense(1, activation="linear")(hidden)

  keras_model = tf.keras.Model(inputs=model_inputs, outputs=outputs)
  keras_model.compile(
      optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])
  keras_model.fit(x={"text": texts}, y=targets, epochs=10)

  # Prediction is fed the bare array rather than the feature dict.
  predictions = keras_model.predict(texts)
  self.assertAllEqual(predictions.shape, [2, 1])
def testWorksWithDNNEstimatorAndDataset(self):
  """Trains a DNNEstimator for one step from a tf.data input pipeline.

  The embedding column is built from `self.model_returning_dicts` and reads
  the "outputs" key of that model's result.
  """
  embedding_column = hub.text_embedding_column_v2(
      "descriptions", self.model_returning_dicts, output_key="outputs")

  def input_fn():
    example_features = dict(descriptions=tf.constant([["sentence"]]))
    example_labels = tf.constant([[1]])
    examples = tf.data.Dataset.from_tensor_slices(
        (example_features, example_labels))
    # Repeat the single example, cap at 30 elements, then batch by 5.
    return examples.repeat().take(30).batch(5)

  export_dir = os.path.join(self.get_temp_dir(), "estimator_export")
  dnn = tf.estimator.DNNEstimator(
      model_dir=export_dir,
      hidden_units=[10],
      head=tf.estimator.BinaryClassHead(),
      feature_columns=[embedding_column])
  dnn.train(input_fn=input_fn, max_steps=1)
def testFeatureColumnsIsV2(self):
  """Checks is_feature_column_v2() accepts a text embedding column."""
  column = hub.text_embedding_column_v2("text_a", self.model)
  recognized_as_v2 = feature_column_v2.is_feature_column_v2([column])
  self.assertTrue(recognized_as_v2)
def testParents(self):
  """Checks the column reports its feature key as its only parent."""
  column = hub.text_embedding_column_v2("text", self.model, trainable=False)
  expected_parents = ["text"]
  self.assertEqual(expected_parents, column.parents)