def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    """Train in memory on numeric, bucketized and indicator columns together.

    A single tree (max depth 5) is trained on one five-example batch. The
    checkpoint counters are then verified through `_assert_checkpoint`, and
    evaluating on the same input is expected to give an accuracy of 1.0.
    """
    vocab_col = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    one_hot_f0 = feature_column.indicator_column(vocab_col)
    bucketized_f1 = feature_column.bucketized_column(
        feature_column.numeric_column('f_1', dtype=dtypes.float32),
        BUCKET_BOUNDARIES)
    numeric_f2 = feature_column.numeric_column('f_2', dtype=dtypes.float32)

    features = {
        'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        'f_1': np.array([1, 1, 1, 1, 1]),
        'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
    }
    targets = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    # One unshuffled batch holding all five examples, repeated indefinitely.
    input_fn = numpy_io.numpy_input_fn(
        x=features,
        y=targets,
        num_epochs=None,
        batch_size=5,
        shuffle=False)

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=input_fn,
        feature_columns=[numeric_f2, bucketized_f1, one_hot_f0],
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
    metrics = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(metrics['accuracy'], 1.0)
def test_saving_with_dense_features(self):
    """Rebuild a DenseFeatures model from its JSON config and run predict.

    The model combines a numeric column 'a' with an indicator column over
    the string vocabulary column 'b'. After a `to_json` /
    `model_from_json` round trip, predicting on ten rows must yield ten
    outputs.
    """
    vocab_column = feature_column_lib.categorical_column_with_vocabulary_list(
        'b', ['one', 'two'])
    cols = [
        feature_column_lib.numeric_column('a'),
        feature_column_lib.indicator_column(vocab_column),
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(1,), name='a'),
        'b': keras.layers.Input(shape=(1,), name='b', dtype='string'),
    }

    dense_input = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(dense_input)
    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    loaded_model = model_config.model_from_json(model.to_json())

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = inputs_a.astype('str')

    with self.cached_session():
        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        predictions = loaded_model.predict({'a': inputs_a, 'b': inputs_b})
        self.assertLen(predictions, 10)
def test_saving_with_sequence_features(self):
    """Rebuild a SequenceFeatures model from its JSON config and run predict.

    The model feeds a sequence numeric column 'a' and an indicator column
    over the sequence vocabulary column 'b' through a GRU and a Dense
    layer. After a `to_json` / `model_from_json` round trip, predicting on
    sparse inputs with `batch_size` rows must yield `batch_size` outputs.
    """
    cols = [
        feature_column_lib.sequence_numeric_column('a'),
        feature_column_lib.indicator_column(
            feature_column_lib.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two'])),
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b': keras.layers.Input(
            shape=(None, 1), sparse=True, name='b', dtype='string'),
    }

    fc_layer, _ = feature_column_lib.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1
    dense_shape = (batch_size, timesteps, 1)

    # One entry per batch row, located at [row, 0, 0].
    values_a = np.arange(batch_size, dtype=np.float32)
    indices_a = np.zeros((batch_size, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(batch_size)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a, dense_shape)

    # Use the builtin `str`: the `np.str` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    values_b = np.zeros(batch_size, dtype=str)
    indices_b = np.zeros((batch_size, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(batch_size)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b, dense_shape)

    with self.cached_session():
        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        self.assertLen(
            loaded_model.predict({'a': inputs_a, 'b': inputs_b}, steps=1),
            batch_size)
def test_save_load_with_dense_features(self, tmpdir, api, loss, optimizer,
                                       metrics):
    """Save a DenseFeatures model to a TileDB array and compare after load.

    The test is skipped when `optimizer` is None. Otherwise the model is
    compiled with the given `loss`, `optimizer` and `metrics`, saved under
    `tmpdir` together with its optimizer, and loaded back compiled. The
    optimizer weights and the predictions of the original and loaded model
    are then compared for exact equality.

    NOTE(review): the arguments are presumably supplied by pytest fixtures
    or parametrization declared outside this view; `api` is not used here.
    """
    if optimizer is None:
        pytest.skip()

    vocab_column = feature_column_lib.categorical_column_with_vocabulary_list(
        "b", ["one", "two"])
    cols = [
        feature_column_lib.numeric_column("a"),
        feature_column_lib.indicator_column(vocab_column),
    ]
    input_layers = {
        "a": keras.layers.Input(shape=(1, ), name="a"),
        "b": keras.layers.Input(shape=(1, ), name="b", dtype="string"),
    }

    dense_input = dense_features.DenseFeatures(cols)(input_layers)
    output = keras.layers.Dense(10)(dense_input)
    model = keras.models.Model(input_layers, output)
    model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

    tiledb_model_obj = TensorflowKerasTileDBModel(
        uri=os.path.join(tmpdir, "model_array"), model=model)
    tiledb_model_obj.save(include_optimizer=True)
    loaded_model = tiledb_model_obj.load(compile_model=True)

    # Assert optimizer weights are equal
    original_weights = batch_get_value(model.optimizer.weights)
    restored_weights = batch_get_value(loaded_model.optimizer.weights)
    for expected, actual in zip(original_weights, restored_weights):
        np.testing.assert_array_equal(expected, actual)

    inputs_a = np.arange(10).reshape(10, 1)
    inputs_b = inputs_a.astype("str")
    batch = {"a": inputs_a, "b": inputs_b}

    # Assert model predictions are equal
    loaded_predictions = loaded_model.predict(batch)
    original_predictions = model.predict(batch)
    np.testing.assert_array_equal(loaded_predictions, original_predictions)
def test_forward_in_exported_sparse(self):
    """Check that a forwarded sparse feature appears in exported predictions.

    A LinearClassifier is trained for one step on a sparse feature 'x',
    wrapped with `extenders.forward_features` (default value 0 for 'x'),
    exported, and loaded through `from_saved_model`. The 'predict'
    signature must return 'x' with the same values that were fed in.
    """
    features_columns = [
        fc.indicator_column(
            fc.categorical_column_with_vocabulary_list('x', range(10)))
    ]
    classifier = linear.LinearClassifier(feature_columns=features_columns)

    def train_input_fn():
        """Return a single-element dataset of (features, labels)."""
        dataset = dataset_ops.Dataset.from_tensors({
            'x': sparse_tensor.SparseTensor(
                values=[1, 2, 3],
                indices=[[0, 0], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
            'labels': [[0], [1]],
        })

        def _split(x):
            # Separate the labels from the feature dict.
            labels = x.pop('labels')
            return x, labels

        return dataset.map(_split)

    classifier.train(train_input_fn, max_steps=1)
    classifier = extenders.forward_features(
        classifier, keys=['x'], sparse_default_values={'x': 0})

    def serving_input_fn():
        """Feed a 1-D int32 placeholder and expose it as sparse feature 'x'."""
        features_ph = array_ops.placeholder(
            dtype=dtypes.int32, name='x', shape=[None])
        features = {'x': layers.dense_to_sparse(features_ph)}
        return estimator_lib.export.ServingInputReceiver(
            features, {'x': features_ph})

    export_dir, tmpdir = self._export_estimator(classifier, serving_input_fn)
    try:
        prediction_fn = from_saved_model(
            export_dir, signature_def_key='predict')

        features = (0, 2)
        prediction = prediction_fn({'x': features})

        self.assertIn('x', prediction)
        self.assertEqual(features, tuple(prediction['x']))
    finally:
        # Remove the export directory even when loading or an assertion
        # fails, so a failing run does not leave temp files behind.
        gfile.DeleteRecursively(tmpdir)
def test_functional_input_layer_with_numpy_input_fn(self):
    """Check `fc.input_layer` output on features from a numpy input_fn.

    A numeric column, an indicator column and an embedding column (with a
    fixed embedding table) are combined by `fc.input_layer`; the first
    batch of two rows is compared against hand-computed values.
    """
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )

    def _initializer(shape, dtype, partition_info):
        # Ignore the requested shape/dtype and return the fixed table.
        del shape, dtype, partition_info
        return embedding_values

    # price has 1 dimension in input_layer
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    # one_hot_body_style has 3 dims in input_layer.
    one_hot_body_style = fc.indicator_column(body_style)
    # embedded_body_style has 5 dims in input_layer.
    embedded_body_style = fc.embedding_column(
        body_style, dimension=5, initializer=_initializer)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([11., 12., 13., 14.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.input_layer(
        features, [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])

    with self._initialized_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        try:
            # Each row is formed by concatenating `embedded_body_style`,
            # `one_hot_body_style`, and `price` in order.
            self.assertAllEqual(
                [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                 [1., 2., 3., 4., 5., 1., 0., 0., 12.]],
                sess.run(net))
        finally:
            # Always stop and join the queue-runner threads so a failed
            # assertion does not leave them running.
            coord.request_stop()
            coord.join(threads)
def test_string_input(self):
    """Fit a Sequential model whose DenseFeatures layer gets a string feature.

    The dataset zips a feature dict (random float 'age', constant string
    'cabin') with random binary labels in batches of four. The test only
    requires that `model.fit` runs.
    """
    features = {
        'age': np.random.random((1024, 1)),
        'cabin': np.array(['a'] * 1024),
    }
    targets = np.random.randint(2, size=(1024, 1))

    feature_ds = dataset_ops.Dataset.from_tensor_slices(features)
    target_ds = dataset_ops.Dataset.from_tensor_slices(targets)
    dataset = dataset_ops.Dataset.zip((feature_ds, target_ds)).batch(4)

    categorical_cols = [fc.categorical_column_with_hash_bucket('cabin', 10)]
    feature_cols = [fc.numeric_column('age')]
    feature_cols += [fc.indicator_column(col) for col in categorical_cols]

    model = keras.models.Sequential([
        fc.DenseFeatures(feature_cols),
        keras.layers.Dense(128),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer='sgd', loss=keras.losses.BinaryCrossentropy())

    model.fit(dataset)
def test_functional_input_layer_with_numpy_input_fn(self):
    """Check `fc.input_layer` output on features from a numpy input_fn.

    A numeric column, an indicator column and an embedding column (with a
    fixed embedding table) are combined by `fc.input_layer`; the first
    batch of two rows is compared against hand-computed values.
    """
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )

    def _initializer(shape, dtype, partition_info):
        # Ignore the requested shape/dtype and return the fixed table.
        del shape, dtype, partition_info
        return embedding_values

    # price has 1 dimension in input_layer
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    # one_hot_body_style has 3 dims in input_layer.
    one_hot_body_style = fc.indicator_column(body_style)
    # embedded_body_style has 5 dims in input_layer.
    embedded_body_style = fc.embedding_column(
        body_style, dimension=5, initializer=_initializer)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([11., 12., 13., 14.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.input_layer(
        features, [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])

    with self._initialized_session() as sess:
        coord = coordinator.Coordinator()
        threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
        try:
            # Each row is formed by concatenating `embedded_body_style`,
            # `one_hot_body_style`, and `price` in order.
            self.assertAllEqual(
                [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                 [1., 2., 3., 4., 5., 1., 0., 0., 12.]],
                sess.run(net))
        finally:
            # Always stop and join the queue-runner threads so a failed
            # assertion does not leave them running.
            coord.request_stop()
            coord.join(threads)
def test_sequential_model_with_crossed_column(self):
    """Fit, evaluate and predict with a bucketized and a crossed column.

    The DenseFeatures layer receives a bucketized 'age' column and an
    indicator over the cross of the age buckets with the 'thal' vocabulary
    column. The model is fitted twice on the same dataset, then evaluated
    and used for prediction; the test only requires these calls to run.
    """
    feature_columns = []
    age_buckets = fc.bucketized_column(
        fc.numeric_column('age'),
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    feature_columns.append(age_buckets)

    # indicator cols
    thal = fc.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])

    crossed_feature = fc.crossed_column(
        [age_buckets, thal], hash_bucket_size=1000)
    crossed_feature = fc.indicator_column(crossed_feature)
    feature_columns.append(crossed_feature)

    feature_layer = df.DenseFeatures(feature_columns)

    model = keras.models.Sequential([
        feature_layer,
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    age_data = np.random.randint(10, 100, size=100)
    thal_data = np.random.choice(['fixed', 'normal', 'reversible'], size=100)
    inp_x = {'age': age_data, 'thal': thal_data}
    # `randint`'s upper bound is exclusive, so use 2 to draw labels from
    # {0, 1}; an upper bound of 1 would make every label 0.
    inp_y = np.random.randint(0, 2, size=100)
    ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    # Fit twice on the same dataset, as in the original test.
    model.fit(ds, epochs=1)
    model.fit(ds, epochs=1)
    model.evaluate(ds)
    model.predict(ds)
def test_save_load_with_sequence_features(self):
    """Save a SequenceFeatures model to a TileDB array and compare after load.

    The model feeds a sequence numeric column 'a' and an indicator column
    over the sequence vocabulary column 'b' through a GRU and a Dense
    layer. It is saved together with its optimizer under the test's temp
    directory and loaded back compiled. The optimizer weights and the
    predictions of the original and loaded model are then compared for
    exact equality.
    """
    cols = [
        feature_column_lib.sequence_numeric_column("a"),
        feature_column_lib.indicator_column(
            feature_column_lib.sequence_categorical_column_with_vocabulary_list(
                "b", ["one", "two"])),
    ]
    input_layers = {
        "a": keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
        "b": keras.layers.Input(
            shape=(None, 1), sparse=True, name="b", dtype="string"),
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer="rmsprop",
        metrics=[keras.metrics.categorical_accuracy],
    )

    tiledb_uri = os.path.join(self.get_temp_dir(), "model_array")
    tiledb_model_obj = TensorflowTileDB(uri=tiledb_uri)
    tiledb_model_obj.save(model=model, include_optimizer=True)
    loaded_model = tiledb_model_obj.load(compile_model=True)

    # Assert optimizer weights are equal
    model_opt_weights = batch_get_value(model.optimizer.weights)
    loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)
    for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                 loaded_opt_weights):
        np.testing.assert_array_equal(weight_model, weight_loaded_model)

    batch_size = 10
    timesteps = 1
    dense_shape = (batch_size, timesteps, 1)

    # One entry per batch row, located at [row, 0, 0].
    values_a = np.arange(batch_size, dtype=np.float32)
    indices_a = np.zeros((batch_size, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(batch_size)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a, dense_shape)

    # Use the builtin `str`: the `np.str` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    values_b = np.zeros(batch_size, dtype=str)
    indices_b = np.zeros((batch_size, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(batch_size)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b, dense_shape)

    # Assert model predictions are equal
    np.testing.assert_array_equal(
        loaded_model.predict({"a": inputs_a, "b": inputs_b}, steps=1),
        model.predict({"a": inputs_a, "b": inputs_b}, steps=1),
    )