예제 #1
0
  def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    """Trains in memory on numeric, bucketized and indicator columns together.

    Builds one tree of depth 5 from a single five-example batch, then verifies
    the checkpoint counters and that evaluating on the same input reports an
    accuracy of 1.0.
    """
    # One feature column of each kind under test.
    vocab_col = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    one_hot_col = feature_column.indicator_column(vocab_col)
    raw_f1_col = feature_column.numeric_column('f_1', dtype=dtypes.float32)
    bucket_col = feature_column.bucketized_column(raw_f1_col, BUCKET_BOUNDARIES)
    float_col = feature_column.numeric_column('f_2', dtype=dtypes.float32)

    # Five examples served as one un-shuffled batch.
    raw_features = {
        'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        'f_1': np.array([1, 1, 1, 1, 1]),
        'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
    }
    targets = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    five_example_input_fn = numpy_io.numpy_input_fn(
        x=raw_features,
        y=targets,
        num_epochs=None,
        batch_size=5,
        shuffle=False)

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=five_example_input_fn,
        feature_columns=[float_col, bucket_col, one_hot_col],
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)

    # The same input function doubles as the evaluation set.
    metrics = est.evaluate(input_fn=five_example_input_fn, steps=1)
    self.assertAllClose(metrics['accuracy'], 1.0)
예제 #2
0
  def test_saving_with_dense_features(self):
    """Round-trips a DenseFeatures model through JSON and runs predict.

    The model is serialized with `to_json`, rebuilt with `model_from_json`,
    and the rebuilt model must return 10 predictions for 10 input rows.
    """
    numeric_a = feature_column_lib.numeric_column('a')
    vocab_b = feature_column_lib.categorical_column_with_vocabulary_list(
        'b', ['one', 'two'])
    columns = [numeric_a, feature_column_lib.indicator_column(vocab_b)]

    input_a = keras.layers.Input(shape=(1,), name='a')
    input_b = keras.layers.Input(shape=(1,), name='b', dtype='string')
    model_inputs = {'a': input_a, 'b': input_b}

    dense_tensor = dense_features.DenseFeatures(columns)(model_inputs)
    logits = keras.layers.Dense(10)(dense_tensor)
    model = keras.models.Model(model_inputs, logits)
    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    # Serialize the architecture to JSON and rebuild a model from it.
    restored = model_config.model_from_json(model.to_json())

    # Ten rows; feature 'b' carries the same integers rendered as strings.
    rows = np.arange(10).reshape(10, 1)
    feed = {'a': rows, 'b': rows.astype('str')}

    with self.cached_session():
      # Initialize tables for V1 lookup.
      if not context.executing_eagerly():
        self.evaluate(lookup_ops.tables_initializer())

      self.assertLen(restored.predict(feed), 10)
예제 #3
0
    def test_saving_with_sequence_features(self):
        """Round-trips a SequenceFeatures + GRU model through JSON.

        The model is serialized with `to_json`, rebuilt with
        `model_from_json`, and the rebuilt model must return `batch_size`
        predictions when fed sparse inputs for features 'a' and 'b'.
        """
        cols = [
            feature_column_lib.sequence_numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b':
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name='b',
                               dtype='string')
        }

        # The second return value of the layer is not needed here.
        fc_layer, _ = feature_column_lib.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        # One value per batch row, located at index [row, 0, 0].
        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        # `np.str` was a deprecated alias of the builtin `str` and has been
        # removed from NumPy; `np.str_` yields the same array of empty
        # strings.
        values_b = np.zeros(10, dtype=np.str_)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not context.executing_eagerly():
                self.evaluate(lookup_ops.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }, steps=1), batch_size)
예제 #4
0
    def test_save_load_with_dense_features(self, tmpdir, api, loss, optimizer,
                                           metrics):
        """Saves a DenseFeatures model to a TileDB array and reloads it.

        Skipped when `optimizer` is None. After the round trip, the optimizer
        weights and the predictions of the loaded model must equal those of
        the original model.
        """
        if optimizer is None:
            pytest.skip()

        numeric_a = feature_column_lib.numeric_column("a")
        vocab_b = feature_column_lib.categorical_column_with_vocabulary_list(
            "b", ["one", "two"])
        columns = [numeric_a, feature_column_lib.indicator_column(vocab_b)]

        input_a = keras.layers.Input(shape=(1, ), name="a")
        input_b = keras.layers.Input(shape=(1, ), name="b", dtype="string")
        model_inputs = {"a": input_a, "b": input_b}

        dense_tensor = dense_features.DenseFeatures(columns)(model_inputs)
        logits = keras.layers.Dense(10)(dense_tensor)
        model = keras.models.Model(model_inputs, logits)
        model.compile(loss=loss, optimizer=optimizer, metrics=[metrics])

        # Round trip through the TileDB-backed model store.
        store = TensorflowKerasTileDBModel(
            uri=os.path.join(tmpdir, "model_array"), model=model)
        store.save(include_optimizer=True)
        loaded_model = store.load(compile_model=True)

        # Assert optimizer weights are equal
        original_weights = batch_get_value(model.optimizer.weights)
        restored_weights = batch_get_value(loaded_model.optimizer.weights)
        for expected, actual in zip(original_weights, restored_weights):
            np.testing.assert_array_equal(expected, actual)

        # Ten rows; feature "b" carries the same integers rendered as strings.
        rows = np.arange(10).reshape(10, 1)
        feed = {"a": rows, "b": rows.astype("str")}

        # Assert model predictions are equal
        loaded_predictions = loaded_model.predict(feed)
        original_predictions = model.predict(feed)
        np.testing.assert_array_equal(loaded_predictions,
                                      original_predictions)
예제 #5
0
    def test_forward_in_exported_sparse(self):
        """Checks that a forwarded sparse feature appears in exported output.

        Trains a linear classifier for one step on sparse feature 'x', wraps
        it with `forward_features`, exports it, and asserts that predicting
        with the exported model returns the input values under key 'x'.
        """
        features_columns = [
            fc.indicator_column(
                fc.categorical_column_with_vocabulary_list('x', range(10)))
        ]

        classifier = linear.LinearClassifier(feature_columns=features_columns)

        def train_input_fn():
            """Returns a one-element dataset of (features, labels)."""
            dataset = dataset_ops.Dataset.from_tensors({
                'x':
                sparse_tensor.SparseTensor(values=[1, 2, 3],
                                           indices=[[0, 0], [1, 0], [1, 1]],
                                           dense_shape=[2, 2]),
                'labels': [[0], [1]]
            })

            def _split(x):
                # Separate the labels entry from the feature dict.
                labels = x.pop('labels')
                return x, labels

            dataset = dataset.map(_split)
            return dataset

        classifier.train(train_input_fn, max_steps=1)

        classifier = extenders.forward_features(classifier,
                                                keys=['x'],
                                                sparse_default_values={'x': 0})

        def serving_input_fn():
            """Builds a receiver that converts a dense 'x' input to sparse."""
            features_ph = array_ops.placeholder(dtype=dtypes.int32,
                                                name='x',
                                                shape=[None])
            features = {'x': layers.dense_to_sparse(features_ph)}
            return estimator_lib.export.ServingInputReceiver(
                features, {'x': features_ph})

        export_dir, tmpdir = self._export_estimator(classifier,
                                                    serving_input_fn)
        # Remove the export directory even when loading the model or one of
        # the assertions below fails, so failed runs do not leak temp files.
        try:
            prediction_fn = from_saved_model(export_dir,
                                             signature_def_key='predict')

            features = (0, 2)
            prediction = prediction_fn({'x': features})

            self.assertIn('x', prediction)
            self.assertEqual(features, tuple(prediction['x']))
        finally:
            gfile.DeleteRecursively(tmpdir)
예제 #6
0
    def test_functional_input_layer_with_numpy_input_fn(self):
        """Feeds `numpy_input_fn` features through `fc.input_layer`.

        Combines a numeric, an indicator and an embedding column and checks
        both the static width of the result (1 + 3 + 5) and the values of the
        first batch.
        """
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info):
            # Ignore the requested shape/dtype and return the fixed table.
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in input_layer
        price = fc.numeric_column('price')
        body_style = fc.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        # one_hot_body_style has 3 dims in input_layer.
        one_hot_body_style = fc.indicator_column(body_style)
        # embedded_body_style has 5 dims in input_layer.
        embedded_body_style = fc.embedding_column(body_style,
                                                  dimension=5,
                                                  initializer=_initializer)

        input_fn = numpy_io.numpy_input_fn(x={
            'price':
            np.array([11., 12., 13., 14.]),
            'body-style':
            np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
                                           batch_size=2,
                                           shuffle=False)
        features = input_fn()
        net = fc.input_layer(features,
                             [price, one_hot_body_style, embedded_body_style])
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with self._initialized_session() as sess:
            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

            # Stop and join the queue-runner threads even if the assertion
            # fails, so a failing run does not leave them behind.
            try:
                # Each row is formed by concatenating `embedded_body_style`,
                # `one_hot_body_style`, and `price` in order.
                self.assertAllEqual(
                    [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
                     [1., 2., 3., 4., 5., 1., 0., 0., 12]],
                    sess.run(net))
            finally:
                coord.request_stop()
                coord.join(threads)
예제 #7
0
  def test_string_input(self):
    """Fits a Sequential model whose DenseFeatures layer reads a string column.

    There are no explicit assertions; the test passes when `fit` completes.
    """
    # 1024 examples: a random float 'age' and a constant string 'cabin'.
    raw_features = {'age': np.random.random((1024, 1)),
                    'cabin': np.array(['a'] * 1024)}
    raw_labels = np.random.randint(2, size=(1024, 1))
    feature_ds = dataset_ops.Dataset.from_tensor_slices(raw_features)
    label_ds = dataset_ops.Dataset.from_tensor_slices(raw_labels)
    dataset = dataset_ops.Dataset.zip((feature_ds, label_ds)).batch(4)

    # The string feature is hashed into 10 buckets, then one-hot encoded.
    hashed_cabin = fc.categorical_column_with_hash_bucket('cabin', 10)
    dense_columns = [fc.numeric_column('age'),
                     fc.indicator_column(hashed_cabin)]

    model = keras.models.Sequential([
        fc.DenseFeatures(dense_columns),
        keras.layers.Dense(128),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer='sgd',
                  loss=keras.losses.BinaryCrossentropy())
    model.fit(dataset)
예제 #8
0
  def test_functional_input_layer_with_numpy_input_fn(self):
    """Feeds `numpy_input_fn` features through `fc.input_layer`.

    Combines a numeric, an indicator and an embedding column and checks both
    the static width of the result (1 + 3 + 5) and the values of the first
    batch.
    """
    embedding_values = (
        (1., 2., 3., 4., 5.),  # id 0
        (6., 7., 8., 9., 10.),  # id 1
        (11., 12., 13., 14., 15.)  # id 2
    )
    def _initializer(shape, dtype, partition_info):
      # Ignore the requested shape/dtype and return the fixed table.
      del shape, dtype, partition_info
      return embedding_values

    # price has 1 dimension in input_layer
    price = fc.numeric_column('price')
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
    # one_hot_body_style has 3 dims in input_layer.
    one_hot_body_style = fc.indicator_column(body_style)
    # embedded_body_style has 5 dims in input_layer.
    embedded_body_style = fc.embedding_column(body_style, dimension=5,
                                              initializer=_initializer)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([11., 12., 13., 14.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.input_layer(features,
                         [price, one_hot_body_style, embedded_body_style])
    self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      # Stop and join the queue-runner threads even if the assertion fails,
      # so a failing run does not leave them behind.
      try:
        # Each row is formed by concatenating `embedded_body_style`,
        # `one_hot_body_style`, and `price` in order.
        self.assertAllEqual(
            [[11., 12., 13., 14., 15., 0., 0., 1., 11.],
             [1., 2., 3., 4., 5., 1., 0., 0., 12]],
            sess.run(net))
      finally:
        coord.request_stop()
        coord.join(threads)
    def test_sequential_model_with_crossed_column(self):
        """Runs fit/evaluate/predict on a model fed by a crossed column.

        The feature layer combines a bucketized 'age' column with an
        indicator over the cross of those buckets and the 'thal' vocabulary
        column. There are no explicit assertions; the test passes when every
        Keras call completes.
        """
        feature_columns = []
        age_buckets = fc.bucketized_column(
            fc.numeric_column('age'),
            boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
        feature_columns.append(age_buckets)

        # indicator cols
        thal = fc.categorical_column_with_vocabulary_list(
            'thal', ['fixed', 'normal', 'reversible'])

        crossed_feature = fc.crossed_column([age_buckets, thal],
                                            hash_bucket_size=1000)
        crossed_feature = fc.indicator_column(crossed_feature)
        feature_columns.append(crossed_feature)

        feature_layer = df.DenseFeatures(feature_columns)

        model = keras.models.Sequential([
            feature_layer,
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dense(1, activation='sigmoid')
        ])

        age_data = np.random.randint(10, 100, size=100)
        thal_data = np.random.choice(['fixed', 'normal', 'reversible'],
                                     size=100)
        inp_x = {'age': age_data, 'thal': thal_data}
        # The upper bound of `randint` is exclusive, so (0, 2) is needed to
        # draw binary labels; (0, 1) would make every label 0.
        inp_y = np.random.randint(0, 2, size=100)
        ds = dataset_ops.Dataset.from_tensor_slices((inp_x, inp_y)).batch(5)
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        # NOTE(review): fit is called twice, presumably to exercise a second
        # fit on an already-built model; confirm before removing either call.
        model.fit(ds, epochs=1)
        model.fit(ds, epochs=1)
        model.evaluate(ds)
        model.predict(ds)
예제 #10
0
    def test_save_load_with_sequence_features(self):
        """Saves a SequenceFeatures + GRU model to a TileDB array and reloads.

        After the round trip, the optimizer weights and the predictions of
        the loaded model on sparse inputs must equal those of the original
        model.
        """
        cols = [
            feature_column_lib.sequence_numeric_column("a"),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a":
            keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b":
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name="b",
                               dtype="string"),
        }

        # The second return value of the layer is not needed here.
        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=keras.losses.MSE,
            optimizer="rmsprop",
            metrics=[keras.metrics.categorical_accuracy],
        )

        tiledb_uri = os.path.join(self.get_temp_dir(), "model_array")
        tiledb_model_obj = TensorflowTileDB(uri=tiledb_uri)
        tiledb_model_obj.save(model=model, include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(model.optimizer.weights)
        loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)

        # Assert optimizer weights are equal
        for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                     loaded_opt_weights):
            np.testing.assert_array_equal(weight_model, weight_loaded_model)

        batch_size = 10
        timesteps = 1

        # One value per batch row, located at index [row, 0, 0].
        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        # `np.str` was a deprecated alias of the builtin `str` and has been
        # removed from NumPy; `np.str_` yields the same array of empty
        # strings.
        values_b = np.zeros(10, dtype=np.str_)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        # Assert model predictions are equal
        np.testing.assert_array_equal(
            loaded_model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
            model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
        )