Example #1
  def testExportMonitor_EstimatorProvidesSignature(self):
    random.seed(42)
    x = np.random.rand(1000)
    y = 2 * x + 3
    cont_features = [feature_column.real_valued_column('', dimension=1)]
    regressor = learn.LinearRegressor(feature_columns=cont_features)
    export_dir = os.path.join(tempfile.mkdtemp(), 'export')
    export_monitor = learn.monitors.ExportMonitor(
        every_n_steps=1, export_dir=export_dir, exports_to_keep=2)
    regressor.fit(x, y, steps=10, monitors=[export_monitor])
    self._assert_export(export_monitor, export_dir, 'regression_signature')
Example #2
  def testPartitionedMixedFeatures(self):
    """Tests SDCALogisticClassifier with a mix of features (partitioned)."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([[0.6], [0.8], [0.3]]),
          'sq_footage':
              constant_op.constant([900.0, 700.0, 600.0]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[3.0], [1.0], [1.0]])
      }, constant_op.constant([[1], [0], [1]])

    with self._single_threaded_test_session():
      price = feature_column_lib.real_valued_column('price')
      sq_footage_bucket = feature_column_lib.bucketized_column(
          feature_column_lib.real_valued_column('sq_footage'),
          boundaries=[650.0, 800.0])
      country = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      sq_footage_country = feature_column_lib.crossed_column(
          [sq_footage_bucket, country], hash_bucket_size=10)
      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[
              price, sq_footage_bucket, country, sq_footage_country
          ],
          weight_column_name='weights',
          partitioner=partitioned_variables.fixed_size_partitioner(
              num_shards=2, axis=0))
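      # fixed_size_partitioner shards each model variable into two pieces
      # along axis 0, so this test exercises the partitioned-variable path.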
      classifier.fit(input_fn=input_fn, steps=50)
      metrics = classifier.evaluate(input_fn=input_fn, steps=1)
      self.assertGreater(metrics['accuracy'], 0.9)
Example #3
  def testRealValuedFeatures(self):
    """Tests SDCALogisticClassifier works with real valued features."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2']),
          'maintenance_cost': constant_op.constant([500.0, 200.0]),
          'sq_footage': constant_op.constant([[800.0], [600.0]]),
          'weights': constant_op.constant([[1.0], [1.0]])
      }, constant_op.constant([[0], [1]])

    with self._single_threaded_test_session():
      maintenance_cost = feature_column_lib.real_valued_column(
          'maintenance_cost')
      sq_footage = feature_column_lib.real_valued_column('sq_footage')
      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[maintenance_cost, sq_footage],
          weight_column_name='weights')
      classifier.fit(input_fn=input_fn, steps=100)
      loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
      self.assertLess(loss, 0.05)
Example #4
  def testMixedFeaturesArbitraryWeights(self):
    """Tests SDCALinearRegressor works with a mix of features."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([[0.6], [0.8], [0.3]]),
          'sq_footage':
              constant_op.constant([[900.0], [700.0], [600.0]]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[3.0], [5.0], [7.0]])
      }, constant_op.constant([[1.55], [-1.25], [-3.0]])

    with self._single_threaded_test_session():
      price = feature_column_lib.real_valued_column('price')
      sq_footage_bucket = feature_column_lib.bucketized_column(
          feature_column_lib.real_valued_column('sq_footage'),
          boundaries=[650.0, 800.0])
      country = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      sq_footage_country = feature_column_lib.crossed_column(
          [sq_footage_bucket, country], hash_bucket_size=10)
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id',
          feature_columns=[
              price, sq_footage_bucket, country, sq_footage_country
          ],
          l2_regularization=1.0,
          weight_column_name='weights')
      regressor.fit(input_fn=input_fn, steps=20)
      loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
      self.assertLess(loss, 0.05)
Example #5
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.DNNBoostedTreeCombinedClassifier(
            dnn_hidden_units=[1],
            dnn_feature_columns=[feature_column.real_valued_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            n_classes=2,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=False,
            tree_feature_columns=[feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example #6
    def testDNNModel(self):
        """Tests multi-class classification using matrix data as input."""
        cont_features = [
            feature_column.real_valued_column('feature', dimension=4)
        ]

        head = head_lib.multi_class_head(n_classes=3)
        classifier = _dnn_estimator(head,
                                    feature_columns=cont_features,
                                    hidden_units=[3, 3])

        classifier.fit(input_fn=_iris_input_fn, steps=1000)
        classifier.evaluate(input_fn=_iris_input_fn, steps=100)
Example #7
    def testMixedFeatures(self):
        """Tests SVM classifier with a mix of features."""
        def input_fn():
            return {
                'example_id':
                constant_op.constant(['1', '2', '3']),
                'price':
                constant_op.constant([0.6, 0.8, 0.3]),
                'sq_footage':
                constant_op.constant([[900.0], [700.0], [600.0]]),
                'country':
                sparse_tensor.SparseTensor(values=['IT', 'US', 'GB'],
                                           indices=[[0, 0], [1, 3], [2, 1]],
                                           dense_shape=[3, 5]),
                'weights':
                constant_op.constant([[3.0], [1.0], [1.0]])
            }, constant_op.constant([[1], [0], [1]])

        price = feature_column.real_valued_column('price')
        sq_footage_bucket = feature_column.bucketized_column(
            feature_column.real_valued_column('sq_footage'),
            boundaries=[650.0, 800.0])
        country = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        sq_footage_country = feature_column.crossed_column(
            [sq_footage_bucket, country], hash_bucket_size=10)
        svm_classifier = svm.SVM(
            feature_columns=[
                price, sq_footage_bucket, country, sq_footage_country
            ],
            example_id_column='example_id',
            weight_column_name='weights',
            l1_regularization=0.1,
            l2_regularization=1.0)

        svm_classifier.fit(input_fn=input_fn, steps=30)
        accuracy = svm_classifier.evaluate(input_fn=input_fn,
                                           steps=1)['accuracy']
        self.assertAlmostEqual(accuracy, 1.0, places=3)
Example #8
    def testPrepareInputsForRnnSparseAndDense(self):
        num_unroll = 2
        embedding_dimension = 8
        dense_dimension = 2

        expected = [
            np.array([[1., 1., 1., 1., 1., 1., 1., 1., 111., 112.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 211., 212.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 311., 312.]]),
            np.array([[1., 1., 1., 1., 1., 1., 1., 1., 121., 122.],
                      [2., 2., 2., 2., 2., 2., 2., 2., 221., 222.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 321., 322.]])
        ]

        sequence_features = {
            'wire_cast':
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]],
                values=[
                    b'marlo', b'stringer', b'omar', b'stringer',
                    b'marlo', b'marlo', b'omar'
                ],
                dense_shape=[3, 2, 2]),
            'seq_feature0':
            constant_op.constant([[[111., 112.], [121., 122.]],
                                  [[211., 212.], [221., 222.]],
                                  [[311., 312.], [321., 322.]]])
        }

        wire_cast = feature_column.sparse_column_with_keys(
            'wire_cast', ['marlo', 'omar', 'stringer'])
        wire_cast_embedded = feature_column.embedding_column(
            wire_cast,
            dimension=embedding_dimension,
            combiner='sum',
            initializer=init_ops.ones_initializer())
        seq_feature0_column = feature_column.real_valued_column(
            'seq_feature0', dimension=dense_dimension)

        sequence_feature_columns = [seq_feature0_column, wire_cast_embedded]

        context_features = None

        self._test_prepare_inputs_for_rnn(sequence_features, context_features,
                                          sequence_feature_columns, num_unroll,
                                          expected)
Example #9
    def testRealValuedFeaturesPerfectlySeparable(self):
        """Tests SVM classifier with real valued features."""
        def input_fn():
            return {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.0], [1.0], [3.0]]),
                'feature2': constant_op.constant([[1.0], [-1.2], [1.0]]),
            }, constant_op.constant([[1], [0], [1]])

        feature1 = feature_column.real_valued_column('feature1')
        feature2 = feature_column.real_valued_column('feature2')
        svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                                 example_id_column='example_id',
                                 l1_regularization=0.0,
                                 l2_regularization=0.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss = metrics['loss']
        accuracy = metrics['accuracy']
        # The points are not only separable, but there also exist weights (for
        # instance w1=0.0, w2=1.0) that satisfy the margin inequalities
        # (y_i * w^T x_i >= 1).
        # The unregularized loss should therefore be 0.0.
        self.assertAlmostEqual(loss, 0.0, places=3)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
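The separability claim in the comment above can be checked directly. A minimal standalone numpy sketch (not part of the original test): it uses the same three points, maps label 0 to -1 as SVMs do, and plugs in the weights w1=0.0, w2=1.0 named in the comment.

import numpy as np

x = np.array([[0.0, 1.0], [1.0, -1.2], [3.0, 1.0]])  # (feature1, feature2)
y = np.array([1.0, -1.0, 1.0])  # labels [1, 0, 1] mapped to {+1, -1}
w = np.array([0.0, 1.0])  # the weights named in the comment

margins = y * (x @ w)  # [1.0, 1.2, 1.0]
assert (margins >= 1.0).all()  # every margin inequality holds, so the
                               # unregularized hinge loss is exactly 0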
Example #10
  def testDNNRegression(self):
    my_seed = 42
    config = run_config.RunConfig(tf_random_seed=my_seed)
    boston = base.load_boston()
    columns = [feature_column.real_valued_column('', dimension=13)]

    with ops.Graph().as_default() as g1:
      random.seed(my_seed)
      g1.seed = my_seed
      variables.create_global_step()
      regressor1 = dnn.DNNRegressor(
          hidden_units=[10],
          feature_columns=columns,
          optimizer=_NULL_OPTIMIZER,
          config=config)
      regressor1.fit(x=boston.data, y=boston.target, steps=1)

    with ops.Graph().as_default() as g2:
      random.seed(my_seed)
      g2.seed = my_seed
      variables.create_global_step()
      regressor2 = dnn.DNNRegressor(
          hidden_units=[10],
          feature_columns=columns,
          optimizer=_NULL_OPTIMIZER,
          config=config)
      regressor2.fit(x=boston.data, y=boston.target, steps=1)

    weights1 = ([regressor1.get_variable_value('dnn/hiddenlayer_0/weights')] +
                [regressor1.get_variable_value('dnn/logits/weights')])
    weights2 = ([regressor2.get_variable_value('dnn/hiddenlayer_0/weights')] +
                [regressor2.get_variable_value('dnn/logits/weights')])
    for w1, w2 in zip(weights1, weights2):
      self.assertAllClose(w1, w2)

    biases1 = ([regressor1.get_variable_value('dnn/hiddenlayer_0/biases')] +
               [regressor1.get_variable_value('dnn/logits/biases')])
    biases2 = ([regressor2.get_variable_value('dnn/hiddenlayer_0/biases')] +
               [regressor2.get_variable_value('dnn/logits/biases')])
    for b1, b2 in zip(biases1, biases2):
      self.assertAllClose(b1, b2)
    self.assertAllClose(
        list(regressor1.predict_scores(
            boston.data, as_iterable=True)),
        list(regressor2.predict_scores(
            boston.data, as_iterable=True)),
        atol=1e-05)
Example #11
  def setUp(self):
    super(DynamicRnnEstimatorTest, self).setUp()
    self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
    self.mock_target_column = MockTargetColumn(
        num_label_columns=self.NUM_LABEL_COLUMNS)

    location = feature_column.sparse_column_with_keys(
        'location', keys=['west_side', 'east_side', 'nyc'])
    location_onehot = feature_column.one_hot_column(location)
    self.context_feature_columns = [location_onehot]

    wire_cast = feature_column.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    wire_cast_embedded = feature_column.embedding_column(wire_cast, dimension=8)
    measurements = feature_column.real_valued_column(
        'measurements', dimension=2)
    self.sequence_feature_columns = [measurements, wire_cast_embedded]
Example #12
def _infer_real_valued_column_for_tensor(name, tensor):
    """Creates a real_valued_column for given tensor and name."""
    if isinstance(tensor, sparse_tensor_py.SparseTensor):
        raise ValueError(
            'SparseTensor is not supported for auto detection. Please define '
            'a corresponding FeatureColumn for tensor {} {}.'.format(
                name, tensor))

    if not (tensor.dtype.is_integer or tensor.dtype.is_floating):
        raise ValueError(
            'Only integer and floating dtypes are supported for auto '
            'detection. Please define a corresponding FeatureColumn for '
            'tensor {} {}.'.format(name, tensor))

    shape = tensor.get_shape().as_list()
    dimension = 1
    for i in range(1, len(shape)):
        dimension *= shape[i]
    return fc.real_valued_column(name, dimension=dimension, dtype=tensor.dtype)
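For illustration, the helper flattens all non-batch dimensions of the tensor into a single real_valued_column dimension. A minimal sketch of hypothetical usage (assuming TF 1.x graph mode; the tensor name and shape here are made up):

import tensorflow as tf

# Rank-3 float tensor: batch size 32, inner shape [2, 3].
t = tf.zeros([32, 2, 3], dtype=tf.float32)
column = _infer_real_valued_column_for_tensor('my_feature', t)
# column.dimension == 6 (i.e. 2 * 3) and column.dtype == tf.float32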
Example #13
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
        classifier.export(self._export_dir_base)
Example #14
    def testOverridesGlobalSteps(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 2
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False,
            override_global_step_value=10000000)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        self._assert_checkpoint(classifier.model_dir, global_step=10000000)
Example #15
    def testThatLeafIndexIsInPredictions(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            self.assertTrue("leaf_index" in prediction_dict)
            self.assertTrue("logits" in prediction_dict)
Example #16
  def testRealValuedFeatureWithHigherDimension(self):
    """Tests SDCALogisticClassifier with high-dimension real valued features."""

    # This input_fn matches the one in testRealValuedFeatures, except that the
    # two 1-dimensional dense features are replaced by one 2-dimensional
    # feature.
    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2']),
          'dense_feature':
              constant_op.constant([[500.0, 800.0], [200.0, 600.0]])
      }, constant_op.constant([[0], [1]])

    with self._single_threaded_test_session():
      dense_feature = feature_column_lib.real_valued_column(
          'dense_feature', dimension=2)
      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id', feature_columns=[dense_feature])
      classifier.fit(input_fn=input_fn, steps=100)
      loss = classifier.evaluate(input_fn=input_fn, steps=1)['loss']
      self.assertLess(loss, 0.05)
Example #17
  def testLinearRegression(self):
    my_seed = 42
    config = run_config.RunConfig(tf_random_seed=my_seed)
    boston = base.load_boston()
    columns = [feature_column.real_valued_column('', dimension=13)]

    # We train two identically configured regressors from the same seed and
    # verify that they learn the same weights, bias, and predictions.

    with ops.Graph().as_default() as g1:
      random.seed(my_seed)
      g1.seed = my_seed
      variables.create_global_step()
      regressor1 = linear.LinearRegressor(
          optimizer=_NULL_OPTIMIZER, feature_columns=columns, config=config)
      regressor1.fit(x=boston.data, y=boston.target, steps=1)

    with ops.Graph().as_default() as g2:
      random.seed(my_seed)
      g2.seed = my_seed
      variables.create_global_step()
      regressor2 = linear.LinearRegressor(
          optimizer=_NULL_OPTIMIZER, feature_columns=columns, config=config)
      regressor2.fit(x=boston.data, y=boston.target, steps=1)

    variable_names = regressor1.get_variable_names()
    self.assertIn('linear//weight', variable_names)
    self.assertIn('linear/bias_weight', variable_names)
    regressor1_weights = regressor1.get_variable_value('linear//weight')
    regressor2_weights = regressor2.get_variable_value('linear//weight')
    regressor1_bias = regressor1.get_variable_value('linear/bias_weight')
    regressor2_bias = regressor2.get_variable_value('linear/bias_weight')
    self.assertAllClose(regressor1_weights, regressor2_weights)
    self.assertAllClose(regressor1_bias, regressor2_bias)
    self.assertAllClose(
        list(regressor1.predict_scores(
            boston.data, as_iterable=True)),
        list(regressor2.predict_scores(
            boston.data, as_iterable=True)),
        atol=1e-05)
Example #18
def _make_experiment_fn(output_dir):
    """Creates experiment for gradient boosted decision trees."""
    (x_train,
     y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()

    train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={"x": x_train},
        y=y_train,
        batch_size=FLAGS.batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
        x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)

    feature_columns = [
        feature_column.real_valued_column("x", dimension=_BOSTON_NUM_FEATURES)
    ]
    feature_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns)
    serving_input_fn = tf.contrib.learn.utils.build_parsing_serving_input_fn(
        feature_spec)
    # An export strategy that outputs the feature importance and also exports
    # the internal tree representation in another format.
    export_strategy = custom_export_strategy.make_custom_export_strategy(
        "exports",
        convert_fn=_convert_fn,
        feature_columns=feature_columns,
        export_input_fn=serving_input_fn)
    return tf.contrib.learn.Experiment(
        estimator=_get_tfbt(output_dir, feature_columns),
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=None,
        eval_steps=FLAGS.num_eval_steps,
        eval_metrics=None,
        export_strategies=[export_strategy])
Example #19
class DynamicRnnEstimatorTest(test.TestCase):

  NUM_RNN_CELL_UNITS = 8
  NUM_LABEL_COLUMNS = 6
  INPUTS_COLUMN = feature_column.real_valued_column(
      'inputs', dimension=NUM_LABEL_COLUMNS)

  def setUp(self):
    super(DynamicRnnEstimatorTest, self).setUp()
    self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
    self.mock_target_column = MockTargetColumn(
        num_label_columns=self.NUM_LABEL_COLUMNS)

    location = feature_column.sparse_column_with_keys(
        'location', keys=['west_side', 'east_side', 'nyc'])
    location_onehot = feature_column.one_hot_column(location)
    self.context_feature_columns = [location_onehot]

    wire_cast = feature_column.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    wire_cast_embedded = feature_column.embedding_column(wire_cast, dimension=8)
    measurements = feature_column.real_valued_column(
        'measurements', dimension=2)
    self.sequence_feature_columns = [measurements, wire_cast_embedded]

  def GetColumnsToTensors(self):
    """Get columns_to_tensors matching setUp(), in the current default graph."""
    return {
        'location':
            sparse_tensor.SparseTensor(
                indices=[[0, 0], [1, 0], [2, 0]],
                values=['west_side', 'west_side', 'nyc'],
                dense_shape=[3, 1]),
        'wire_cast':
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0],
                         [1, 0, 0], [1, 1, 0], [1, 1, 1],
                         [2, 0, 0]],
                values=[b'marlo', b'stringer',
                        b'omar', b'stringer', b'marlo',
                        b'marlo'],
                dense_shape=[3, 2, 2]),
        'measurements':
            random_ops.random_uniform(
                [3, 2, 2], seed=4711)
    }

  def GetClassificationTargetsOrNone(self, mode):
    """Get targets matching setUp() and mode, in the current default graph."""
    return (random_ops.random_uniform(
        [3, 2, 1], 0, 2, dtype=dtypes.int64, seed=1412) if
            mode != model_fn_lib.ModeKeys.INFER else None)

  def testBuildSequenceInput(self):
    sequence_input = dynamic_rnn_estimator.build_sequence_input(
        self.GetColumnsToTensors(), self.sequence_feature_columns,
        self.context_feature_columns)
    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(lookup_ops.tables_initializer())
      sequence_input_val = sess.run(sequence_input)
    expected_shape = np.array([
        3,  # expected batch size
        2,  # padded sequence length
        3 + 8 + 2  # location keys + embedding dim + measurement dimension
    ])
    self.assertAllEqual(expected_shape, sequence_input_val.shape)

  def testConstructRNN(self):
    initial_state = None
    sequence_input = dynamic_rnn_estimator.build_sequence_input(
        self.GetColumnsToTensors(), self.sequence_feature_columns,
        self.context_feature_columns)
    activations_t, final_state_t = dynamic_rnn_estimator.construct_rnn(
        initial_state, sequence_input, self.rnn_cell,
        self.mock_target_column.num_label_columns)

    # Obtain values of activations and final state.
    with session.Session() as sess:
      sess.run(variables.global_variables_initializer())
      sess.run(lookup_ops.tables_initializer())
      activations, final_state = sess.run([activations_t, final_state_t])

    expected_activations_shape = np.array([3, 2, self.NUM_LABEL_COLUMNS])
    self.assertAllEqual(expected_activations_shape, activations.shape)
    expected_state_shape = np.array([3, self.NUM_RNN_CELL_UNITS])
    self.assertAllEqual(expected_state_shape, final_state.shape)

  def testGetOutputAlternatives(self):
    test_cases = (
        (rnn_common.PredictionType.SINGLE_VALUE,
         constants.ProblemType.CLASSIFICATION,
         {prediction_key.PredictionKey.CLASSES: True,
          prediction_key.PredictionKey.PROBABILITIES: True,
          dynamic_rnn_estimator._get_state_name(0): True},
         {'dynamic_rnn_output':
          (constants.ProblemType.CLASSIFICATION,
           {prediction_key.PredictionKey.CLASSES: True,
            prediction_key.PredictionKey.PROBABILITIES: True})}),

        (rnn_common.PredictionType.SINGLE_VALUE,
         constants.ProblemType.LINEAR_REGRESSION,
         {prediction_key.PredictionKey.SCORES: True,
          dynamic_rnn_estimator._get_state_name(0): True,
          dynamic_rnn_estimator._get_state_name(1): True},
         {'dynamic_rnn_output':
          (constants.ProblemType.LINEAR_REGRESSION,
           {prediction_key.PredictionKey.SCORES: True})}),

        (rnn_common.PredictionType.MULTIPLE_VALUE,
         constants.ProblemType.CLASSIFICATION,
         {prediction_key.PredictionKey.CLASSES: True,
          prediction_key.PredictionKey.PROBABILITIES: True,
          dynamic_rnn_estimator._get_state_name(0): True},
         None))

    for pred_type, prob_type, pred_dict, expected_alternatives in test_cases:
      actual_alternatives = dynamic_rnn_estimator._get_output_alternatives(
          pred_type, prob_type, pred_dict)
      self.assertEqual(expected_alternatives, actual_alternatives)

  # testGetDynamicRnnModelFn{Train,Eval,Infer}() test which fields
  # of ModelFnOps are set depending on mode.
  def testGetDynamicRnnModelFnTrain(self):
    model_fn_ops = self._GetModelFnOpsForMode(model_fn_lib.ModeKeys.TRAIN)
    self.assertIsNotNone(model_fn_ops.predictions)
    self.assertIsNotNone(model_fn_ops.loss)
    self.assertIsNotNone(model_fn_ops.train_op)
    # None may get normalized to {}; we accept neither.
    self.assertNotEqual(len(model_fn_ops.eval_metric_ops), 0)

  def testGetDynamicRnnModelFnEval(self):
    model_fn_ops = self._GetModelFnOpsForMode(model_fn_lib.ModeKeys.EVAL)
    self.assertIsNotNone(model_fn_ops.predictions)
    self.assertIsNotNone(model_fn_ops.loss)
    self.assertIsNone(model_fn_ops.train_op)
    # None may get normalized to {}; we accept neither.
    self.assertNotEqual(len(model_fn_ops.eval_metric_ops), 0)

  def testGetDynamicRnnModelFnInfer(self):
    model_fn_ops = self._GetModelFnOpsForMode(model_fn_lib.ModeKeys.INFER)
    self.assertIsNotNone(model_fn_ops.predictions)
    self.assertIsNone(model_fn_ops.loss)
    self.assertIsNone(model_fn_ops.train_op)
    # None may get normalized to {}; we accept both.
    self.assertFalse(model_fn_ops.eval_metric_ops)

  def _GetModelFnOpsForMode(self, mode):
    """Helper for testGetDynamicRnnModelFn{Train,Eval,Infer}()."""
    model_fn = dynamic_rnn_estimator._get_dynamic_rnn_model_fn(
        cell_type='basic_rnn',
        num_units=[10],
        target_column=target_column_lib.multi_class_target(n_classes=2),
        # Only CLASSIFICATION yields eval metrics to test for.
        problem_type=constants.ProblemType.CLASSIFICATION,
        prediction_type=rnn_common.PredictionType.MULTIPLE_VALUE,
        optimizer='SGD',
        sequence_feature_columns=self.sequence_feature_columns,
        context_feature_columns=self.context_feature_columns,
        learning_rate=0.1)
    labels = self.GetClassificationTargetsOrNone(mode)
    model_fn_ops = model_fn(
        features=self.GetColumnsToTensors(), labels=labels, mode=mode)
    return model_fn_ops

  def testExport(self):
    input_feature_key = 'magic_input_feature_key'

    def get_input_fn(mode):

      def input_fn():
        features = self.GetColumnsToTensors()
        if mode == model_fn_lib.ModeKeys.INFER:
          input_examples = array_ops.placeholder(dtypes.string)
          features[input_feature_key] = input_examples
          # Real code would now parse features out of input_examples,
          # but this test can just stick to the constants above.
        return features, self.GetClassificationTargetsOrNone(mode)

      return input_fn

    model_dir = tempfile.mkdtemp()

    def estimator_fn():
      return dynamic_rnn_estimator.DynamicRnnEstimator(
          problem_type=constants.ProblemType.CLASSIFICATION,
          prediction_type=rnn_common.PredictionType.MULTIPLE_VALUE,
          num_classes=2,
          num_units=self.NUM_RNN_CELL_UNITS,
          sequence_feature_columns=self.sequence_feature_columns,
          context_feature_columns=self.context_feature_columns,
          predict_probabilities=True,
          model_dir=model_dir)

    # Train a bit to create an exportable checkpoint.
    estimator_fn().fit(input_fn=get_input_fn(model_fn_lib.ModeKeys.TRAIN),
                       steps=100)
    # Now export, but from a fresh estimator instance, like you would
    # in an export binary. That means .export() has to work without
    # .fit() being called on the same object.
    export_dir = tempfile.mkdtemp()
    print('Exporting to', export_dir)
    estimator_fn().export(
        export_dir,
        input_fn=get_input_fn(model_fn_lib.ModeKeys.INFER),
        use_deprecated_input_fn=False,
        input_feature_key=input_feature_key)

  def testStateTupleDictConversion(self):
    """Test `state_tuple_to_dict` and `dict_to_state_tuple`."""
    cell_sizes = [5, 3, 7]
    # A MultiRNNCell of LSTMCells is both a common choice and an interesting
    # test case, because it has two levels of nesting, with an inner class that
    # is not a plain tuple.
    cell = rnn_cell.MultiRNNCell(
        [rnn_cell.LSTMCell(i) for i in cell_sizes])
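    # Each LSTMCell contributes two state components (c and h), so cell_sizes
    # [5, 3, 7] flatten to the six state sizes [5, 5, 3, 3, 7, 7] used below.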
    state_dict = {
        dynamic_rnn_estimator._get_state_name(i):
        array_ops.expand_dims(math_ops.range(cell_size), 0)
        for i, cell_size in enumerate([5, 5, 3, 3, 7, 7])
    }
    expected_state = (rnn_cell.LSTMStateTuple(
        np.reshape(np.arange(5), [1, -1]), np.reshape(np.arange(5), [1, -1])),
                      rnn_cell.LSTMStateTuple(
                          np.reshape(np.arange(3), [1, -1]),
                          np.reshape(np.arange(3), [1, -1])),
                      rnn_cell.LSTMStateTuple(
                          np.reshape(np.arange(7), [1, -1]),
                          np.reshape(np.arange(7), [1, -1])))
    actual_state = dynamic_rnn_estimator.dict_to_state_tuple(state_dict, cell)
    flattened_state = dynamic_rnn_estimator.state_tuple_to_dict(actual_state)

    with self.cached_session() as sess:
      (state_dict_val, actual_state_val, flattened_state_val) = sess.run(
          [state_dict, actual_state, flattened_state])

    def _recursive_assert_equal(x, y):
      self.assertEqual(type(x), type(y))
      if isinstance(x, (list, tuple)):
        self.assertEqual(len(x), len(y))
        for i, _ in enumerate(x):
          _recursive_assert_equal(x[i], y[i])
      elif isinstance(x, np.ndarray):
        np.testing.assert_array_equal(x, y)
      else:
        self.fail('Unexpected type: {}'.format(type(x)))

    for k in state_dict_val.keys():
      np.testing.assert_array_almost_equal(
          state_dict_val[k],
          flattened_state_val[k],
          err_msg='Wrong value for state component {}.'.format(k))
    _recursive_assert_equal(expected_state, actual_state_val)

  def testMultiRNNState(self):
    """Test that state flattening/reconstruction works for `MultiRNNCell`."""
    batch_size = 11
    sequence_length = 16
    train_steps = 5
    cell_sizes = [4, 8, 7]
    learning_rate = 0.1

    def get_shift_input_fn(batch_size, sequence_length, seed=None):

      def input_fn():
        random_sequence = random_ops.random_uniform(
            [batch_size, sequence_length + 1],
            0,
            2,
            dtype=dtypes.int32,
            seed=seed)
        labels = array_ops.slice(random_sequence, [0, 0],
                                 [batch_size, sequence_length])
        inputs = array_ops.expand_dims(
            math_ops.cast(
                array_ops.slice(random_sequence, [0, 1],
                                [batch_size, sequence_length]),
                dtypes.float32), 2)
        input_dict = {
            dynamic_rnn_estimator._get_state_name(i): random_ops.random_uniform(
                [batch_size, cell_size], seed=((i + 1) * seed))
            for i, cell_size in enumerate([4, 4, 8, 8, 7, 7])
        }
        input_dict['inputs'] = inputs
        return input_dict, labels

      return input_fn

    seq_columns = [feature_column.real_valued_column('inputs', dimension=1)]
    config = run_config.RunConfig(tf_random_seed=21212)
    cell_type = 'lstm'
    sequence_estimator = dynamic_rnn_estimator.DynamicRnnEstimator(
        problem_type=constants.ProblemType.CLASSIFICATION,
        prediction_type=rnn_common.PredictionType.MULTIPLE_VALUE,
        num_classes=2,
        num_units=cell_sizes,
        sequence_feature_columns=seq_columns,
        cell_type=cell_type,
        learning_rate=learning_rate,
        config=config,
        predict_probabilities=True)

    train_input_fn = get_shift_input_fn(batch_size, sequence_length, seed=12321)
    eval_input_fn = get_shift_input_fn(batch_size, sequence_length, seed=32123)

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)

    prediction_dict = sequence_estimator.predict(
        input_fn=eval_input_fn, as_iterable=False)
    for i, state_size in enumerate([4, 4, 8, 8, 7, 7]):
      state_piece = prediction_dict[dynamic_rnn_estimator._get_state_name(i)]
      self.assertListEqual(list(state_piece.shape), [batch_size, state_size])

  def testMultipleRuns(self):
    """Tests resuming training by feeding state."""
    cell_sizes = [4, 7]
    batch_size = 11
    learning_rate = 0.1
    train_sequence_length = 21
    train_steps = 121
    dropout_keep_probabilities = [0.5, 0.5, 0.5]
    prediction_steps = [3, 2, 5, 11, 6]

    def get_input_fn(batch_size, sequence_length, state_dict, starting_step=0):

      def input_fn():
        sequence = constant_op.constant(
            [[(starting_step + i + j) % 2 for j in range(sequence_length + 1)]
             for i in range(batch_size)],
            dtype=dtypes.int32)
        labels = array_ops.slice(sequence, [0, 0],
                                 [batch_size, sequence_length])
        inputs = array_ops.expand_dims(
            math_ops.cast(
                array_ops.slice(sequence, [0, 1],
                                [batch_size, sequence_length]),
                dtypes.float32), 2)
        input_dict = state_dict
        input_dict['inputs'] = inputs
        return input_dict, labels

      return input_fn

    seq_columns = [feature_column.real_valued_column('inputs', dimension=1)]
    config = run_config.RunConfig(tf_random_seed=21212)

    model_dir = tempfile.mkdtemp()
    sequence_estimator = dynamic_rnn_estimator.DynamicRnnEstimator(
        problem_type=constants.ProblemType.CLASSIFICATION,
        prediction_type=rnn_common.PredictionType.MULTIPLE_VALUE,
        num_classes=2,
        sequence_feature_columns=seq_columns,
        num_units=cell_sizes,
        cell_type='lstm',
        dropout_keep_probabilities=dropout_keep_probabilities,
        learning_rate=learning_rate,
        config=config,
        model_dir=model_dir)

    train_input_fn = get_input_fn(
        batch_size, train_sequence_length, state_dict={})

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)

    def incremental_predict(estimator, increments):
      """Run `estimator.predict` for `i` steps for `i` in `increments`."""
      step = 0
      incremental_state_dict = {}
      for increment in increments:
        input_fn = get_input_fn(
            batch_size,
            increment,
            state_dict=incremental_state_dict,
            starting_step=step)
        prediction_dict = estimator.predict(
            input_fn=input_fn, as_iterable=False)
        step += increment
        incremental_state_dict = {
            k: v
            for (k, v) in prediction_dict.items()
            if k.startswith(rnn_common.RNNKeys.STATE_PREFIX)
        }
      return prediction_dict

    pred_all_at_once = incremental_predict(sequence_estimator,
                                           [sum(prediction_steps)])
    pred_step_by_step = incremental_predict(sequence_estimator,
                                            prediction_steps)

    # Check that the last `prediction_steps[-1]` steps give the same
    # predictions.
    np.testing.assert_array_equal(
        pred_all_at_once[prediction_key.PredictionKey.CLASSES]
        [:, -1 * prediction_steps[-1]:],
        pred_step_by_step[prediction_key.PredictionKey.CLASSES],
        err_msg='Mismatch on last {} predictions.'.format(prediction_steps[-1]))
    # Check that final states are identical.
    for k, v in pred_all_at_once.items():
      if k.startswith(rnn_common.RNNKeys.STATE_PREFIX):
        np.testing.assert_array_equal(
            v, pred_step_by_step[k], err_msg='Mismatch on state {}.'.format(k))
Example #20
  def DISABLED_testLearnMajority(self):
    """Test learning the 'majority' function."""
    batch_size = 16
    sequence_length = 7
    train_steps = 500
    eval_steps = 20
    cell_type = 'lstm'
    cell_size = 4
    optimizer_type = 'Momentum'
    learning_rate = 2.0
    momentum = 0.9
    accuracy_threshold = 0.6

    def get_majority_input_fn(batch_size, sequence_length, seed=None):
      random_seed.set_random_seed(seed)

      def input_fn():
        random_sequence = random_ops.random_uniform(
            [batch_size, sequence_length], 0, 2, dtype=dtypes.int32, seed=seed)
        inputs = array_ops.expand_dims(
            math_ops.cast(random_sequence, dtypes.float32), 2)
        labels = math_ops.cast(
            array_ops.squeeze(
                math_ops.reduce_sum(inputs, axis=[1]) > (
                    sequence_length / 2.0)),
            dtypes.int32)
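        # Label is 1 iff strictly more than half of the entries in the
        # sequence are 1 (the 'majority' function being learned).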
        return {'inputs': inputs}, labels

      return input_fn

    seq_columns = [
        feature_column.real_valued_column(
            'inputs', dimension=cell_size)
    ]
    config = run_config.RunConfig(tf_random_seed=77)
    sequence_estimator = dynamic_rnn_estimator.DynamicRnnEstimator(
        problem_type=constants.ProblemType.CLASSIFICATION,
        prediction_type=rnn_common.PredictionType.SINGLE_VALUE,
        num_classes=2,
        num_units=cell_size,
        sequence_feature_columns=seq_columns,
        cell_type=cell_type,
        optimizer=optimizer_type,
        learning_rate=learning_rate,
        momentum=momentum,
        config=config,
        predict_probabilities=True)

    train_input_fn = get_majority_input_fn(batch_size, sequence_length, 1111)
    eval_input_fn = get_majority_input_fn(batch_size, sequence_length, 2222)

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)
    evaluation = sequence_estimator.evaluate(
        input_fn=eval_input_fn, steps=eval_steps)
    accuracy = evaluation['accuracy']
    self.assertGreater(accuracy, accuracy_threshold,
                       'Accuracy should be higher than {}; got {}'.format(
                           accuracy_threshold, accuracy))

    # Testing `predict` when `predict_probabilities=True`.
    prediction_dict = sequence_estimator.predict(
        input_fn=eval_input_fn, as_iterable=False)
    self.assertListEqual(
        sorted(list(prediction_dict.keys())),
        sorted([
            prediction_key.PredictionKey.CLASSES,
            prediction_key.PredictionKey.PROBABILITIES,
            dynamic_rnn_estimator._get_state_name(0),
            dynamic_rnn_estimator._get_state_name(1)
        ]))
    predictions = prediction_dict[prediction_key.PredictionKey.CLASSES]
    probabilities = prediction_dict[
        prediction_key.PredictionKey.PROBABILITIES]
    self.assertListEqual(list(predictions.shape), [batch_size])
    self.assertListEqual(list(probabilities.shape), [batch_size, 2])
Example #21
    def testForcedInitialSplits(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3

        initial_subtree = """
            nodes {
              dense_float_binary_split {
                feature_column: 0
                threshold: -0.5
                left_id: 1
                right_id: 2
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              dense_float_binary_split {
                feature_column: 1
                threshold: 0.52
                left_id: 3
                right_id: 4
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              dense_float_binary_split {
                feature_column: 1
                threshold: 0.554
                left_id: 5
                right_id: 6
              }
              node_metadata {
                gain: 0
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
            nodes {
              leaf {
                vector {
                  value: 0.0
                }
              }
            }
    """
        tree_proto = tree_config_pb2.DecisionTreeConfig()
        text_format.Merge(initial_subtree, tree_proto)

        # Set initial subtree info.
        learner_config.each_tree_start.CopyFrom(tree_proto)
        learner_config.each_tree_start_num_layers = 2

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=2,
            examples_per_layer=6,
            model_dir=model_dir,
            config=config,
            center_bias=False,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False)

        classifier.fit(input_fn=_train_input_fn, steps=100)
        # With no global-step override, the checkpoint records the actual
        # global step reached during training.
        ensemble = self._assert_checkpoint_and_return_model(
            classifier.model_dir, global_step=6)

        # TODO(nponomareva): find a better way to test this.
        expected_ensemble = """
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          dense_float_binary_split {
            threshold: 1.0
            left_id: 7
            right_id: 8
          }
          node_metadata {
            gain: 0.888888895512
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: -2.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 2.00000023842
            }
          }
        }
      }
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          dense_float_binary_split {
            threshold: 1.0
            left_id: 7
            right_id: 8
          }
          node_metadata {
            gain: 0.727760672569
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: -1.81873059273
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 1.81873047352
            }
          }
        }
      }
      trees {
        nodes {
          dense_float_binary_split {
            threshold: -0.5
            left_id: 1
            right_id: 2
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.52
            left_id: 3
            right_id: 4
          }
          node_metadata {
          }
        }
        nodes {
          dense_float_binary_split {
            feature_column: 1
            threshold: 0.554
            left_id: 5
            right_id: 6
          }
          node_metadata {
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
        nodes {
          leaf {
            vector {
              value: 0.0
            }
          }
        }
      }
      tree_weights: 0.10000000149
      tree_weights: 0.10000000149
      tree_weights: 0.10000000149
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 3
        is_finalized: true
      }
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 3
        is_finalized: true
      }
      tree_metadata {
        num_tree_weight_updates: 1
        num_layers_grown: 2
      }
      growing_metadata {
        num_layers_attempted: 3
      }
    """
        self.assertProtoEquals(expected_ensemble, ensemble)
Example #22
    def testPrepareFeaturesForSQSS(self):
        mode = model_fn_lib.ModeKeys.TRAIN
        seq_feature_name = 'seq_feature'
        sparse_seq_feature_name = 'wire_cast'
        ctx_feature_name = 'ctx_feature'
        sequence_length = 4
        embedding_dimension = 8

        features = {
            sparse_seq_feature_name:
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]],
                values=[
                    b'marlo', b'stringer', b'omar', b'stringer',
                    b'marlo', b'marlo', b'omar'
                ],
                dense_shape=[3, 2, 2]),
            seq_feature_name:
            constant_op.constant(1.0, shape=[sequence_length]),
            ctx_feature_name:
            constant_op.constant(2.0)
        }

        labels = constant_op.constant(5.0, shape=[sequence_length])

        wire_cast = feature_column.sparse_column_with_keys(
            'wire_cast', ['marlo', 'omar', 'stringer'])
        sequence_feature_columns = [
            feature_column.real_valued_column(seq_feature_name, dimension=1),
            feature_column.embedding_column(
                wire_cast,
                dimension=embedding_dimension,
                initializer=init_ops.ones_initializer())
        ]

        context_feature_columns = [
            feature_column.real_valued_column(ctx_feature_name, dimension=1)
        ]

        expected_sequence = {
            rnn_common.RNNKeys.LABELS_KEY:
            np.array([5., 5., 5., 5.]),
            seq_feature_name:
            np.array([1., 1., 1., 1.]),
            sparse_seq_feature_name:
            sparse_tensor.SparseTensor(
                indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0],
                         [1, 1, 1], [2, 0, 0], [2, 1, 1]],
                values=[
                    b'marlo', b'stringer', b'omar', b'stringer',
                    b'marlo', b'marlo', b'omar'
                ],
                dense_shape=[3, 2, 2]),
        }

        expected_context = {ctx_feature_name: 2.}

        sequence, context = ssre._prepare_features_for_sqss(
            features, labels, mode, sequence_feature_columns,
            context_feature_columns)

        def assert_equal(expected, got):
            self.assertEqual(sorted(expected), sorted(got))
            for k, v in expected.items():
                if isinstance(v, sparse_tensor.SparseTensor):
                    self.assertAllEqual(v.values.eval(), got[k].values)
                    self.assertAllEqual(v.indices.eval(), got[k].indices)
                    self.assertAllEqual(v.dense_shape.eval(),
                                        got[k].dense_shape)
                else:
                    self.assertAllEqual(v, got[k])

        with self.cached_session() as sess:
            sess.run(variables.global_variables_initializer())
            sess.run(lookup_ops.tables_initializer())
            actual_sequence, actual_context = sess.run([sequence, context])
            assert_equal(expected_sequence, actual_sequence)
            assert_equal(expected_context, actual_context)
Example #23
  def testLearnMean(self):
    """Test learning to calculate a mean."""
    batch_size = 16
    sequence_length = 3
    train_steps = 200
    eval_steps = 20
    cell_type = 'basic_rnn'
    cell_size = 8
    optimizer_type = 'Momentum'
    learning_rate = 0.1
    momentum = 0.9
    loss_threshold = 0.1

    def get_mean_input_fn(batch_size, sequence_length, seed=None):

      def input_fn():
        # Create examples by choosing 'centers' and adding uniform noise.
        centers = math_ops.matmul(
            random_ops.random_uniform(
                [batch_size, 1], -0.75, 0.75, dtype=dtypes.float32, seed=seed),
            array_ops.ones([1, sequence_length]))
        noise = random_ops.random_uniform(
            [batch_size, sequence_length],
            -0.25,
            0.25,
            dtype=dtypes.float32,
            seed=seed)
        sequences = centers + noise

        inputs = array_ops.expand_dims(sequences, 2)
        labels = math_ops.reduce_mean(sequences, axis=[1])
        return {'inputs': inputs}, labels

      return input_fn

    seq_columns = [
        feature_column.real_valued_column(
            'inputs', dimension=cell_size)
    ]
    config = run_config.RunConfig(tf_random_seed=6)
    sequence_estimator = dynamic_rnn_estimator.DynamicRnnEstimator(
        problem_type=constants.ProblemType.LINEAR_REGRESSION,
        prediction_type=rnn_common.PredictionType.SINGLE_VALUE,
        num_units=cell_size,
        sequence_feature_columns=seq_columns,
        cell_type=cell_type,
        optimizer=optimizer_type,
        learning_rate=learning_rate,
        momentum=momentum,
        config=config)

    train_input_fn = get_mean_input_fn(batch_size, sequence_length, 121)
    eval_input_fn = get_mean_input_fn(batch_size, sequence_length, 212)

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)
    evaluation = sequence_estimator.evaluate(
        input_fn=eval_input_fn, steps=eval_steps)
    loss = evaluation['loss']
    self.assertLess(loss, loss_threshold,
                    'Loss should be less than {}; got {}'.format(loss_threshold,
                                                                 loss))
Example #24
from astronet.contrib.layers.python.layers import feature_column
from astronet.contrib.learn.python.learn.utils import export
from astronet.contrib.session_bundle import exporter
from astronet.contrib.session_bundle import manifest_pb2
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.training import saver

_X_KEY = 'my_x_key'

_X_COLUMN = feature_column.real_valued_column(_X_KEY, dimension=1)


def _training_input_fn():
    x = random_ops.random_uniform(shape=(1, ), minval=0.0, maxval=1000.0)
    y = 2 * x + 3
    return {_X_KEY: x}, y


class ExportTest(test.TestCase):
    def _get_default_signature(self, export_meta_filename):
        """ Gets the default signature from the export.meta file. """
        with session.Session():
            save = saver.import_meta_graph(export_meta_filename)
            meta_graph_def = save.export_meta_graph()
            collection_def = meta_graph_def.collection_def
Example #25
    def testExport(self):
        input_feature_key = 'magic_input_feature_key'
        batch_size = 8
        num_units = [4]
        sequence_length = 10
        num_unroll = 2
        num_classes = 2

        seq_columns = [
            feature_column.real_valued_column('inputs', dimension=4)
        ]

        def get_input_fn(mode, seed):
            def input_fn():
                random_sequence = random_ops.random_uniform(
                    [sequence_length + 1], 0, 2, dtype=dtypes.int32, seed=seed)
                labels = array_ops.slice(random_sequence, [0],
                                         [sequence_length])
                inputs = math_ops.cast(
                    array_ops.slice(random_sequence, [1], [sequence_length]),
                    dtypes.float32)
                features = {'inputs': inputs}

                if mode == model_fn_lib.ModeKeys.INFER:
                    input_examples = array_ops.placeholder(dtypes.string)
                    features[input_feature_key] = input_examples
                    labels = None
                return features, labels

            return input_fn

        model_dir = tempfile.mkdtemp()

        def estimator_fn():
            return ssre.StateSavingRnnEstimator(
                constants.ProblemType.CLASSIFICATION,
                num_units=num_units,
                num_unroll=num_unroll,
                batch_size=batch_size,
                sequence_feature_columns=seq_columns,
                num_classes=num_classes,
                predict_probabilities=True,
                model_dir=model_dir,
                queue_capacity=2 + batch_size,
                seed=1234)

        # Train a bit to create an exportable checkpoint.
        estimator_fn().fit(input_fn=get_input_fn(model_fn_lib.ModeKeys.TRAIN,
                                                 seed=1234),
                           steps=100)
        # Now export, but from a fresh estimator instance, like you would
        # in an export binary. That means .export() has to work without
        # .fit() being called on the same object.
        export_dir = tempfile.mkdtemp()
        print('Exporting to', export_dir)
        estimator_fn().export(export_dir,
                              input_fn=get_input_fn(
                                  model_fn_lib.ModeKeys.INFER, seed=4321),
                              use_deprecated_input_fn=False,
                              input_feature_key=input_feature_key)
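        # Hypothetical follow-up (not in the snippet; assumes gfile from
        # tensorflow.python.platform is imported): confirm the export call
        # actually wrote something under export_dir.
        self.assertTrue(gfile.Exists(export_dir))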
Example #26
    def testLearnShiftByOne(self):
        """Tests that learning a 'shift-by-one' example.

    Each label sequence consists of the input sequence 'shifted' by one place.
    The RNN must learn to 'remember' the previous input.
    """
        batch_size = 16
        num_classes = 2
        num_unroll = 32
        sequence_length = 32
        train_steps = 300
        eval_steps = 20
        num_units = [4]
        learning_rate = 0.5
        accuracy_threshold = 0.9

        def get_shift_input_fn(sequence_length, seed=None):
            def input_fn():
                random_sequence = random_ops.random_uniform(
                    [sequence_length + 1], 0, 2, dtype=dtypes.int32, seed=seed)
                labels = array_ops.slice(random_sequence, [0],
                                         [sequence_length])
                inputs = math_ops.cast(
                    array_ops.slice(random_sequence, [1], [sequence_length]),
                    dtypes.float32)
                return {'inputs': inputs}, labels

            return input_fn

        seq_columns = [
            feature_column.real_valued_column('inputs', dimension=1)
        ]
        config = run_config.RunConfig(tf_random_seed=21212)
        sequence_estimator = ssre.StateSavingRnnEstimator(
            constants.ProblemType.CLASSIFICATION,
            num_units=num_units,
            cell_type='lstm',
            num_unroll=num_unroll,
            batch_size=batch_size,
            sequence_feature_columns=seq_columns,
            num_classes=num_classes,
            learning_rate=learning_rate,
            config=config,
            predict_probabilities=True,
            queue_capacity=2 + batch_size,
            seed=1234)

        train_input_fn = get_shift_input_fn(sequence_length, seed=12321)
        eval_input_fn = get_shift_input_fn(sequence_length, seed=32123)

        sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)

        evaluation = sequence_estimator.evaluate(input_fn=eval_input_fn,
                                                 steps=eval_steps)
        accuracy = evaluation['accuracy']
        self.assertGreater(
            accuracy, accuracy_threshold,
            'Accuracy should be higher than {}; got {}'.format(
                accuracy_threshold, accuracy))

        # Testing `predict` when `predict_probabilities=True`.
        prediction_dict = sequence_estimator.predict(input_fn=eval_input_fn,
                                                     as_iterable=False)
        self.assertListEqual(
            sorted(list(prediction_dict.keys())),
            sorted([
                prediction_key.PredictionKey.CLASSES,
                prediction_key.PredictionKey.PROBABILITIES,
                ssre._get_state_name(0)
            ]))
        predictions = prediction_dict[prediction_key.PredictionKey.CLASSES]
        probabilities = prediction_dict[
            prediction_key.PredictionKey.PROBABILITIES]
        self.assertListEqual(list(predictions.shape),
                             [batch_size, sequence_length])
        self.assertListEqual(list(probabilities.shape),
                             [batch_size, sequence_length, 2])
Example #27
  def testSdcaOptimizerSparseFeaturesWithL1Reg(self):
    """SDCALinearRegressor works with sparse features and L1 regularization."""

    def input_fn():
      return {
          'example_id':
              constant_op.constant(['1', '2', '3']),
          'price':
              constant_op.constant([0.4, 0.6, 0.3]),
          'country':
              sparse_tensor.SparseTensor(
                  values=['IT', 'US', 'GB'],
                  indices=[[0, 0], [1, 3], [2, 1]],
                  dense_shape=[3, 5]),
          'weights':
              constant_op.constant([[10.0], [10.0], [10.0]])
      }, constant_op.constant([[1.4], [-0.8], [2.6]])

    with self._single_threaded_test_session():
      price = feature_column_lib.real_valued_column('price')
      country = feature_column_lib.sparse_column_with_hash_bucket(
          'country', hash_bucket_size=5)
      # Regressor with no L1 regularization.
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id',
          feature_columns=[price, country],
          weight_column_name='weights')
      regressor.fit(input_fn=input_fn, steps=20)
      no_l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
      variable_names = regressor.get_variable_names()
      self.assertIn('linear/price/weight', variable_names)
      self.assertIn('linear/country/weights', variable_names)
      no_l1_reg_weights = {
          'linear/price/weight':
              regressor.get_variable_value('linear/price/weight'),
          'linear/country/weights':
              regressor.get_variable_value('linear/country/weights'),
      }

      # Regressor with L1 regularization.
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id',
          feature_columns=[price, country],
          l1_regularization=1.0,
          weight_column_name='weights')
      regressor.fit(input_fn=input_fn, steps=20)
      l1_reg_loss = regressor.evaluate(input_fn=input_fn, steps=1)['loss']
      l1_reg_weights = {
          'linear/price/weight':
              regressor.get_variable_value('linear/price/weight'),
          'linear/country/weights':
              regressor.get_variable_value('linear/country/weights'),
      }

      # Unregularized loss is lower when there is no L1 regularization.
      self.assertLess(no_l1_reg_loss, l1_reg_loss)
      self.assertLess(no_l1_reg_loss, 0.05)

      # But weights returned by the regressor with L1 regularization have
      # smaller L1 norm.
      l1_reg_weights_norm, no_l1_reg_weights_norm = 0.0, 0.0
      for var_name in sorted(l1_reg_weights):
        l1_reg_weights_norm += sum(
            np.absolute(l1_reg_weights[var_name].flatten()))
        no_l1_reg_weights_norm += sum(
            np.absolute(no_l1_reg_weights[var_name].flatten()))
        print('Var name: %s, value: %s' %
              (var_name, no_l1_reg_weights[var_name].flatten()))
      self.assertLess(l1_reg_weights_norm, no_l1_reg_weights_norm)
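The accumulation above is just a per-variable L1 norm; an equivalent, more idiomatic NumPy spelling (a sketch, not part of the original test) would be:

      # ord=1 on a flattened vector is exactly the sum of absolute values.
      l1_reg_weights_norm = sum(
          np.linalg.norm(l1_reg_weights[v].flatten(), ord=1)
          for v in l1_reg_weights)
      no_l1_reg_weights_norm = sum(
          np.linalg.norm(no_l1_reg_weights[v].flatten(), ord=1)
          for v in no_l1_reg_weights)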
Example #28
  def testMultipleRuns(self):
    """Tests resuming training by feeding state."""
    cell_sizes = [4, 7]
    batch_size = 11
    learning_rate = 0.1
    train_sequence_length = 21
    train_steps = 121
    dropout_keep_probabilities = [0.5, 0.5, 0.5]
    prediction_steps = [3, 2, 5, 11, 6]

    def get_input_fn(batch_size, sequence_length, state_dict, starting_step=0):

      def input_fn():
        sequence = constant_op.constant(
            [[(starting_step + i + j) % 2 for j in range(sequence_length + 1)]
             for i in range(batch_size)],
            dtype=dtypes.int32)
        labels = array_ops.slice(sequence, [0, 0],
                                 [batch_size, sequence_length])
        inputs = array_ops.expand_dims(
            math_ops.cast(
                array_ops.slice(sequence, [0, 1],
                                [batch_size, sequence_length]),
                dtypes.float32), 2)
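        # Reusing the caller's `state_dict` as the feature dict means any
        # final-state tensors captured from a previous `predict` call are fed
        # back in as the RNN's initial state (see incremental_predict below).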
        input_dict = state_dict
        input_dict['inputs'] = inputs
        return input_dict, labels

      return input_fn

    seq_columns = [feature_column.real_valued_column('inputs', dimension=1)]
    config = run_config.RunConfig(tf_random_seed=21212)

    model_dir = tempfile.mkdtemp()
    sequence_estimator = dynamic_rnn_estimator.DynamicRnnEstimator(
        problem_type=constants.ProblemType.CLASSIFICATION,
        prediction_type=rnn_common.PredictionType.MULTIPLE_VALUE,
        num_classes=2,
        sequence_feature_columns=seq_columns,
        num_units=cell_sizes,
        cell_type='lstm',
        dropout_keep_probabilities=dropout_keep_probabilities,
        learning_rate=learning_rate,
        config=config,
        model_dir=model_dir)

    train_input_fn = get_input_fn(
        batch_size, train_sequence_length, state_dict={})

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)

    def incremental_predict(estimator, increments):
      """Run `estimator.predict` for `i` steps for `i` in `increments`."""
      step = 0
      incremental_state_dict = {}
      for increment in increments:
        input_fn = get_input_fn(
            batch_size,
            increment,
            state_dict=incremental_state_dict,
            starting_step=step)
        prediction_dict = estimator.predict(
            input_fn=input_fn, as_iterable=False)
        step += increment
        incremental_state_dict = {
            k: v
            for (k, v) in prediction_dict.items()
            if k.startswith(rnn_common.RNNKeys.STATE_PREFIX)
        }
      return prediction_dict

    pred_all_at_once = incremental_predict(sequence_estimator,
                                           [sum(prediction_steps)])
    pred_step_by_step = incremental_predict(sequence_estimator,
                                            prediction_steps)

    # Check that the last `prediction_steps[-1]` steps give the same
    # predictions.
    np.testing.assert_array_equal(
        pred_all_at_once[prediction_key.PredictionKey.CLASSES]
        [:, -1 * prediction_steps[-1]:],
        pred_step_by_step[prediction_key.PredictionKey.CLASSES],
        err_msg='Mismatch on last {} predictions.'.format(prediction_steps[-1]))
    # Check that final states are identical.
    for k, v in pred_all_at_once.items():
      if k.startswith(rnn_common.RNNKeys.STATE_PREFIX):
        np.testing.assert_array_equal(
            v, pred_step_by_step[k], err_msg='Mismatch on state {}.'.format(k))