Example #1
def loadModel():
    global classifier
    classifier = estimator.SKCompat(estimator.Estimator(
        model_fn=news_cnn_model.generate_cnn_model(N_CLASSES, n_words),
        model_dir=MODEL_DIR
    ))
    df = pd.read_csv(CSV_FILE, header=None)

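    # Scoring a single row below forces the estimator to build its graph and
    # restore the latest checkpoint from MODEL_DIR.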
    train_df = df[0:1]
    x_train = train_df[1]
    x_train = np.array(list(vocab_processor.transform(x_train)), dtype=int)
    y_train = np.array(train_df[0], dtype=int)
    classifier.score(x_train, y_train)

    print('Model updated')
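
A minimal usage sketch for the loader above; it assumes loadModel() has already run and that the module-level vocab_processor is in scope, and the sample headline is made up.

loadModel()
sample = np.array(list(vocab_processor.transform(['some news headline'])), dtype=int)
print(classifier.predict(sample))  # the SKCompat wrapper returns a dict of output arrays
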
 def testModelFnWithModelDir(self):
   expected_param = {'some_param': 'some_value'}
   expected_model_dir = tempfile.mkdtemp()
   def _argument_checker(features, labels, mode, params, config=None,
                         model_dir=None):
     _, _, _ = features, labels, config
     # self.assertEqual(model_fn.ModeKeys.TRAIN, mode)
     # self.assertEqual(expected_param, params)
     # self.assertEqual(model_dir, expected_model_dir)
     return constant_op.constant(0.), constant_op.constant(
         0.), constant_op.constant(0.)
   est = estimator.Estimator(model_fn=_argument_checker,
                             params=expected_param,
                             model_dir=expected_model_dir)
   est.fit(input_fn=boston_input_fn, steps=1)
 def testIrisIterator(self):
   iris = base.load_iris()
   est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
   x_iter = itertools.islice(iris.data, 100)
   y_iter = itertools.islice(iris.target, 100)
   estimator.SKCompat(est).fit(x_iter, y_iter, steps=20)
   eval_result = est.evaluate(input_fn=iris_input_fn, steps=1)
   x_iter_eval = itertools.islice(iris.data, 100)
   y_iter_eval = itertools.islice(iris.target, 100)
   score_result = estimator.SKCompat(est).score(x_iter_eval, y_iter_eval)
   print(score_result)
   self.assertItemsEqual(eval_result.keys(), score_result.keys())
   self.assertItemsEqual(['global_step', 'loss'], score_result.keys())
   predictions = estimator.SKCompat(est).predict(x=iris.data)['class']
   self.assertEqual(len(predictions), iris.target.shape[0])
Example #4
    def testInvalidModelFn_no_loss(self):
        def _invalid_model_fn(features, labels, mode):
            # pylint: disable=unused-argument
            w = variables_lib.Variable(42.0, 'weight')
            loss = 100.0 - w
            train_op = w.assign_add(loss / 100.0)
            predictions = loss
            if mode == model_fn.ModeKeys.EVAL:
                loss = None
            return predictions, loss, train_op

        est = estimator.Estimator(model_fn=_invalid_model_fn)
        est.fit(input_fn=boston_input_fn, steps=1)
        with self.assertRaisesRegexp(ValueError, 'Missing loss'):
            est.evaluate(input_fn=boston_eval_fn, steps=1)
 def testBostonAllDictionaryInput(self):
   boston = base.load_boston()
   est = estimator.Estimator(model_fn=linear_model_fn)
   boston_input = {'input': boston.data}
   float64_target = {'labels': boston.target.astype(np.float64)}
   est.fit(x=boston_input, y=float64_target, steps=100)
   scores = est.evaluate(
       x=boston_input,
       y=float64_target,
       metrics={'MSE': metric_ops.streaming_mean_squared_error})
   predictions = np.array(list(est.predict(x=boston_input)))
   other_score = _sklearn.mean_squared_error(predictions, boston.target)
   self.assertAllClose(other_score, scores['MSE'])
   self.assertTrue('global_step' in scores)
   self.assertEqual(scores['global_step'], 100)
 def testIrisInputFnLabelsDict(self):
     iris = base.load_iris()
     est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
     est.fit(input_fn=iris_input_fn_labels_dict, steps=100)
     _ = est.evaluate(input_fn=iris_input_fn_labels_dict,
                      steps=1,
                      metrics={
                          'accuracy':
                          metric_spec.MetricSpec(
                              metric_fn=metric_ops.streaming_accuracy,
                              prediction_key='class',
                              label_key='labels')
                      })
     predictions = list(est.predict(x=iris.data))
     self.assertEqual(len(predictions), iris.target.shape[0])
    def testCustomConfig(self):
        test_random_seed = 5783452

        class TestInput(object):
            def __init__(self):
                self.random_seed = 0

            def config_test_input_fn(self):
                self.random_seed = ops.get_default_graph().seed
                return constant_op.constant([[1.]]), constant_op.constant([1.])

        config = run_config.RunConfig(tf_random_seed=test_random_seed)
        test_input = TestInput()
        est = estimator.Estimator(model_fn=linear_model_fn, config=config)
        est.fit(input_fn=test_input.config_test_input_fn, steps=1)
        # If input_fn ran, it will have given us the random seed set on the graph.
        self.assertEquals(test_random_seed, test_input.random_seed)
    def testModelFnArgs(self):
        expected_param = {'some_param': 'some_value'}
        expected_config = run_config.RunConfig()
        expected_config.i_am_test = True

        def _argument_checker(features, labels, mode, params, config):
            _, _ = features, labels
            self.assertEqual(model_fn.ModeKeys.TRAIN, mode)
            self.assertEqual(expected_param, params)
            self.assertTrue(config.i_am_test)
            return constant_op.constant(0.), constant_op.constant(
                0.), constant_op.constant(0.)

        est = estimator.Estimator(model_fn=_argument_checker,
                                  params=expected_param,
                                  config=expected_config)
        est.fit(input_fn=boston_input_fn, steps=1)
def _build_estimator_for_resource_export_test():
    def _input_fn():
        iris = base.load_iris()
        return {
            'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
        }, constant_op.constant(iris.target, shape=[150], dtype=dtypes.int32)

    feature_columns = [
        feature_column_lib.real_valued_column('feature', dimension=4)
    ]

    def resource_constant_model_fn(unused_features, unused_labels, mode):
        """A model_fn that loads a constant from a resource and serves it."""
        assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                        model_fn.ModeKeys.INFER)

        const = constant_op.constant(-1, dtype=dtypes.int64)
        table = lookup.MutableHashTable(dtypes.string,
                                        dtypes.int64,
                                        const,
                                        name='LookupTableModel')
        if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
            key = constant_op.constant(['key'])
            value = constant_op.constant([42], dtype=dtypes.int64)
            train_op_1 = table.insert(key, value)
            training_state = lookup.MutableHashTable(
                dtypes.string,
                dtypes.int64,
                const,
                name='LookupTableTrainingState')
            training_op_2 = training_state.insert(key, value)
            return const, const, control_flow_ops.group(
                train_op_1, training_op_2)
        if mode == model_fn.ModeKeys.INFER:
            key = constant_op.constant(['key'])
            prediction = table.lookup(key)
            return prediction, const, control_flow_ops.no_op()

    est = estimator.Estimator(model_fn=resource_constant_model_fn)
    est.fit(input_fn=_input_fn, steps=1)

    feature_spec = feature_column_lib.create_feature_spec_for_parsing(
        feature_columns)
    serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(
        feature_spec)
    return est, serving_input_fn
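
A short usage sketch for the helper above, mirroring the export_savedmodel call that appears later in this listing; it assumes the helper runs inside the same test module.

est, serving_input_fn = _build_estimator_for_resource_export_test()
# Export a SavedModel next to the estimator's model_dir, as the scaffold test below also does.
export_dir = est.export_savedmodel(est.model_dir + '/export', serving_input_fn)
print('SavedModel written to', export_dir)
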
    def testInvalidModelFn_no_prediction(self):
        def _invalid_model_fn(features, labels):
            # pylint: disable=unused-argument
            w = variables_lib.Variable(42.0, 'weight')
            loss = 100.0 - w
            train_op = w.assign_add(loss / 100.0)
            return None, loss, train_op

        est = estimator.Estimator(model_fn=_invalid_model_fn)
        est.fit(input_fn=boston_input_fn, steps=1)
        with self.assertRaisesRegexp(ValueError, 'Missing prediction'):
            est.evaluate(input_fn=boston_eval_fn, steps=1)
        with self.assertRaisesRegexp(ValueError, 'Missing prediction'):
            est.predict(input_fn=boston_input_fn)
        with self.assertRaisesRegexp(ValueError, 'Missing prediction'):
            est.predict(input_fn=functools.partial(boston_input_fn,
                                                   num_epochs=1),
                        as_iterable=True)
    def testModelFnScaffold(self):
        self.is_init_fn_called = False

        def _init_fn(scaffold, session):
            _, _ = scaffold, session
            self.is_init_fn_called = True

        def _model_fn_scaffold(features, labels, mode):
            _, _ = features, labels
            return model_fn.ModelFnOps(
                mode=mode,
                predictions=constant_op.constant(0.),
                loss=constant_op.constant(0.),
                train_op=constant_op.constant(0.),
                scaffold=monitored_session.Scaffold(init_fn=_init_fn))

        est = estimator.Estimator(model_fn=_model_fn_scaffold)
        est.fit(input_fn=boston_input_fn, steps=1)
        self.assertTrue(self.is_init_fn_called)
def main(unused_argv):
    # global n_words
    # Prepare training and testing data
    dbpedia = learn.datasets.load_dataset(
        'dbpedia', test_with_fake_data=False)  #FLAGS.test_with_fake_data)
    x_train = pandas.DataFrame(dbpedia.train.data)[1]
    y_train = pandas.Series(dbpedia.train.target)
    x_test = pandas.DataFrame(dbpedia.test.data)[1]
    y_test = pandas.Series(dbpedia.test.target)

    if FLAGS.embeddings:
        model_, vocabulary_, x_transform_train, x_transform_test = process_emb(
            x_train, x_test)
    else:
        model_, vocabulary_, x_transform_train, x_transform_test = process_cat(
            x_train, x_test)

    x_train = np.array(list(x_transform_train))
    x_test = np.array(list(x_transform_test))

    setting.n_words = len(vocabulary_)

    print('Total words: %d' % setting.n_words)
    print('x_train shape: ' + str(x_train.shape))
    print('x_test shape: ' + str(x_test.shape))

    # Build model
    # Switch between rnn_model and bag_of_words_model to test different models.
    if FLAGS.bow_model:
        model_fn = model_
    else:
        model_fn = rnn_model

    classifier = estimator.Estimator(model_fn=model_fn)

    # Train and predict
    estimator.SKCompat(classifier).fit(x_train, y_train, steps=100)
    y_predicted = [
        p['class'] for p in classifier.predict(x_test, as_iterable=True)
    ]
    score = metrics.accuracy_score(y_test, y_predicted)
    print('Accuracy: {0:f}'.format(score))
 def testCheckInputs(self):
     est = estimator.SKCompat(estimator.Estimator(model_fn=linear_model_fn))
      # Lambdas so we have two different objects to compare
     right_features = lambda: np.ones(shape=[7, 8], dtype=np.float32)
     right_labels = lambda: np.ones(shape=[7, 10], dtype=np.int32)
     est.fit(right_features(), right_labels(), steps=1)
     # TODO(wicke): This does not fail for np.int32 because of data_feeder magic.
     wrong_type_features = np.ones(shape=[7, 8], dtype=np.int64)
     wrong_size_features = np.ones(shape=[7, 10])
     wrong_type_labels = np.ones(shape=[7, 10], dtype=np.float32)
     wrong_size_labels = np.ones(shape=[7, 11])
     est.fit(x=right_features(), y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=wrong_type_features, y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=wrong_size_features, y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=right_features(), y=wrong_type_labels, steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=right_features(), y=wrong_size_labels, steps=1)
 def testIrisAll(self):
   iris = base.load_iris()
   est = estimator.SKCompat(
       estimator.Estimator(model_fn=logistic_model_no_mode_fn))
   est.fit(iris.data, iris.target, steps=100)
   scores = est.score(
       x=iris.data,
       y=iris.target,
       metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
   predictions = est.predict(x=iris.data)
   predictions_class = est.predict(x=iris.data, outputs=['class'])['class']
   self.assertEqual(predictions['prob'].shape[0], iris.target.shape[0])
   self.assertAllClose(predictions['class'], predictions_class)
   self.assertAllClose(
       predictions['class'], np.argmax(
           predictions['prob'], axis=1))
   other_score = _sklearn.accuracy_score(iris.target, predictions['class'])
   self.assertAllClose(scores['accuracy'], other_score)
   self.assertTrue('global_step' in scores)
   self.assertEqual(100, scores['global_step'])
Example #15
    def testFeatureEngineeringFn(self):
        def input_fn():
            return {
                "x": constant_op.constant([1.])
            }, {
                "y": constant_op.constant([11.])
            }

        def feature_engineering_fn(features, labels):
            _, _ = features, labels
            return {
                "transformed_x": constant_op.constant([9.])
            }, {
                "transformed_y": constant_op.constant([99.])
            }

        def model_fn(features, labels):
            # dummy variable:
            _ = variables_lib.Variable([0.])
            _ = labels
            predictions = features["transformed_x"]
            loss = constant_op.constant([2.])
            update_global_step = variables.get_global_step().assign_add(1)
            return predictions, loss, update_global_step

        estimator = estimator_lib.Estimator(
            model_fn=model_fn, feature_engineering_fn=feature_engineering_fn)
        estimator.fit(input_fn=input_fn, steps=1)
        prediction = next(
            estimator.predict(input_fn=input_fn, as_iterable=True))
        # predictions = transformed_x (9)
        self.assertEqual(9., prediction)
        metrics = estimator.evaluate(
            input_fn=input_fn,
            steps=1,
            metrics={
                "label":
                metric_spec.MetricSpec(lambda predictions, labels: labels)
            })
        # labels = transformed_y (99)
        self.assertEqual(99., metrics["label"])
Example #16
    def testFeatureEngineeringFnWithSameName(self):
        def input_fn():
            return {
                "x": constant_op.constant(["9."])
            }, {
                "y": constant_op.constant(["99."])
            }

        def feature_engineering_fn(features, labels):
            # Github #12205: raise a TypeError if called twice.
            _ = string_ops.string_split(features["x"])
            features["x"] = constant_op.constant([9.])
            labels["y"] = constant_op.constant([99.])
            return features, labels

        def model_fn(features, labels):
            # dummy variable:
            _ = variables_lib.Variable([0.])
            _ = labels
            predictions = features["x"]
            loss = constant_op.constant([2.])
            update_global_step = variables.get_global_step().assign_add(1)
            return predictions, loss, update_global_step

        estimator = estimator_lib.Estimator(
            model_fn=model_fn, feature_engineering_fn=feature_engineering_fn)
        estimator.fit(input_fn=input_fn, steps=1)
        prediction = next(
            estimator.predict(input_fn=input_fn, as_iterable=True))
        # predictions = transformed_x (9)
        self.assertEqual(9., prediction)
        metrics = estimator.evaluate(
            input_fn=input_fn,
            steps=1,
            metrics={
                "label":
                metric_spec.MetricSpec(lambda predictions, labels: labels)
            })
        # labels = transformed_y (99)
        self.assertEqual(99., metrics["label"])
Example #17
 def __init__(self,
              params,
              device_assigner=None,
              model_dir=None,
              graph_builder_class=tensor_forest.RandomForestGraphs,
              config=None,
              weights_name=None,
              keys_name=None,
              feature_engineering_fn=None,
              early_stopping_rounds=100):
     self.params = params.fill()
     self.graph_builder_class = graph_builder_class
     self.early_stopping_rounds = early_stopping_rounds
     self._estimator = estimator.Estimator(
         model_fn=get_model_fn(params,
                               graph_builder_class,
                               device_assigner,
                               weights_name=weights_name,
                               keys_name=keys_name),
         model_dir=model_dir,
         config=config,
         feature_engineering_fn=feature_engineering_fn)
  def testModelFnScaffoldSaverUsage(self):

    def _model_fn_scaffold(features, labels, mode):
      _, _ = features, labels
      variables_lib.Variable(1., 'weight')
      real_saver = saver_lib.Saver()
      self.mock_saver = test.mock.Mock(
          wraps=real_saver, saver_def=real_saver.saver_def)
      return model_fn.ModelFnOps(
          mode=mode,
          predictions=constant_op.constant([[1.]]),
          loss=constant_op.constant(0.),
          train_op=constant_op.constant(0.),
          scaffold=monitored_session.Scaffold(saver=self.mock_saver))

    def input_fn():
      return {
          'x': constant_op.constant([[1.]]),
      }, constant_op.constant([[1.]])

    est = estimator.Estimator(model_fn=_model_fn_scaffold)
    est.fit(input_fn=input_fn, steps=1)
    self.assertTrue(self.mock_saver.save.called)
    est.evaluate(input_fn=input_fn, steps=1)
    self.assertTrue(self.mock_saver.restore.called)
    est.predict(input_fn=input_fn)
    self.assertTrue(self.mock_saver.restore.called)
    def serving_input_fn():
      serialized_tf_example = array_ops.placeholder(dtype=dtypes.string,
                                                    shape=[None],
                                                    name='input_example_tensor')
      features, labels = input_fn()
      return input_fn_utils.InputFnOps(
          features, labels, {'examples': serialized_tf_example})

    est.export_savedmodel(est.model_dir + '/export', serving_input_fn)
    self.assertTrue(self.mock_saver.restore.called)
Example #19
  def __init__(self,
               num_clusters,
               model_dir=None,
               initial_clusters=clustering_ops.RANDOM_INIT,
               distance_metric=clustering_ops.SQUARED_EUCLIDEAN_DISTANCE,
               random_seed=0,
               use_mini_batch=True,
               kmeans_plus_plus_num_retries=2,
               config=None):
    """Creates a model for running KMeans training and inference.

    Args:
      num_clusters: number of clusters to train.
      model_dir: the directory to save the model results and log files.
      initial_clusters: specifies how to initialize the clusters for training.
        See clustering_ops.kmeans for the possible values.
      distance_metric: the distance metric used for clustering.
        See clustering_ops.kmeans for the possible values.
      random_seed: Python integer. Seed for PRNG used to initialize centers.
      use_mini_batch: If true, use the mini-batch k-means algorithm. Else assume
        full batch.
      kmeans_plus_plus_num_retries: For each point that is sampled during
        kmeans++ initialization, this parameter specifies the number of
        additional points to draw from the current distribution before selecting
        the best. If a negative value is specified, a heuristic is used to
        sample O(log(num_to_sample)) additional points.
      config: See Estimator
    """
    self._num_clusters = num_clusters
    self._training_initial_clusters = initial_clusters
    self._distance_metric = distance_metric
    self._random_seed = random_seed
    self._use_mini_batch = use_mini_batch
    self._kmeans_plus_plus_num_retries = kmeans_plus_plus_num_retries
    self._estimator = estimator.Estimator(
        model_fn=self._get_model_function(), model_dir=model_dir)
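
A minimal usage sketch for the k-means wrapper above, assuming it is the TF 1.x contrib.learn KMeansClustering estimator (the fit() and clusters() calls below are that assumed interface) and using synthetic data.

import numpy as np
import tensorflow as tf

points = np.random.rand(1000, 2).astype(np.float32)  # synthetic 2-D points

# Assumed contrib interface: tf.contrib.learn.KMeansClustering wraps the
# __init__ shown above and exposes fit()/clusters().
kmeans = tf.contrib.learn.KMeansClustering(num_clusters=3, use_mini_batch=False)
kmeans.fit(x=points, steps=10)
print(kmeans.clusters())  # learned cluster centers
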
 def testIrisAllDictionaryInput(self):
   iris = base.load_iris()
   est = estimator.Estimator(model_fn=logistic_model_no_mode_fn)
   iris_data = {'input': iris.data}
   iris_target = {'labels': iris.target}
   est.fit(iris_data, iris_target, steps=100)
   scores = est.evaluate(
       x=iris_data,
       y=iris_target,
       metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
   predictions = list(est.predict(x=iris_data))
   predictions_class = list(est.predict(x=iris_data, outputs=['class']))
   self.assertEqual(len(predictions), iris.target.shape[0])
   classes_batch = np.array([p['class'] for p in predictions])
   self.assertAllClose(classes_batch,
                       np.array([p['class'] for p in predictions_class]))
   self.assertAllClose(
       classes_batch,
       np.argmax(
           np.array([p['prob'] for p in predictions]), axis=1))
   other_score = _sklearn.accuracy_score(iris.target, classes_batch)
   self.assertAllClose(other_score, scores['accuracy'])
   self.assertTrue('global_step' in scores)
   self.assertEqual(scores['global_step'], 100)
  def testModelFnArgs(self):
    features = {'x': 42., 'y': 43.}
    labels = 44.
    expected_params = {'some_param': 'some_value'}
    expected_config = run_config.RunConfig()
    expected_config.i_am_test = True

    # TODO(ptucker): We have to roll our own mock since Estimator._get_arguments
    # doesn't work with mock fns.
    model_fn_call_count = [0]

    # `features` and `labels` are passed by position, `arg0` and `arg1` here.
    def _model_fn(arg0, arg1, mode, params, config):
      model_fn_call_count[0] += 1
      # self.assertItemsEqual(features.keys(), arg0.keys())
      # self.assertEqual(model_fn.ModeKeys.TRAIN, mode)
      # self.assertEqual(expected_params, params)
      # self.assertTrue(config.i_am_test)
      return _model_fn_ops(features, labels, arg0, arg1, mode)

    est = estimator.Estimator(
        model_fn=_model_fn, params=expected_params, config=expected_config)
    # self.assertEqual(0, model_fn_call_count[0])
    est.fit(input_fn=_make_input_fn(features, labels), steps=1)
 def testBadInput(self):
   est = estimator.Estimator(model_fn=linear_model_fn)
   self.assertRaisesRegexp(
       ValueError,
       'Either x or input_fn must be provided.',
       est.fit,
       x=None,
       input_fn=None,
       steps=1)
   self.assertRaisesRegexp(
       ValueError,
       'Can not provide both input_fn and x or y',
       est.fit,
       x='X',
       input_fn=iris_input_fn,
       steps=1)
   self.assertRaisesRegexp(
       ValueError,
       'Can not provide both input_fn and x or y',
       est.fit,
       y='Y',
       input_fn=iris_input_fn,
       steps=1)
   self.assertRaisesRegexp(
       ValueError,
       'Can not provide both input_fn and batch_size',
       est.fit,
       input_fn=iris_input_fn,
       batch_size=100,
       steps=1)
   self.assertRaisesRegexp(
       ValueError,
       'Inputs cannot be tensors. Please provide input_fn.',
       est.fit,
       x=constant_op.constant(1.),
       steps=1)
Example #23
    def __init__(self,
                 hidden_units,
                 feature_columns,
                 model_dir=None,
                 weight_column_name=None,
                 optimizer=None,
                 activation_fn=nn.relu,
                 dropout=None,
                 gradient_clip_norm=None,
                 enable_centered_bias=False,
                 config=None,
                 feature_engineering_fn=None,
                 label_dimension=1,
                 embedding_lr_multipliers=None):
        """Initializes a `DNNRegressor` instance.

    Args:
      hidden_units: List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        labels which are the output of `input_fn` and
                        returns features and labels which will be fed
                        into the model.
      label_dimension: Dimension of the label for multilabels. Defaults to 1.
      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
          a `float` multiplier. Multiplier will be used to multiply with
          learning rate for the embedding variables.

    Returns:
      A `DNNRegressor` estimator.
    """
        self._feature_columns = feature_columns

        self._estimator = estimator.Estimator(
            model_fn=_dnn_model_fn,
            model_dir=model_dir,
            config=config,
            params={
                "head":
                head_lib._regression_head(  # pylint: disable=protected-access
                    label_dimension=label_dimension,
                    weight_column_name=weight_column_name,
                    enable_centered_bias=enable_centered_bias),
                "hidden_units":
                hidden_units,
                "feature_columns":
                feature_columns,
                "optimizer":
                optimizer,
                "activation_fn":
                activation_fn,
                "dropout":
                dropout,
                "gradient_clip_norm":
                gradient_clip_norm,
                "num_ps_replicas":
                config.num_ps_replicas if config else 0,
                "embedding_lr_multipliers":
                embedding_lr_multipliers,
            },
            feature_engineering_fn=feature_engineering_fn)
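
A minimal usage sketch for a regressor configured this way, assuming the TF 1.x contrib API (tf.contrib.learn.DNNRegressor and tf.contrib.layers.real_valued_column) and synthetic data.

import numpy as np
import tensorflow as tf

x = np.random.rand(100, 4).astype(np.float32)  # synthetic features
y = x.sum(axis=1).astype(np.float32)           # synthetic targets

feature_columns = [tf.contrib.layers.real_valued_column('', dimension=4)]
regressor = tf.contrib.learn.DNNRegressor(hidden_units=[64, 32],
                                          feature_columns=feature_columns)
regressor.fit(x=x, y=y, steps=100)
print(regressor.evaluate(x=x, y=y, steps=1))   # includes 'loss' and 'global_step'
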
Example #24
    def __init__(self,
                 example_id_column,
                 feature_columns,
                 weight_column_name=None,
                 model_dir=None,
                 l1_regularization=0.0,
                 l2_regularization=0.0,
                 num_loss_partitions=1,
                 kernels=None,
                 config=None,
                 feature_engineering_fn=None):
        """Constructs an `SVM` estimator object.

    Args:
      example_id_column: A string defining the feature column name representing
        example ids. Used to initialize the underlying optimizer.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      l1_regularization: L1-regularization parameter. Refers to global L1
        regularization (across all examples).
      l2_regularization: L2-regularization parameter. Refers to global L2
        regularization (across all examples).
      num_loss_partitions: number of partitions of the (global) loss function
        optimized by the underlying optimizer (SDCAOptimizer).
      kernels: A list of kernels for the SVM. Currently, no kernels are
        supported. Reserved for future use for non-linear SVMs.
      config: RunConfig object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        labels which are the output of `input_fn` and
                        returns features and labels which will be fed
                        into the model.

    Raises:
      ValueError: if kernels passed is not None.
    """
        if kernels is not None:
            raise ValueError("Kernel SVMs are not currently supported.")
        self._optimizer = sdca_optimizer.SDCAOptimizer(
            example_id_column=example_id_column,
            num_loss_partitions=num_loss_partitions,
            symmetric_l1_regularization=l1_regularization,
            symmetric_l2_regularization=l2_regularization)

        self._feature_columns = feature_columns
        self._model_dir = model_dir or tempfile.mkdtemp()
        self._chief_hook = linear._SdcaUpdateWeightsHook()  # pylint: disable=protected-access
        self._estimator = estimator.Estimator(
            model_fn=linear.sdca_model_fn,
            model_dir=self._model_dir,
            config=config,
            params={
                "head":
                head_lib._binary_svm_head(  # pylint: disable=protected-access
                    weight_column_name=weight_column_name,
                    enable_centered_bias=False),
                "feature_columns":
                feature_columns,
                "optimizer":
                self._optimizer,
                "weight_column_name":
                weight_column_name,
                "update_weights_hook":
                self._chief_hook,
            },
            feature_engineering_fn=feature_engineering_fn)
        if not self._estimator.config.is_chief:
            self._chief_hook = None
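
A minimal usage sketch for the SVM estimator above, assuming the TF 1.x contrib API (tf.contrib.learn.SVM); the example ids and feature values are synthetic, and the ids are required by the underlying SDCA optimizer.

import tensorflow as tf

def svm_input_fn():
    # Each example needs a unique string id for SDCA, plus real-valued features.
    features = {
        'example_id': tf.constant(['1', '2', '3']),
        'price': tf.constant([[0.6], [0.8], [0.3]]),
    }
    labels = tf.constant([1, 0, 1])
    return features, labels

svm = tf.contrib.learn.SVM(
    example_id_column='example_id',
    feature_columns=[tf.contrib.layers.real_valued_column('price')],
    l2_regularization=1.0)
svm.fit(input_fn=svm_input_fn, steps=10)
print(svm.evaluate(input_fn=svm_input_fn, steps=1))
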
    def __init__(
            self,  # _joint_linear_weights pylint: disable=invalid-name
            model_dir=None,
            n_classes=2,
            weight_column_name=None,
            linear_feature_columns=None,
            linear_optimizer=None,
            _joint_linear_weights=False,
            dnn_feature_columns=None,
            dnn_optimizer=None,
            dnn_hidden_units=None,
            dnn_activation_fn=nn.relu,
            dnn_dropout=None,
            gradient_clip_norm=None,
            enable_centered_bias=False,
            config=None,
            feature_engineering_fn=None):
        """Constructs a DNNLinearCombinedClassifier instance.

    Args:
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training.
        It will be multiplied by the loss of the example.
      linear_feature_columns: An iterable containing all the feature columns
        used by linear part of the model. All items in the set must be
        instances of classes derived from `FeatureColumn`.
      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. If `None`, will use a FTRL optimizer.
      _joint_linear_weights: If True a single (possibly partitioned) variable
        will be used to store the linear model weights. It's faster, but
        requires all columns are sparse and have the 'sum' combiner.
      dnn_feature_columns: An iterable containing all the feature columns used
        by deep part of the model. All items in the set must be instances of
        classes derived from `FeatureColumn`.
      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. If `None`, will use an Adagrad optimizer.
      dnn_hidden_units: List of hidden units per layer. All layers are fully
        connected.
      dnn_activation_fn: Activation function applied to each layer. If `None`,
        will use `tf.nn.relu`.
      dnn_dropout: When not None, the probability we will drop out
        a given coordinate.
      gradient_clip_norm: A float > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      config: RunConfig object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        labels which are the output of `input_fn` and
                        returns features and labels which will be fed
                        into the model.

    Raises:
      ValueError: If `n_classes` < 2.
      ValueError: If both `linear_feature_columns` and `dnn_features_columns`
        are empty at the same time.
    """
        if n_classes < 2:
            raise ValueError(
                "n_classes should be greater than 1. Given: {}".format(
                    n_classes))
        self._linear_optimizer = linear_optimizer or "Ftrl"
        linear_feature_columns = linear_feature_columns or []
        dnn_feature_columns = dnn_feature_columns or []
        self._feature_columns = linear_feature_columns + dnn_feature_columns
        if not self._feature_columns:
            raise ValueError(
                "Either linear_feature_columns or dnn_feature_columns "
                "must be defined.")
        self._dnn_hidden_units = dnn_hidden_units
        self._enable_centered_bias = enable_centered_bias

        head = head_lib._multi_class_head(  # pylint: disable=protected-access
            n_classes=n_classes,
            weight_column_name=weight_column_name,
            enable_centered_bias=enable_centered_bias)
        self._estimator = estimator.Estimator(
            model_fn=_dnn_linear_combined_model_fn,
            model_dir=model_dir,
            config=config,
            params={
                "head": head,
                "linear_feature_columns": linear_feature_columns,
                "linear_optimizer": self._linear_optimizer,
                "joint_linear_weights": _joint_linear_weights,
                "dnn_feature_columns": dnn_feature_columns,
                "dnn_optimizer": dnn_optimizer or "Adagrad",
                "dnn_hidden_units": dnn_hidden_units,
                "dnn_activation_fn": dnn_activation_fn,
                "dnn_dropout": dnn_dropout,
                "gradient_clip_norm": gradient_clip_norm,
                "num_ps_replicas": config.num_ps_replicas if config else 0,
            },
            feature_engineering_fn=feature_engineering_fn)
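
A minimal usage sketch for the wide-and-deep classifier above, assuming the TF 1.x contrib API (tf.contrib.learn.DNNLinearCombinedClassifier); the columns and data are synthetic.

import tensorflow as tf

wide_col = tf.contrib.layers.real_valued_column('age')
deep_col = tf.contrib.layers.real_valued_column('income')

def wide_deep_input_fn():
    features = {
        'age': tf.constant([[25.0], [40.0], [60.0]]),
        'income': tf.constant([[3.0], [5.0], [8.0]]),
    }
    labels = tf.constant([0, 1, 1])
    return features, labels

clf = tf.contrib.learn.DNNLinearCombinedClassifier(
    linear_feature_columns=[wide_col],
    dnn_feature_columns=[deep_col],
    dnn_hidden_units=[8, 4])
clf.fit(input_fn=wide_deep_input_fn, steps=10)
print(clf.evaluate(input_fn=wide_deep_input_fn, steps=1))
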
Example #26
    def __init__(
            self,  # _joint_weight pylint: disable=invalid-name
            feature_columns,
            model_dir=None,
            n_classes=2,
            weight_column_name=None,
            optimizer=None,
            gradient_clip_norm=None,
            enable_centered_bias=None,
            _joint_weight=False,
            config=None,
            feature_engineering_fn=None):
        """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of target classes. Default is binary classification.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: The optimizer used to train the model. If specified, it should
        be either an instance of `tf.Optimizer` or the SDCAOptimizer. If `None`,
        the Ftrl optimizer will be used.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      _joint_weight: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires all feature columns are
        sparse and use the 'sum' combiner.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        targets which are the output of `input_fn` and
                        returns features and targets which will be fed
                        into the model.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
    """
        # TODO(zoy): Give an unsupported error if enable_centered_bias is
        #    requested for SDCA once its default changes to False.
        if enable_centered_bias is None:
            enable_centered_bias = True
            dnn_linear_combined._changing_default_center_bias()  # pylint: disable=protected-access
        self._model_dir = model_dir or tempfile.mkdtemp()
        if n_classes < 2:
            raise ValueError("Classification requires n_classes >= 2")
        self._n_classes = n_classes
        self._feature_columns = feature_columns
        assert self._feature_columns
        self._weight_column_name = weight_column_name
        self._optimizer = _get_default_optimizer(feature_columns)
        if optimizer:
            self._optimizer = _get_optimizer(optimizer)
        num_ps_replicas = config.num_ps_replicas if config else 0

        chief_hook = None
        if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
            assert not _joint_weight, ("_joint_weight is incompatible with the"
                                       " SDCAOptimizer")
            model_fn = sdca_classifier_model_fn
            # We use a hook to perform the weight update and shrink step only on the
            # chief. Because the SdcaModel is constructed by the estimator within
            # the call to fit() but we need to pass the hook to fit(), we pass the
            # hook as a parameter to the model_fn and have it propagate the model
            # to the hook.
            chief_hook = _SdcaUpdateWeightsHook()
            params = {
                "feature_columns": feature_columns,
                "optimizer": self._optimizer,
                "weight_column_name": weight_column_name,
                "loss_type": "logistic_loss",
                "update_weights_hook": chief_hook,
            }
        else:
            model_fn = _linear_classifier_model_fn
            params = {
                "n_classes": n_classes,
                "weight_column_name": weight_column_name,
                "feature_columns": feature_columns,
                "optimizer": self._optimizer,
                "gradient_clip_norm": gradient_clip_norm,
                "enable_centered_bias": enable_centered_bias,
                "num_ps_replicas": num_ps_replicas,
                "joint_weights": _joint_weight,
            }

        self._estimator = estimator.Estimator(
            model_fn=model_fn,
            model_dir=self._model_dir,
            config=config,
            params=params,
            feature_engineering_fn=feature_engineering_fn)

        self._additional_run_hook = None
        if self._estimator.config.is_chief:
            self._additional_run_hook = chief_hook
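
A minimal usage sketch for the linear classifier above, assuming the TF 1.x contrib API (tf.contrib.learn.LinearClassifier) and synthetic data.

import numpy as np
import tensorflow as tf

x = np.random.rand(100, 4).astype(np.float32)   # synthetic features
y = (x.sum(axis=1) > 2.0).astype(np.int64)      # synthetic binary labels

feature_columns = [tf.contrib.layers.real_valued_column('', dimension=4)]
clf = tf.contrib.learn.LinearClassifier(feature_columns=feature_columns, n_classes=2)
clf.fit(x=x, y=y, steps=100)
print(clf.evaluate(x=x, y=y, steps=1))
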
Example #27
def single_value_rnn_classifier(num_classes,
                                num_units,
                                sequence_feature_columns,
                                context_feature_columns=None,
                                cell_type='basic_rnn',
                                num_rnn_layers=1,
                                optimizer_type='SGD',
                                learning_rate=0.1,
                                predict_probabilities=False,
                                momentum=None,
                                gradient_clipping_norm=10.0,
                                input_keep_probability=None,
                                output_keep_probability=None,
                                model_dir=None,
                                config=None,
                                params=None,
                                feature_engineering_fn=None):
  """Creates an RNN `Estimator` that predicts single labels.

  Args:
    num_classes: The number of classes for categorization.
    num_units: The size of the RNN cells.
    sequence_feature_columns: An iterable containing all the feature columns
      describing sequence features. All items in the set should be instances
      of classes derived from `FeatureColumn`.
    context_feature_columns: An iterable containing all the feature columns
      describing context features, i.e., features that apply across all time
      steps. All items in the set should be instances of classes derived from
      `FeatureColumn`.
    cell_type: A subclass of `RNNCell`, an instance of an `RNNCell`, or one of
      'basic_rnn', 'lstm' or 'gru'.
    num_rnn_layers: Number of RNN layers.
    optimizer_type: The type of optimizer to use. Either a subclass of
      `Optimizer`, an instance of an `Optimizer` or a string. Strings must be
      one of 'Adagrad', 'Momentum' or 'SGD'.
    learning_rate: Learning rate.
    predict_probabilities: A boolean indicating whether to predict probabilities
      for all classes.
    momentum: Momentum value. Only used if `optimizer_type` is 'Momentum'.
    gradient_clipping_norm: Parameter used for gradient clipping. If `None`,
      then no clipping is performed.
    input_keep_probability: Probability to keep inputs to `cell`. If `None`,
      no dropout is applied.
    output_keep_probability: Probability to keep outputs to `cell`. If `None`,
      no dropout is applied.
    model_dir: The directory in which to save and restore the model graph,
      parameters, etc.
    config: A `RunConfig` instance.
    params: `dict` of hyperparameters. Passed through to `Estimator`.
    feature_engineering_fn: Takes features and labels which are the output of
      `input_fn` and returns features and labels which will be fed into
      `model_fn`. Please check `model_fn` for a definition of features and
      labels.
  Returns:
    An initialized `Estimator`.
  """
  cell = _to_rnn_cell(cell_type, num_units, num_rnn_layers)
  target_column = layers.multi_class_target(n_classes=num_classes)
  if optimizer_type == 'Momentum':
    optimizer_type = momentum_opt.MomentumOptimizer(learning_rate, momentum)
  dynamic_rnn_model_fn = _get_dynamic_rnn_model_fn(
      cell=cell,
      target_column=target_column,
      problem_type=ProblemType.CLASSIFICATION,
      prediction_type=PredictionType.SINGLE_VALUE,
      optimizer=optimizer_type,
      sequence_feature_columns=sequence_feature_columns,
      context_feature_columns=context_feature_columns,
      predict_probabilities=predict_probabilities,
      learning_rate=learning_rate,
      gradient_clipping_norm=gradient_clipping_norm,
      input_keep_probability=input_keep_probability,
      output_keep_probability=output_keep_probability,
      name='SingleValueRnnClassifier')

  return estimator.Estimator(model_fn=dynamic_rnn_model_fn,
                             model_dir=model_dir,
                             config=config,
                             params=params,
                             feature_engineering_fn=feature_engineering_fn)
Example #28
def multi_value_rnn_classifier(num_classes,
                               num_units,
                               num_unroll,
                               batch_size,
                               input_key_column_name,
                               sequence_feature_columns,
                               context_feature_columns=None,
                               num_rnn_layers=1,
                               optimizer_type='SGD',
                               learning_rate=0.1,
                               predict_probabilities=False,
                               momentum=None,
                               gradient_clipping_norm=5.0,
                               input_keep_probability=None,
                               output_keep_probability=None,
                               model_dir=None,
                               config=None,
                               feature_engineering_fn=None,
                               num_threads=3,
                               queue_capacity=1000):
    """Creates an RNN `Estimator` that predicts sequences of labels.

  Args:
    num_classes: The number of classes for categorization.
    num_units: The size of the RNN cells.
    num_unroll: Python integer, how many time steps to unroll at a time.
      The input sequences of length `k` are then split into `k / num_unroll`
      many segments.
    batch_size: Python integer, the size of the minibatch.
    input_key_column_name: Python string, the name of the feature column
      containing a string scalar `Tensor` that serves as a unique key to
      identify input sequence across minibatches.
    sequence_feature_columns: An iterable containing all the feature columns
      describing sequence features. All items in the set should be instances
      of classes derived from `FeatureColumn`.
    context_feature_columns: An iterable containing all the feature columns
      describing context features, i.e., features that apply across all time
      steps. All items in the set should be instances of classes derived from
      `FeatureColumn`.
    num_rnn_layers: Number of RNN layers.
    optimizer_type: The type of optimizer to use. Either a subclass of
      `Optimizer`, an instance of an `Optimizer` or a string. Strings must be
      one of 'Adagrad', 'Momentum' or 'SGD'.
    learning_rate: Learning rate. This argument has no effect if `optimizer`
      is an instance of an `Optimizer`.
    predict_probabilities: A boolean indicating whether to predict probabilities
      for all classes.
    momentum: Momentum value. Only used if `optimizer_type` is 'Momentum'.
    gradient_clipping_norm: Parameter used for gradient clipping. If `None`,
      then no clipping is performed.
    input_keep_probability: Probability to keep inputs to `cell`. If `None`,
      no dropout is applied.
    output_keep_probability: Probability to keep outputs of `cell`. If `None`,
      no dropout is applied.
    model_dir: The directory in which to save and restore the model graph,
      parameters, etc.
    config: A `RunConfig` instance.
    feature_engineering_fn: Takes features and labels which are the output of
      `input_fn` and returns features and labels which will be fed into
      `model_fn`. Please check `model_fn` for a definition of features and
      labels.
    num_threads: The Python integer number of threads enqueuing input examples
      into a queue. Defaults to 3.
    queue_capacity: The max capacity of the queue in number of examples.
      Needs to be at least `batch_size`. Defaults to 1000. When iterating
      over the same input example multiple times reusing their keys the
      `queue_capacity` must be smaller than the number of examples.
  Returns:
    An initialized `Estimator`.
  """
    cell = lstm_cell(num_units, num_rnn_layers)
    target_column = layers.multi_class_target(n_classes=num_classes)
    if optimizer_type == 'Momentum':
        optimizer_type = momentum_opt.MomentumOptimizer(
            learning_rate, momentum)
    rnn_model_fn = _get_rnn_model_fn(
        cell=cell,
        target_column=target_column,
        problem_type=ProblemType.CLASSIFICATION,
        optimizer=optimizer_type,
        num_unroll=num_unroll,
        num_layers=num_rnn_layers,
        num_threads=num_threads,
        queue_capacity=queue_capacity,
        batch_size=batch_size,
        input_key_column_name=input_key_column_name,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        predict_probabilities=predict_probabilities,
        learning_rate=learning_rate,
        gradient_clipping_norm=gradient_clipping_norm,
        input_keep_probability=input_keep_probability,
        output_keep_probability=output_keep_probability,
        name='MultiValueRnnClassifier')

    return estimator.Estimator(model_fn=rnn_model_fn,
                               model_dir=model_dir,
                               config=config,
                               feature_engineering_fn=feature_engineering_fn)
Example #29
    def __init__(self,
                 hidden_units,
                 feature_columns,
                 model_dir=None,
                 n_classes=2,
                 weight_column_name=None,
                 optimizer=None,
                 activation_fn=nn.relu,
                 dropout=None,
                 gradient_clip_norm=None,
                 enable_centered_bias=False,
                 config=None,
                 feature_engineering_fn=None):
        """Initializes a DNNClassifier instance.

    Args:
      hidden_units: List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator to
        continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        It must be greater than 1.
      weight_column_name: A string defining feature column name representing
        weights. It is used to down weight or boost examples during training. It
        will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      activation_fn: Activation function applied to each layer. If `None`, will
        use `tf.nn.relu`.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm: A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      enable_centered_bias: A bool. If True, estimator will learn a centered
        bias variable for each class. Rest of the model structure learns the
        residual after centered bias.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
                        labels which are the output of `input_fn` and
                        returns features and labels which will be fed
                        into the model.

    Returns:
      A `DNNClassifier` estimator.

    Raises:
      ValueError: If `n_classes` < 2.
    """
        self._hidden_units = hidden_units
        self._feature_columns = feature_columns
        self._model_dir = model_dir or tempfile.mkdtemp()
        if n_classes <= 1:
            raise ValueError(
                "Classification requires n_classes >= 2. Given: {}".format(
                    n_classes))
        self._n_classes = n_classes
        self._weight_column_name = weight_column_name
        optimizer = optimizer or "Adagrad"
        num_ps_replicas = config.num_ps_replicas if config else 0

        self._estimator = estimator.Estimator(
            model_fn=_dnn_classifier_model_fn,
            model_dir=self._model_dir,
            config=config,
            params={
                "hidden_units": hidden_units,
                "feature_columns": feature_columns,
                "n_classes": n_classes,
                "weight_column_name": weight_column_name,
                "optimizer": optimizer,
                "activation_fn": activation_fn,
                "dropout": dropout,
                "gradient_clip_norm": gradient_clip_norm,
                "enable_centered_bias": enable_centered_bias,
                "num_ps_replicas": num_ps_replicas,
            },
            feature_engineering_fn=feature_engineering_fn)
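
A minimal usage sketch for the DNN classifier above, assuming the TF 1.x contrib API (tf.contrib.learn.DNNClassifier); it mirrors the classic iris quickstart pattern but uses synthetic data.

import numpy as np
import tensorflow as tf

x = np.random.rand(150, 4).astype(np.float32)   # synthetic iris-like features
y = np.random.randint(0, 3, size=150)           # synthetic labels in {0, 1, 2}

feature_columns = [tf.contrib.layers.real_valued_column('', dimension=4)]
clf = tf.contrib.learn.DNNClassifier(hidden_units=[10, 20, 10],
                                     feature_columns=feature_columns,
                                     n_classes=3)
clf.fit(x=x, y=y, steps=200)
print(clf.evaluate(x=x, y=y, steps=1)['accuracy'])
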
 def testMonitorsForFit(self):
     est = estimator.Estimator(model_fn=linear_model_fn)
     est.fit(input_fn=boston_input_fn,
             steps=21,
             monitors=[CheckCallsMonitor(expect_calls=21)])