Example #1
    def __init__(self,
                 params,
                 device_assigner=None,
                 model_dir=None,
                 graph_builder_class=tensor_forest.RandomForestGraphs,
                 config=None,
                 weights_name=None,
                 keys_name=None,
                 feature_engineering_fn=None,
                 early_stopping_rounds=100,
                 num_trainers=1,
                 trainer_id=0):
        """Initializes a TensorForestEstimator instance.

        Args:
          params: ForestHParams object that holds random forest hyperparameters.
            These parameters will be passed into `model_fn`.
          device_assigner: An `object` instance that controls how trees get
            assigned to devices. If `None`, will use
            `tensor_forest.RandomForestDeviceAssigner`.
          model_dir: Directory to save model parameters, graph, etc. To continue
            training a previously saved model, load checkpoints saved to this
            directory into an estimator.
          graph_builder_class: An `object` instance that defines how TF graphs
            for random forest training and inference are built. By default will
            use `tensor_forest.RandomForestGraphs`.
          config: `RunConfig` object to configure the runtime settings.
          weights_name: A string defining the feature column name representing
            weights. Will be multiplied by the loss of the example. Used to
            downweight or boost examples during training.
          keys_name: A string defining the feature column name representing
            example keys. Used by the `predict_with_keys` method.
          feature_engineering_fn: Feature engineering function. Takes features
            and labels which are the output of `input_fn` and returns features
            and labels which will be fed into the model.
          early_stopping_rounds: Allows training to terminate early if the
            forest is no longer growing. 100 by default.
          num_trainers: Number of training jobs, which will partition trees
            among them.
          trainer_id: Which trainer this instance is.

        Returns:
          A `TensorForestEstimator` instance.
        """
        self.params = params.fill()
        self.graph_builder_class = graph_builder_class
        self.early_stopping_rounds = early_stopping_rounds
        self.weights_name = weights_name
        self._estimator = estimator.Estimator(
            model_fn=get_model_fn(params,
                                  graph_builder_class,
                                  device_assigner,
                                  weights_name=weights_name,
                                  keys_name=keys_name,
                                  num_trainers=num_trainers,
                                  trainer_id=trainer_id),
            model_dir=model_dir,
            config=config,
            feature_engineering_fn=feature_engineering_fn)
        self._skcompat = estimator.SKCompat(self._estimator)
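The constructor above is normally reached through the SKCompat wrapper used throughout these examples. A minimal hedged sketch of building and fitting such an estimator (import paths assume a TF 1.x release where tf.contrib is available; the data is made-up toy data):

# Hedged sketch, not from the source: construct, fit and score a
# TensorForestEstimator through SKCompat on toy data.
import numpy as np
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.tensor_forest.client import random_forest
from tensorflow.contrib.tensor_forest.python import tensor_forest

params = tensor_forest.ForestHParams(
    num_classes=2, num_features=4, num_trees=10, max_nodes=100)
clf = estimator.SKCompat(
    random_forest.TensorForestEstimator(params, model_dir="/tmp/forest_model"))

x = np.random.rand(100, 4).astype(np.float32)   # toy features
y = np.random.randint(0, 2, size=100)           # toy binary labels
clf.fit(x=x, y=y, batch_size=32)
print(clf.score(x=x, y=y, batch_size=32))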
Example #2
 def testUntrained(self):
   boston = base.load_boston()
   est = estimator.SKCompat(estimator.Estimator(model_fn=linear_model_fn))
   with self.assertRaises(learn.NotFittedError):
     _ = est.score(x=boston.data, y=boston.target.astype(np.float64))
   with self.assertRaises(learn.NotFittedError):
     est.predict(x=boston.data)
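For contrast, a hedged sketch of the fitted path (linear_model_fn is the same helper from the surrounding test module; steps=1 keeps it cheap):

# Hedged sketch, not part of the test: after fit(), score() and predict()
# no longer raise NotFittedError.
est = estimator.SKCompat(estimator.Estimator(model_fn=linear_model_fn))
est.fit(x=boston.data, y=boston.target.astype(np.float64), steps=1)
scores = est.score(x=boston.data, y=boston.target.astype(np.float64))
predictions = est.predict(x=boston.data)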
Example #3
def build_lr_estimator(model_dir, feature_count):
    return estimator.SKCompat(
        learn.LinearClassifier(feature_columns=[
            tf.contrib.layers.real_valued_column("", dimension=feature_count)
        ],
                               n_classes=2,
                               model_dir=model_dir))
def main(unused_argv):
    # Load datasets.
    training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

    validation_metrics = {
        "accuracy":
        tf.contrib.learn.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_accuracy,
            prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "precision":
        tf.contrib.learn.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_precision,
            prediction_key=tf.contrib.learn.PredictionKey.CLASSES),
        "recall":
        tf.contrib.learn.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_recall,
            prediction_key=tf.contrib.learn.PredictionKey.CLASSES)
    }

    # Create a ValidationMonitor
    validation_monitor = tf.contrib.learn.monitors.ValidationMonitor(
        test_set.data,
        test_set.target,
        every_n_steps=50,
        metrics=validation_metrics,
        early_stopping_metric="loss",
        early_stopping_metric_minimize=True,
        early_stopping_rounds=200)

    # Specify that all features have real-value data
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    classifier = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir="/tmp/iris_model",
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    clf = estimator.SKCompat(classifier)

    # Fit model
    clf.fit(x=training_set.data, y=training_set.target, steps=2000)
    # Pass monitors=[validation_monitor] to fit() to enable the early stopping
    # configured above.

    # Evaluate accuracy
    accuracy_score = clf.score(x=test_set.data, y=test_set.target)["accuracy"]
    print("Accuracy: {0:f}".format(accuracy_score))

    # Classify two new flower samples.
    new_samples = np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]],
                           dtype=float)
    y = list(clf.predict(new_samples))
    print('Predictions: {}'.format(str(y)))
def build_rf_estimator(model_dir, feature_count):
    params = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=feature_count,
        num_trees=100,
        max_nodes=1000,
        min_split_samples=10)
    
    graph_builder_class = tensor_forest.RandomForestGraphs
    return estimator.SKCompat(random_forest.TensorForestEstimator(
        params, graph_builder_class=graph_builder_class,
        model_dir=model_dir))
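Both builder functions above return SKCompat-wrapped estimators, so they can be driven with plain NumPy arrays. A hedged usage sketch (feature count and data are made-up placeholders):

# Hedged usage sketch for build_lr_estimator / build_rf_estimator.
import numpy as np

feature_count = 4
x = np.random.rand(200, feature_count).astype(np.float32)  # placeholder features
y = np.random.randint(0, 2, size=200)                      # placeholder labels

lr_est = build_lr_estimator("/tmp/lr_model", feature_count)
lr_est.fit(x=x, y=y, steps=100)
print(lr_est.score(x=x, y=y))

rf_est = build_rf_estimator("/tmp/rf_model", feature_count)
rf_est.fit(x=x, y=y, batch_size=50)
print(rf_est.score(x=x, y=y, batch_size=50))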
Example #6
def nonconv(training_data, validation_data, test_data):
    feature_columns = learn.infer_real_valued_columns_from_input(
        training_data.images)
    classifier = learn.DNNClassifier(
        [100],
        feature_columns,
        model_dir=None,
        n_classes=10,
        optimizer=tf.train.FtrlOptimizer(0.3, l2_regularization_strength=0.1),
        activation_fn=nn.sigmoid,
        dropout=0.2)

    estimator.SKCompat(classifier).fit(training_data.images,
                                       training_data.labels.astype(np.int32),
                                       batch_size=10,
                                       steps=200000)
    mytuple = (test_data.labels,
               list(classifier.predict(test_data.images)))
    score = metrics.accuracy_score(*mytuple)
    print('Accuracy: {0:f}'.format(score))
Example #7
def build_estimator(model_dir):
  """Build an estimator."""
  params = tensor_forest.ForestHParams(
      num_classes=10, num_features=784,
      num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes)
  graph_builder_class = tensor_forest.RandomForestGraphs
  if FLAGS.use_training_loss:
    graph_builder_class = tensor_forest.TrainingLossForest
  # Use the SKCompat wrapper, which gives us a convenient way to split
  # in-memory data like MNIST into batches.
  return estimator.SKCompat(random_forest.TensorForestEstimator(
      params, graph_builder_class=graph_builder_class,
      model_dir=model_dir))
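The comment in build_estimator points out that SKCompat handles batching of in-memory arrays. A hedged usage sketch (the MNIST loader, data path, and batch size are assumptions):

# Hedged usage sketch, not from the source: feeding in-memory MNIST arrays
# through the SKCompat wrapper returned by build_estimator().
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/mnist_data")
est = build_estimator("/tmp/forest_model")
est.fit(x=mnist.train.images, y=mnist.train.labels.astype(np.int32),
        batch_size=1000)
results = est.score(x=mnist.test.images, y=mnist.test.labels.astype(np.int32),
                    batch_size=1000)
print(results)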
 def testBostonAll(self):
   boston = base.load_boston()
   est = estimator.SKCompat(estimator.Estimator(model_fn=linear_model_fn))
   float64_labels = boston.target.astype(np.float64)
   est.fit(x=boston.data, y=float64_labels, steps=100)
   scores = est.score(
       x=boston.data,
       y=float64_labels,
       metrics={'MSE': metric_ops.streaming_mean_squared_error})
   predictions = np.array(list(est.predict(x=boston.data)))
   other_score = _sklearn.mean_squared_error(predictions, boston.target)
   self.assertAllClose(scores['MSE'], other_score)
   self.assertTrue('global_step' in scores)
   self.assertEqual(100, scores['global_step'])
Example #9
def loadModel():
    global classifier
    classifier = estimator.SKCompat(estimator.Estimator(
        model_fn=news_cnn_model.generate_cnn_model(N_CLASSES, n_words),
        model_dir=MODEL_DIR
    ))
    df = pd.read_csv(CSV_FILE, header=None)

    train_df = df[0:1]
    x_train = train_df[1]
    x_train = np.array(list(vocab_processor.transform(x_train)), dtype=int)
    y_train = np.array(train_df[0], dtype=int)
    classifier.score(x_train, y_train)

    print('Model updated')
def build_estimator(model_dir):
    params = tensor_forest.ForestHParams(
        num_classes=config.num_classes,
        num_features=config.num_features,
        num_trees=config.num_trees,
        max_nodes=config.max_nodes,
        bagging_fraction=config.bagging_fraction,
        feature_bagging_fraction=config.feature_bagging_fraction)
    graph_builder_class = tensor_forest.RandomForestGraphs
    if config.use_training_loss:
        graph_builder_class = tensor_forest.TrainingLossForest
    # Use the SKCompat wrapper, which gives us a convenient way to split
    # in-memory data like MNIST into batches.
    return estimator.SKCompat(
        random_forest.TensorForestEstimator(
            params,
            graph_builder_class=graph_builder_class,
            model_dir=model_dir))
def main(unused_argv):
    # global n_words
    # Prepare training and testing data
    dbpedia = learn.datasets.load_dataset(
        'dbpedia', test_with_fake_data=False)  #FLAGS.test_with_fake_data)
    x_train = pandas.DataFrame(dbpedia.train.data)[1]
    y_train = pandas.Series(dbpedia.train.target)
    x_test = pandas.DataFrame(dbpedia.test.data)[1]
    y_test = pandas.Series(dbpedia.test.target)

    if FLAGS.embeddings:
        model_, vocabulary_, x_transform_train, x_transform_test = process_emb(
            x_train, x_test)
    else:
        model_, vocabulary_, x_transform_train, x_transform_test = process_cat(
            x_train, x_test)

    x_train = np.array(list(x_transform_train))
    x_test = np.array(list(x_transform_test))

    setting.n_words = len(vocabulary_)

    print('Total words: %d' % setting.n_words)
    print('x_train shape: ' + str(x_train.shape))
    print('x_test shape: ' + str(x_test.shape))

    # Build model
    # Switch between rnn_model and bag_of_words_model to test different models.
    if FLAGS.bow_model:
        model_fn = model_
    else:
        model_fn = rnn_model

    classifier = estimator.Estimator(model_fn=model_fn)

    # Train and predict
    estimator.SKCompat(classifier).fit(x_train, y_train, steps=100)
    y_predicted = [
        p['class'] for p in classifier.predict(x_test, as_iterable=True)
    ]
    score = metrics.accuracy_score(y_test, y_predicted)
    print('Accuracy: {0:f}'.format(score))
 def testCheckInputs(self):
     est = estimator.SKCompat(estimator.Estimator(model_fn=linear_model_fn))
      # Lambdas so we have two different objects to compare
     right_features = lambda: np.ones(shape=[7, 8], dtype=np.float32)
     right_labels = lambda: np.ones(shape=[7, 10], dtype=np.int32)
     est.fit(right_features(), right_labels(), steps=1)
     # TODO(wicke): This does not fail for np.int32 because of data_feeder magic.
     wrong_type_features = np.ones(shape=[7, 8], dtype=np.int64)
     wrong_size_features = np.ones(shape=[7, 10])
     wrong_type_labels = np.ones(shape=[7, 10], dtype=np.float32)
     wrong_size_labels = np.ones(shape=[7, 11])
     est.fit(x=right_features(), y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=wrong_type_features, y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=wrong_size_features, y=right_labels(), steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=right_features(), y=wrong_type_labels, steps=1)
     with self.assertRaises(ValueError):
         est.fit(x=right_features(), y=wrong_size_labels, steps=1)
 def testIrisAll(self):
   iris = base.load_iris()
   est = estimator.SKCompat(
       estimator.Estimator(model_fn=logistic_model_no_mode_fn))
   est.fit(iris.data, iris.target, steps=100)
   scores = est.score(
       x=iris.data,
       y=iris.target,
       metrics={('accuracy', 'class'): metric_ops.streaming_accuracy})
   predictions = est.predict(x=iris.data)
   predictions_class = est.predict(x=iris.data, outputs=['class'])['class']
   self.assertEqual(predictions['prob'].shape[0], iris.target.shape[0])
   self.assertAllClose(predictions['class'], predictions_class)
   self.assertAllClose(
       predictions['class'], np.argmax(
           predictions['prob'], axis=1))
   other_score = _sklearn.accuracy_score(iris.target, predictions['class'])
   self.assertAllClose(scores['accuracy'], other_score)
   self.assertTrue('global_step' in scores)
   self.assertEqual(100, scores['global_step'])
Example #14
    def build_model(self, global_step, is_chief, sync, num_replicas):
        # Load datasets.
        self.training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
            filename=IRIS_TRAINING,
            target_dtype=np.int,
            features_dtype=np.float32)
        self.test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
            filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

        # Specify that all features have real-value data
        feature_columns = [
            tf.contrib.layers.real_valued_column("", dimension=4)
        ]

        # Build 3 layer DNN with 10, 20, 10 units respectively.
        dnnClassifier = tf.contrib.learn.DNNClassifier(
            feature_columns=feature_columns,
            hidden_units=[10, 20, 10],
            n_classes=3,
            model_dir="/tmp/iris_model")
        self.classifier = estimator.SKCompat(dnnClassifier)

        return None
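build_model only constructs the SKCompat wrapper; a hedged sketch of a companion training step (the method name and step count are assumptions):

    def train_model(self):
        # Hedged sketch, not from the source: fit the SKCompat-wrapped DNN on
        # the iris data loaded in build_model() and return test accuracy.
        self.classifier.fit(x=self.training_set.data,
                            y=self.training_set.target,
                            steps=2000)
        scores = self.classifier.score(x=self.test_set.data,
                                       y=self.test_set.target)
        return scores["accuracy"]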
Example #15
    def train(self, data: np.ndarray, labels: np.ndarray):
        """Trains the decision forest classifier.

        Args:
            data (np.ndarray): The training data.
            labels (np.ndarray): The labels of the training data.
        """

        # build the estimator
        if self.use_training_loss:
            graph_builder_class = tensor_forest.TrainingLossForest
        else:
            graph_builder_class = tensor_forest.RandomForestGraphs

        self.estimator = estimator.SKCompat(
            random_forest.TensorForestEstimator(
                self.parameters,
                graph_builder_class=graph_builder_class,
                model_dir=self.model_dir,
                report_feature_importances=self.report_feature_importances
        ))

        self.estimator.fit(x=data, y=labels, batch_size=self.batch_size)
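A hedged sketch of an inference counterpart to train() (the method itself is not in the source; no result key is assumed, since the prediction dict layout depends on the TF version):

    def predict(self, data: np.ndarray):
        # Hedged sketch, not from the source: run inference with the same
        # SKCompat wrapper; callers inspect the returned dict for class ids
        # or probabilities, whose key names depend on the TF version.
        return self.estimator.predict(x=data, batch_size=self.batch_size)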
Example #16
def main(unused_argv):
    # Load datasets.
    training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
    test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
        filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)

    # Specify that all features have real-value data
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    dnnClassifier = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir="/tmp/iris_model",
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier = estimator.SKCompat(dnnClassifier)

    # var_names = dnnClassifier.get_variable_names()
    # print("Variable names:{}".format(var_names))

    # Fit model
    classifier.fit(x=training_set.data, y=training_set.target, max_steps=2000)

    # Evaluate accuracy
    scores = classifier.score(x=test_set.data, y=test_set.target)
    print("Accuracy: {0:f}".format(scores["accuracy"]))
    print("global_step: {0}".format(scores["global_step"]))
    print("auc: {0}".format(scores["auc"]))
    print("loss: {0}".format(scores["loss"]))

    # Classify two new flower samples.
    new_samples = np.array([[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]],
                           dtype=float)
    y_predicted = classifier.predict(new_samples)
    print('Predictions: {}'.format(str(y_predicted)))
Example #17
test, train = train[test_idx,:],\
              train[training_idx,:]
test_labels, train_labels = labels[test_idx],\
                            labels[training_idx]

train = np.array(train, dtype=np.float32)
test = np.array(test, dtype=np.float32)
train_labels = np.array(train_labels, dtype=np.int32)
test_labels = np.array(test_labels, dtype=np.int32)

# Convert features to learn style
feature_columns = learn.infer_real_valued_columns_from_input(
    train.reshape([-1, 36 * 36]))

# Logistic Regression
classifier = estimator.SKCompat(
    learn.LinearClassifier(feature_columns=feature_columns, n_classes=5))

# One line training
# steps is number of total batches
# steps*batch_size/len(train) = num_epochs
classifier.fit(train.reshape([-1, 36 * 36]),
               train_labels,
               steps=1024,
               batch_size=32)
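The epoch arithmetic in the comment above can be made concrete; a small hedged illustration (the training-set size is a made-up placeholder):

# steps * batch_size / len(train) = num_epochs, worked through:
steps = 1024
batch_size = 32
num_train_examples = 4096                        # placeholder for len(train)
print(steps * batch_size / num_train_examples)   # 8.0 passes over the data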

# sklearn compatible accuracy
test_probs = classifier.predict(test.reshape([-1, 36 * 36]))
sklearn.metrics.accuracy_score(test_labels, test_probs['classes'])

# Dense neural net (the example is truncated here in the source; a DNNClassifier
# wrapped in SKCompat, along the lines below, would typically follow)
classifier = estimator.SKCompat(
    learn.DNNClassifier(feature_columns=feature_columns,
                        hidden_units=[10, 20, 10],
                        n_classes=5))
Example #18
# (This snippet begins partway through the conv_learn model function used
#  below; the two lines that follow close a pooling op whose output is `p1`.)
                            strides=[1, 2, 2, 1],
                            padding='VALID')

    # Need to flatten conv output for use in dense layer
    p1_size = np.product([s.value for s in p1.get_shape()[1:]])
    p1f = tf.reshape(p1, [-1, p1_size])

    # densely connected layer with 32 neurons and dropout
    h_fc1 = layers.fully_connected(p1f, 5, activation_fn=tf.nn.relu)
    drop = layers.dropout(h_fc1,
                          keep_prob=0.5,
                          is_training=mode == tf.contrib.learn.ModeKeys.TRAIN)

    logits = layers.fully_connected(drop, 5, activation_fn=None)
    loss = tf.losses.softmax_cross_entropy(y, logits)
    # Setup the training function manually
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='Adam',
                                    learning_rate=0.01)
    return tf.argmax(logits, 1), loss, train_op


# Use generic estimator with our function
classifier = estimator.SKCompat(learn.Estimator(model_fn=conv_learn))

classifier.fit(train, train_labels, steps=1024, batch_size=32)

# simple accuracy
metrics.accuracy_score(test_labels, classifier.predict(test))
Example #19
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(accuracy_score(lr.predict(X_test), y_test))


# Linear classifier.

random.seed(42)
tflr = learn.LinearClassifier(n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
# tflr.fit(X_train, y_train, batch_size=128, steps=500)
# print(accuracy_score(tflr.predict(X_test), y_test))

est = estimator.SKCompat(tflr)
est.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(est.predict(X_test)["classes"], y_test))

# 3 layer neural network with rectified linear activation.

random.seed(42)
classifier = learn.DNNClassifier(hidden_units=[10, 20, 10],
    n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
# classifier.fit(X_train, y_train, batch_size=128, steps=500)
# print(accuracy_score(classifier.predict(X_test), y_test))
## use SKCompat

est = estimator.SKCompat(classifier)
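Mirroring the linear classifier above, a hedged continuation (not shown in the source) fits the SKCompat-wrapped DNN and reports accuracy:

est.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(est.predict(X_test)["classes"], y_test))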
 def testEstimatorParams(self):
     boston = base.load_boston()
     est = estimator.SKCompat(
         estimator.Estimator(model_fn=linear_model_params_fn,
                             params={'learning_rate': 0.01}))
     est.fit(x=boston.data, y=boston.target, steps=100)
def main(unused_argv):
    if REMOVE_PREVIOUS_MODEL:
        # Remove old model
        shutil.rmtree(MODEL_OUTPUT_DIR)
        os.mkdir(MODEL_OUTPUT_DIR)

    # Prepare training and testing data
    df = pd.read_csv(DATA_SET_FILE, header=None)
    train_df = df[0:3300]
    test_df = df.drop(train_df.index)

    # x - news title, y - class
    x_train = train_df[1]
    x_train = x_train.str.replace('[^\x00-\x7F]', '')

    #####################################
    '''
    x_train = train_df[2]
    x_train = x_train.str.replace('[^\x00-\x7F]','')
    tokenizer =  RegexpTokenizer(r"\w+")
    stemmer = PorterStemmer()
    #wnl = WordNetLemmatizer()

    for i in xrange(0,3000):
        x_train[i] = str(x_train[i])
        x_train[i] = tokenizer.tokenize(x_train[i])
        x_train[i] = list(word for word in x_train[i] if word not in stopwords.words('english'))
        x_train[i] = [stemmer.stem(word) for word in x_train[i]]
        #x_train[i] = [wnl.lemmatize(word) for word in x_train[i]]
        x_train[i] = " ".join(str(word) for word in x_train[i])
    '''
    ###########################################################

    y_train = np.array(train_df[0], dtype=int)
    x_test = test_df[1]
    y_test = np.array(test_df[0], dtype=int)

    # Process vocabulary
    vocab_processor = learn.preprocessing.VocabularyProcessor(
        MAX_DOCUMENT_LENGTH)
    x_train = np.array(list(vocab_processor.fit_transform(x_train)))
    x_test = np.array(list(vocab_processor.transform(x_test)))

    n_words = len(vocab_processor.vocabulary_)
    print('Total words: %d' % n_words)

    # Saving n_words and vocab_processor:
    with open(VARS_FILE, 'wb') as f:
        pickle.dump(n_words, f)
    vocab_processor.save(VOCAB_PROCESSOR_SAVE_FILE)

    # Build model
    classifier = estimator.SKCompat(
        estimator.Estimator(model_fn=news_cnn_model.generate_cnn_model(
            N_CLASSES, n_words),
                            model_dir=MODEL_OUTPUT_DIR,
                            config=learn.RunConfig(save_checkpoints_secs=10,
                                                   save_summary_steps=10)))
    # Set up logging for predictions
    tensors_to_log = {"prob": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=100)

    # Train and predict
    classifier.fit(x_train,
                   y_train,
                   batch_size=BATCH,
                   steps=STEPS,
                   monitors=[logging_hook])

    # Configure the accuracy metric
    metrics = {
        "accuracy":
        learn.MetricSpec(metric_fn=tf.metrics.accuracy, prediction_key="class")
    }

    # Evaluate the model
    eval_results = classifier.score(x=x_test, y=y_test, metrics=metrics)
Example #22
test_idx, training_idx = indices[:valid_cnt],\
 indices[valid_cnt:]
test, train = train[test_idx,:],\
              train[training_idx,:]
test_labels, train_labels = labels[test_idx],\
                            labels[training_idx]

train = np.array(train, dtype=np.float32)
test = np.array(test, dtype=np.float32)
train_labels = np.array(train_labels, dtype=np.int32)
test_labels = np.array(test_labels, dtype=np.int32)

# Convert features to learn style
feature_columns = learn.infer_real_valued_columns_from_input(
    train.reshape([-1, 36 * 36]))

# Logistic Regression
classifier = estimator.SKCompat(
    learn.LinearClassifier(feature_columns=feature_columns, n_classes=5))

# One line training
# steps is number of total batches
# steps*batch_size/len(train) = num_epochs
classifier.fit(train.reshape([-1, 36 * 36]),
               train_labels,
               steps=1024,
               batch_size=32)

# sklearn compatible accuracy
test_probs = classifier.predict(test.reshape([-1, 36 * 36]))
sklearn.metrics.accuracy_score(test_labels, test_probs['classes'])