def testIrisES(self):
    """Fit Iris DNN classifiers with and without a validation monitor.

    Two identically configured ``TensorFlowDNNClassifier`` models are
    trained; only the second is given a ``ValidationMonitor`` created with
    ``early_stopping_rounds=100``. Accuracy on the held-out test split is
    computed for both but not asserted on, so this test only checks that
    both code paths run without raising.
    """
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    # Carve the validation set out of the training data. random_state is
    # pinned here as well: left at its default of None the split is drawn
    # from NumPy's global RNG, which random.seed() above does not control,
    # so the validation set would change from run to run.
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(x_val, y_val,
                                                   early_stopping_rounds=100)

    # classifier without early stopping - overfitting
    classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                steps=1000)
    classifier1.fit(x_train, y_train)
    # The score is computed only to exercise predict(); it is not checked.
    _ = accuracy_score(y_test, classifier1.predict(x_test))

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3,
                                                steps=1000)

    classifier2.fit(x_train, y_train, monitors=[val_monitor])
    _ = accuracy_score(y_test, classifier2.predict(x_test))
    def testIrisES(self):
        """Compare Iris DNN training with and without a ValidationMonitor.

        A baseline classifier is trained for 1000 steps with no monitor and
        its summary events are counted. A second classifier is trained for
        up to 2000 steps with a ValidationMonitor tracking 'accuracy'; if the
        monitor reports an early stop, only upper bounds are asserted because
        the exact stopping point is unstable.
        """
        random.seed(42)

        dataset = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.2, random_state=42)

        # Hold out part of the training data for the validation monitor.
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=0.2, random_state=42)
        monitor = learn.monitors.ValidationMonitor(
            x_val, y_val,
            every_n_steps=50,
            early_stopping_rounds=100,
            early_stopping_metric='accuracy',
            early_stopping_metric_minimize=False)

        # Baseline: no early stopping (expected to overfit).
        baseline = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10], n_classes=3, steps=1000)
        baseline.fit(x_train, y_train)
        accuracy_score(y_test, baseline.predict(x_test))

        # Full 1000 steps, 11 summaries and no evaluation summary.
        # 11 summaries = first + every 100 out of 1000 steps.
        baseline_events = _get_summary_events(baseline.model_dir)
        self.assertEqual(11, len(baseline_events))
        with self.assertRaises(ValueError):
            _get_summary_events(baseline.model_dir + '/eval')

        # Monitored run: early stopping - improved accuracy on testing set.
        run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=1)
        monitored = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            n_classes=3,
            steps=2000,
            config=run_config)

        monitored.fit(x_train, y_train, monitors=[monitor])
        accuracy_score(y_val, monitored.predict(x_val))
        accuracy_score(y_test, monitored.predict(x_test))

        # Note, this test is unstable, so not checking for equality.
        # See stability_test for examples of stability issues.
        if not monitor.early_stopped:
            return

        self.assertLess(monitor.best_step, 2000)
        # best_value is deliberately not compared with the final validation
        # score: the monitor stops after the best score occurred, so the
        # accuracy at the current checkpoint can be lower, and that
        # comparison proved flaky.
        # TODO(ipolosukhin): Time machine for restoring old checkpoints?

        # Early stopped, unstable so checking only < than max.
        train_events = _get_summary_events(monitored.model_dir)
        self.assertLess(len(train_events), 21)
        eval_events = _get_summary_events(monitored.model_dir + '/eval')
        self.assertLess(len(eval_events), 4)
Exemple #3
0
    def testIrisMomentum(self):
        """Train an Iris DNN with a momentum optimizer and decaying rate.

        The learning rate is supplied as a callable of the global step and
        the optimizer as a callable of the learning rate. The test passes
        when accuracy on the held-out split exceeds 0.7.
        """
        random.seed(42)

        dataset = datasets.load_iris()
        train_x, test_x, train_y, test_y = train_test_split(
            dataset.data, dataset.target, test_size=0.2, random_state=42)

        def decayed_rate(global_step):
            # Exponential decay schedule starting from a rate of 0.1.
            return tf.train.exponential_decay(
                learning_rate=0.1,
                global_step=global_step,
                decay_steps=100,
                decay_rate=0.001)

        def momentum_optimizer(learning_rate):
            # Build a MomentumOptimizer for whatever rate the estimator passes.
            return tf.train.MomentumOptimizer(learning_rate, 0.9)

        model = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            n_classes=3,
            steps=800,
            learning_rate=decayed_rate,
            optimizer=momentum_optimizer)
        model.fit(train_x, train_y)

        predictions = model.predict(test_x)
        score = accuracy_score(test_y, predictions)
        self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
Exemple #4
0
    def testIrisMomentum(self):
        """Train an Iris DNN with a custom momentum optimizer.

        Feature columns are inferred from the training input, the model is
        fitted for 400 steps at a fixed learning rate of 0.01, and the test
        passes when accuracy on the held-out split exceeds 0.65.
        """
        random.seed(42)

        dataset = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(
            dataset.data, dataset.target, test_size=0.2, random_state=42)

        def momentum_optimizer(learning_rate):
            # Build a MomentumOptimizer for whatever rate the estimator passes.
            return tf.train.MomentumOptimizer(learning_rate, 0.9)

        inferred_columns = learn.infer_real_valued_columns_from_input(x_train)
        model = learn.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=inferred_columns,
            n_classes=3,
            steps=400,
            learning_rate=0.01,
            optimizer=momentum_optimizer)
        model.fit(x_train, y_train)

        predictions = model.predict(x_test)
        score = accuracy_score(y_test, predictions)
        self.assertGreater(score, 0.65,
                           "Failed with score = {0}".format(score))
Exemple #5
0
def get_updated_model(log_file, data):
    """Fit a two-hidden-layer DNN classifier on ``data`` and print its scores.

    The model is fitted on ``data['X']`` / ``data['y']`` and then scored on
    the same data; accuracy and the confusion matrix are printed.

    :param log_file: directory handed to ``fit`` as ``logdir``.
    :param data: mapping with the keys ``'X'``, ``'y'``, ``'X_train'`` and
        ``'y_train'``; ``data['y']`` and ``data['y_train']`` must expose a
        ``.values`` attribute.
    :return: the fitted ``TensorFlowDNNClassifier``.

    Example (illustrative only; ``data_dict_p`` is not defined here)::

        log_file = '/tmp/tf_examples/two_layer_final_model_DNN_32_128_10000_0.01/'
        data = data_dict_p
        model = get_updated_model(log_file, data)
    """
    layers, steps, lr = [32, 128], 10000, .01
    model = skflow.TensorFlowDNNClassifier(hidden_units=layers,
                                           n_classes=2,
                                           batch_size=128,
                                           steps=steps,
                                           learning_rate=lr)

    # NOTE(review): this monitor is constructed but never passed to fit()
    # below, so it has no effect on training. Kept as-is because it is
    # unclear whether it was meant to be wired in via ``monitors=[m]``.
    m = monitors.ValidationMonitor(data['X_train'],
                                   data['y_train'].values,
                                   every_n_steps=200)
    model.fit(data['X'], list(data['y'].values), logdir=log_file)

    predictions = model.predict(data['X'])
    # sklearn metrics take (y_true, y_pred). Accuracy is symmetric, but the
    # confusion matrix is transposed when the arguments are swapped.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(accuracy_score(data['y'].values, predictions))
    print(confusion_matrix(data['y'], predictions))

    return model
Exemple #6
0
def train_inital_tf_model(data):
    """Train the initial two-hidden-layer DNN classifier and print its scores.

    Run this only once; otherwise it takes forever.

    The model is fitted on ``data['X_train']`` / ``data['y_train']`` with a
    ``ValidationMonitor`` attached, then scored on ``data['X']`` /
    ``data['y']``; accuracy and the confusion matrix are printed.

    :param data: mapping with the keys ``'X'``, ``'y'``, ``'X_train'`` and
        ``'y_train'``; ``data['y']`` and ``data['y_train']`` must expose a
        ``.values`` attribute.
    :return: tuple ``(model, log_dir)`` of the fitted classifier and the
        directory that was passed to ``fit`` as ``logdir``.
    """
    layers, steps, lr = [32, 128], 10000, .01
    # Encode the hyper-parameters in the directory name, e.g. "32_128".
    layers_str = '_'.join(str(units) for units in layers)
    log_dir = '/tmp/tf_examples/{}_DNN_{}_{}_{}/'.format(
        "two_layer_final_model", layers_str, steps, lr)

    model = skflow.TensorFlowDNNClassifier(hidden_units=layers,
                                           n_classes=2,
                                           batch_size=128,
                                           steps=steps,
                                           learning_rate=lr)
    # NOTE(review): the monitor validates on the same X_train/y_train that
    # the model is fitted on - confirm this is intended.
    m = monitors.ValidationMonitor(data['X_train'],
                                   data['y_train'].values,
                                   every_n_steps=200)
    model.fit(data['X_train'],
              list(data['y_train'].values),
              logdir=log_dir,
              monitors=[m])

    predictions = model.predict(data['X'])
    # sklearn metrics take (y_true, y_pred). Accuracy is symmetric, but the
    # confusion matrix is transposed when the arguments are swapped.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(accuracy_score(data['y'].values, predictions))
    print(confusion_matrix(data['y'], predictions))
    # model.save(log_dir)

    return model, log_dir
Exemple #7
0
 def testNoCheckpoints(self):
     """Fit an Iris DNN classifier and save it under the test temp dir."""
     save_dir = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
     random.seed(42)
     dataset = datasets.load_iris()
     model = learn.TensorFlowDNNClassifier(
         hidden_units=[10, 20, 10], n_classes=3)
     model.fit(dataset.data, dataset.target)
     model.save(save_dir)
Exemple #8
0
 def testDNNDropout0_1(self):
     """With a small dropout of 0.1, accuracy on the fitted Iris data must exceed 0.9."""
     dataset = datasets.load_iris()
     # Dropping only a little.
     model = learn.TensorFlowDNNClassifier(
         hidden_units=[10, 20, 10], n_classes=3, dropout=0.1)
     model.fit(dataset.data, dataset.target)
     predictions = model.predict(dataset.data)
     score = accuracy_score(dataset.target, predictions)
     self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
Exemple #9
0
 def testNoCheckpoints(self):
     """Restoring a saved model whose 'checkpoint' file was removed raises ValueError."""
     save_dir = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
     random.seed(42)
     dataset = datasets.load_iris()
     model = learn.TensorFlowDNNClassifier(
         hidden_units=[10, 20, 10], n_classes=3)
     model.fit(dataset.data, dataset.target)
     model.save(save_dir)
     # Remove the 'checkpoint' file from the saved directory; restore is
     # then expected to fail.
     checkpoint_file = os.path.join(save_dir, 'checkpoint')
     os.remove(checkpoint_file)
     with self.assertRaises(ValueError):
         learn.TensorFlowEstimator.restore(save_dir)
Exemple #10
0
 def testDNN(self):
     """Save a fitted Iris DNN classifier, restore it and check its predictions.

     The restored estimator must have the same type as the original and
     reach an accuracy above 0.5 on the Iris data.
     """
     save_dir = tf.test.get_temp_dir() + '/tmp_saver3'
     random.seed(42)
     dataset = datasets.load_iris()
     original = learn.TensorFlowDNNClassifier(
         hidden_units=[10, 20, 10], n_classes=3)
     original.fit(dataset.data, dataset.target)
     original.save(save_dir)

     restored = learn.TensorFlowEstimator.restore(save_dir)
     self.assertEqual(type(restored), type(original))
     predictions = restored.predict(dataset.data)
     score = accuracy_score(dataset.target, predictions)
     self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
Exemple #11
0
 def testNoCheckpoints(self):
     """Fit an Iris DNN classifier with explicit feature columns and save it."""
     save_dir = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
     random.seed(42)
     dataset = datasets.load_iris()
     # A single unnamed real-valued column of dimension 4.
     feature_column = tf.contrib.layers.real_valued_column('', dimension=4)
     model = learn.TensorFlowDNNClassifier(
         feature_columns=[feature_column],
         hidden_units=[10, 20, 10],
         n_classes=3)
     model.fit(dataset.data, dataset.target)
     model.save(save_dir)
Exemple #12
0
 def testIrisDNN(self):
   """Grid-search an Iris DNN classifier; does nothing unless HAS_SKLEARN is set.

   The search covers two hidden-unit layouts and two learning rates, and the
   fitted search object must reach an accuracy above 0.5 on the Iris data.
   """
   if not HAS_SKLEARN:
     return

   random.seed(42)
   dataset = datasets.load_iris()
   base_model = learn.TensorFlowDNNClassifier(
       hidden_units=[10, 20, 10], n_classes=3, steps=50)
   param_grid = {
       'hidden_units': [[5, 5], [10, 10]],
       'learning_rate': [0.1, 0.01],
   }
   search = GridSearchCV(base_model, param_grid)
   search.fit(dataset.data, dataset.target)
   predictions = search.predict(dataset.data)
   score = accuracy_score(dataset.target, predictions)
   self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
Exemple #13
0
 def testIrisDNN(self):
     """Fit an Iris DNN classifier and check accuracy and parameter shapes.

     Accuracy on the fitted data must exceed 0.9, the four weight matrices
     must have the expected shapes, and there must be four bias entries.
     """
     random.seed(42)
     dataset = datasets.load_iris()
     model = learn.TensorFlowDNNClassifier(
         hidden_units=[10, 20, 10], n_classes=3)
     model.fit(dataset.data, dataset.target)
     predictions = model.predict(dataset.data)
     score = accuracy_score(dataset.target, predictions)
     self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))

     # Expected weight shapes, in layer order.
     expected_shapes = [(4, 10), (10, 20), (20, 10), (10, 3)]
     weights = model.weights_
     for index, expected in enumerate(expected_shapes):
         self.assertEqual(weights[index].shape, expected)

     biases = model.bias_
     self.assertEqual(len(biases), 4)
Exemple #14
0
# Linear classifier: fit on the training split and print the accuracy of its
# predictions on the test split.
# Python's `random` module is re-seeded with 42 first when reset_seed is truthy.
if reset_seed:
    random.seed(42)
tflr = skflow.TensorFlowLinearClassifier(n_classes=2,
                                         batch_size=128,
                                         steps=500,
                                         learning_rate=learning_rate)
tflr.fit(X_train, y_train)
print(accuracy_score(tflr.predict(X_test), y_test))

# 3 layer neural network with rectified linear activation.
# Same batch size, step count and learning rate as the linear classifier above.

if reset_seed:
    random.seed(42)
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                            n_classes=2,
                                            batch_size=128,
                                            steps=500,
                                            learning_rate=learning_rate)
classifier.fit(X_train, y_train)
print(accuracy_score(classifier.predict(X_test), y_test))

# 3 layer neural network with hyperbolic tangent activation.


def dnn_tanh(X, y):
    """Model function: three 20-unit tanh layers feeding a logistic regression.

    :param X: input passed to ``skflow.ops.dnn``.
    :param y: target passed to ``skflow.models.logistic_regression``.
    :return: whatever ``skflow.models.logistic_regression`` returns.
    """
    hidden = skflow.ops.dnn(X, [20, 20, 20], tf.tanh)
    return skflow.models.logistic_regression(hidden, y)


# Re-seed Python's `random` module with 42 when reset_seed is truthy.
if reset_seed:
    random.seed(42)