def testIrisES(self):
  """Trains two iris DNNs: a plain one and one driven by a ValidationMonitor."""
  random.seed(42)
  iris = datasets.load_iris()
  x_train, x_test, y_train, y_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)
  x_train, x_val, y_train, y_val = train_test_split(
      x_train, y_train, test_size=0.2)
  val_monitor = learn.monitors.ValidationMonitor(
      x_val, y_val, early_stopping_rounds=100)

  # Baseline: no early stopping, so the network is free to overfit.
  overfit_clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, steps=1000)
  overfit_clf.fit(x_train, y_train)
  accuracy_score(y_test, overfit_clf.predict(x_test))

  # Monitored variant: intended to stop early and improve test accuracy.
  monitored_clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, steps=1000)
  monitored_clf.fit(x_train, y_train, monitors=[val_monitor])
  accuracy_score(y_test, monitored_clf.predict(x_test))
def testIrisES(self):
  """Iris DNN with early stopping; also checks summary-event counts."""
  random.seed(42)
  iris = datasets.load_iris()
  x_train, x_test, y_train, y_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)
  x_train, x_val, y_train, y_val = train_test_split(
      x_train, y_train, test_size=0.2, random_state=42)
  val_monitor = learn.monitors.ValidationMonitor(
      x_val,
      y_val,
      every_n_steps=50,
      early_stopping_rounds=100,
      early_stopping_metric='accuracy',
      early_stopping_metric_minimize=False)

  # Classifier without early stopping - overfitting.
  no_stop_clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, steps=1000)
  no_stop_clf.fit(x_train, y_train)
  _ = accuracy_score(y_test, no_stop_clf.predict(x_test))

  # Full 1000 steps, 11 summaries and no evaluation summary
  # (11 = the first step plus every 100th of the 1000 steps).
  self.assertEqual(11, len(_get_summary_events(no_stop_clf.model_dir)))
  with self.assertRaises(ValueError):
    _get_summary_events(no_stop_clf.model_dir + '/eval')

  # Classifier with early stopping - improved accuracy on testing set.
  early_stop_clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10],
      n_classes=3,
      steps=2000,
      config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
  early_stop_clf.fit(x_train, y_train, monitors=[val_monitor])
  _ = accuracy_score(y_val, early_stop_clf.predict(x_val))
  _ = accuracy_score(y_test, early_stop_clf.predict(x_test))

  # This test is unstable, so equality is not checked here;
  # see stability_test for examples of stability issues.
  if val_monitor.early_stopped:
    self.assertLess(val_monitor.best_step, 2000)
    # Because the monitor stops only after the best score occurred, the
    # accuracy at the current checkpoint is lower.
    # TODO(ipolosukhin): Time machine for restoring old checkpoints?
    # Flaky: best_value is still not always better than the score2 value.
    # self.assertGreater(val_monitor.best_value, score2_val)

    # Early stopped; unstable, so only check counts stay below the maxima.
    self.assertLess(len(_get_summary_events(early_stop_clf.model_dir)), 21)
    self.assertLess(
        len(_get_summary_events(early_stop_clf.model_dir + '/eval')), 4)
def testIrisMomentum(self):
  """Iris DNN trained with a momentum optimizer and exponential LR decay."""
  random.seed(42)
  iris = datasets.load_iris()
  features_train, features_test, labels_train, labels_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  def exp_decay(global_step):
    # Exponential learning-rate decay schedule driven by the global step.
    return tf.train.exponential_decay(
        learning_rate=0.1, global_step=global_step,
        decay_steps=100, decay_rate=0.001)

  def custom_optimizer(learning_rate):
    # SGD with momentum 0.9 at the (decayed) learning rate.
    return tf.train.MomentumOptimizer(learning_rate, 0.9)

  classifier = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, steps=800,
      learning_rate=exp_decay, optimizer=custom_optimizer)
  classifier.fit(features_train, labels_train)
  score = accuracy_score(labels_test, classifier.predict(features_test))
  self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testIrisMomentum(self):
  """Iris DNN with inferred feature columns and a momentum optimizer."""
  random.seed(42)
  iris = datasets.load_iris()
  feats_train, feats_test, labels_train, labels_test = train_test_split(
      iris.data, iris.target, test_size=0.2, random_state=42)

  def momentum_optimizer(learning_rate):
    # SGD with momentum 0.9.
    return tf.train.MomentumOptimizer(learning_rate, 0.9)

  classifier = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10],
      feature_columns=learn.infer_real_valued_columns_from_input(feats_train),
      n_classes=3,
      steps=400,
      learning_rate=0.01,
      optimizer=momentum_optimizer)
  classifier.fit(feats_train, labels_train)
  score = accuracy_score(labels_test, classifier.predict(feats_test))
  self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def get_updated_model(log_file, data):
    """Re-train the two-layer DNN on the full dataset and report accuracy.

    :param log_file: logdir for checkpoints/summaries, e.g.
        '/tmp/tf_examples/two_layer_final_model_DNN_32_128_10000_0.01/'
    :param data: dict-like dataset; keys 'X' and 'y' are used here
        (presumably pandas objects, since '.values' is read -- confirm).
    :return: the fitted TensorFlowDNNClassifier.
    """
    # Hyper-parameters mirror train_inital_tf_model so the logdir layout matches.
    layers, steps, lr = [32, 128], 10000, .01
    model = skflow.TensorFlowDNNClassifier(
        hidden_units=layers, n_classes=2, batch_size=128,
        steps=steps, learning_rate=lr)
    # NOTE: the original built a ValidationMonitor here but never passed it to
    # fit(), so it had no effect; the dead construction has been removed.
    model.fit(data['X'], list(data['y'].values), logdir=log_file)
    _pred = model.predict(data['X'])
    # Single-argument print(...) prints identically under Python 2 and 3.
    print(accuracy_score(_pred, data['y'].values))
    print(confusion_matrix(_pred, data['y']))
    return model
def train_inital_tf_model(data):
    """Train the initial two-layer DNN model; run once, training is slow.

    :param data: dict-like dataset with 'X_train'/'y_train' for fitting and
        'X'/'y' for the final accuracy/confusion report.
    :return: (fitted model, log directory used for checkpoints/summaries)
    """
    layers, steps, lr = [32, 128], 10000, .01
    layers_str = str(layers).replace('[', '').replace(']', '').replace(', ', '_')
    log_dir = '/tmp/tf_examples/{}_DNN_{}_{}_{}/'.format(
        "two_layer_final_model", layers_str, steps, lr)
    model = skflow.TensorFlowDNNClassifier(
        hidden_units=layers, n_classes=2, batch_size=128,
        steps=steps, learning_rate=lr)
    # NOTE(review): the monitor evaluates on the training split, not a held-out
    # one -- confirm this is intended.
    val_monitor = monitors.ValidationMonitor(
        data['X_train'], data['y_train'].values, every_n_steps=200)
    model.fit(data['X_train'], list(data['y_train'].values),
              logdir=log_dir, monitors=[val_monitor])
    predictions = model.predict(data['X'])
    print(accuracy_score(predictions, data['y'].values))
    print(confusion_matrix(predictions, data['y']))
    # model.save(log_dir)
    return model, log_dir
def testNoCheckpoints(self):
  """Fits an iris DNN and saves it under a temp directory."""
  path = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
  random.seed(42)
  dataset = datasets.load_iris()
  estimator = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3)
  estimator.fit(dataset.data, dataset.target)
  estimator.save(path)
def testDNNDropout0_1(self):
  """A light dropout (10%) should still let the DNN fit iris well."""
  dataset = datasets.load_iris()
  clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, dropout=0.1)
  clf.fit(dataset.data, dataset.target)
  score = accuracy_score(dataset.target, clf.predict(dataset.data))
  self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
def testNoCheckpoints(self):
  """Restoring after the checkpoint index is deleted must raise ValueError."""
  path = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
  random.seed(42)
  dataset = datasets.load_iris()
  clf = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
  clf.fit(dataset.data, dataset.target)
  clf.save(path)
  # Deleting the 'checkpoint' index file invalidates the saved model.
  os.remove(os.path.join(path, 'checkpoint'))
  with self.assertRaises(ValueError):
    learn.TensorFlowEstimator.restore(path)
def testDNN(self):
  """Save/restore round-trip preserves the classifier's type and accuracy."""
  path = tf.test.get_temp_dir() + '/tmp_saver3'
  random.seed(42)
  dataset = datasets.load_iris()
  original = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3)
  original.fit(dataset.data, dataset.target)
  original.save(path)
  restored = learn.TensorFlowEstimator.restore(path)
  self.assertEqual(type(restored), type(original))
  score = accuracy_score(dataset.target, restored.predict(dataset.data))
  self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testNoCheckpoints(self):
  """Fits and saves an iris DNN built from explicit feature columns."""
  path = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
  random.seed(42)
  dataset = datasets.load_iris()
  feature_cols = [tf.contrib.layers.real_valued_column('', dimension=4)]
  clf = learn.TensorFlowDNNClassifier(
      feature_columns=feature_cols, hidden_units=[10, 20, 10], n_classes=3)
  clf.fit(dataset.data, dataset.target)
  clf.save(path)
def testIrisDNN(self):
  """Grid-searches DNN hyper-parameters via sklearn, when it is available."""
  if not HAS_SKLEARN:
    return
  random.seed(42)
  dataset = datasets.load_iris()
  base_clf = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10], n_classes=3, steps=50)
  search = GridSearchCV(
      base_clf,
      {'hidden_units': [[5, 5], [10, 10]], 'learning_rate': [0.1, 0.01]})
  search.fit(dataset.data, dataset.target)
  score = accuracy_score(dataset.target, search.predict(dataset.data))
  self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
def testIrisDNN(self):
  """Checks iris accuracy plus the shapes of learned weights and biases."""
  random.seed(42)
  dataset = datasets.load_iris()
  clf = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
  clf.fit(dataset.data, dataset.target)
  score = accuracy_score(dataset.target, clf.predict(dataset.data))
  self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
  # One weight matrix per layer transition: 4 -> 10 -> 20 -> 10 -> 3.
  weights = clf.weights_
  self.assertEqual(weights[0].shape, (4, 10))
  self.assertEqual(weights[1].shape, (10, 20))
  self.assertEqual(weights[2].shape, (20, 10))
  self.assertEqual(weights[3].shape, (10, 3))
  biases = clf.bias_
  self.assertEqual(len(biases), 4)
# NOTE(review): top-level script fragment comparing a skflow linear classifier
# against DNN variants. It relies on names defined before this chunk
# (reset_seed, X_train/X_test, y_train/y_test, learning_rate) and appears to
# continue past it (the final `random.seed(42)` leads into code not visible
# here), so it is left unmodified -- confirm against the full script.
if reset_seed: random.seed(42) tflr = skflow.TensorFlowLinearClassifier(n_classes=2, batch_size=128, steps=500, learning_rate=learning_rate) tflr.fit(X_train, y_train) print(accuracy_score(tflr.predict(X_test), y_test)) # 3 layer neural network with rectified linear activation. if reset_seed: random.seed(42) classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10], n_classes=2, batch_size=128, steps=500, learning_rate=learning_rate) classifier.fit(X_train, y_train) print(accuracy_score(classifier.predict(X_test), y_test)) # 3 layer neural network with hyperbolic tangent activation. def dnn_tanh(X, y): layers = skflow.ops.dnn(X, [20, 20, 20], tf.tanh) return skflow.models.logistic_regression(layers, y) if reset_seed: random.seed(42)