def testIrisES(self):
    """Fit two identical DNN classifiers on iris, one with a validation monitor.

    The data is split into train/test, and the training part is split again
    into train/validation. The first classifier is fitted without a monitor,
    the second one is fitted with a ``ValidationMonitor`` built from the
    validation split. Both are scored on the held-out test split.
    """
    random.seed(42)
    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # Pin the validation split too: without random_state the split is drawn
    # from NumPy's global RNG, which random.seed() above does not control,
    # so the train/validation partition would differ from run to run.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42)
    val_monitor = skflow.monitors.ValidationMonitor(X_val, y_val, n_classes=3)

    # classifier without early stopping - overfitting
    classifier1 = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, steps=1000)
    classifier1.fit(X_train, y_train)
    score1 = accuracy_score(y_test, classifier1.predict(X_test))

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, steps=1000)
    classifier2.fit(X_train, y_train, val_monitor)
    score2 = accuracy_score(y_test, classifier2.predict(X_test))
    # NOTE(review): score1 and score2 are computed but never compared, so
    # this test only checks that both fit/predict paths run without error.
def testIrisStreaming(self):
    """Train a linear classifier from generators and compare prediction paths.

    Features and targets are fed to ``fit`` through endless generators.
    Predictions are then made from the full array and from a finite
    generator; the two accuracy scores must be equal, and the array-based
    score must exceed 0.5.
    """
    iris = datasets.load_iris()

    def iris_data():
        # Endless stream of feature rows, restarting after each pass.
        while True:
            for row in iris.data:
                yield row

    def iris_predict_data():
        # Single finite pass over the feature rows.
        for row in iris.data:
            yield row

    def iris_target():
        # Endless stream of labels, restarting after each pass.
        while True:
            for label in iris.target:
                yield label

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3, steps=100)
    classifier.fit(iris_data(), iris_target())

    array_predictions = classifier.predict(iris.data)
    score1 = accuracy_score(iris.target, array_predictions)
    streamed_predictions = classifier.predict(iris_predict_data())
    score2 = accuracy_score(iris.target, streamed_predictions)

    self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1))
    mismatch_message = ("Scores from {0} iterator doesn't "
                        "match score {1} from full "
                        "data.").format(score2, score1)
    self.assertEqual(score2, score1, mismatch_message)
def testIrisMomentum(self):
    """Fit a DNN classifier using a momentum optimizer and a decaying rate.

    The learning rate is supplied as a callable of the global step and the
    optimizer as a callable of the learning rate. Accuracy on the held-out
    test split must exceed 0.7.
    """
    random.seed(42)
    iris = datasets.load_iris()
    split = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    # setup exponential decay function
    def exp_decay(global_step):
        return tf.train.exponential_decay(
            learning_rate=0.1,
            global_step=global_step,
            decay_steps=100,
            decay_rate=0.001)

    def custom_optimizer(learning_rate):
        # Momentum optimizer with a fixed momentum of 0.9.
        return tf.train.MomentumOptimizer(learning_rate, 0.9)

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=800,
        learning_rate=exp_decay,
        optimizer=custom_optimizer)
    classifier.fit(X_train, y_train)

    predicted = classifier.predict(X_test)
    score = accuracy_score(y_test, predicted)
    self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testIrisClassWeight(self):
    """Fit a linear classifier with skewed class weights on iris.

    With ``class_weight=[0.1, 0.8, 0.1]`` the accuracy on the training data
    is expected to stay below 0.7.
    """
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    classifier = skflow.TensorFlowLinearClassifier(
        n_classes=3, class_weight=[0.1, 0.8, 0.1])
    classifier.fit(features, labels)

    predicted = classifier.predict(features)
    score = accuracy_score(labels, predicted)
    self.assertLess(score, 0.7, "Failed with score = {0}".format(score))
def testDNNDropout0_1(self):
    """Fit a DNN classifier with ``dropout=0.1`` and expect accuracy above 0.9.

    Accuracy is measured on the same iris data the model was fitted on.
    """
    # Dropping only a little.
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, dropout=0.1)
    classifier.fit(features, labels)

    predicted = classifier.predict(features)
    score = accuracy_score(labels, predicted)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
def testIris_proba(self):
    """Check that ``predict_proba`` gives a log loss below 0.8 on iris.

    The test body is skipped silently when ``log_loss`` is falsy.
    """
    # If sklearn available.
    if not log_loss:
        return

    random.seed(42)
    iris = datasets.load_iris()
    classifier = skflow.TensorFlowClassifier(n_classes=3, steps=250)
    classifier.fit(iris.data, iris.target)

    probabilities = classifier.predict_proba(iris.data)
    score = log_loss(iris.target, probabilities)
    self.assertLess(score, 0.8, "Failed with score = {0}".format(score))
def testIrisContinueTraining(self):
    """Fit the same linear classifier twice with ``continue_training=True``.

    The accuracy after the second ``fit`` call must be strictly greater
    than the accuracy after the first one.
    """
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target

    classifier = skflow.TensorFlowLinearClassifier(
        n_classes=3,
        learning_rate=0.01,
        continue_training=True,
        steps=250)

    # First round of training.
    classifier.fit(features, labels)
    score1 = accuracy_score(labels, classifier.predict(features))

    # Second round on the same estimator instance.
    classifier.fit(features, labels)
    score2 = accuracy_score(labels, classifier.predict(features))

    self.assertGreater(score2, score1,
                       "Failed with score = {0}".format(score2))
def test_pandas_series(self):
    """Fit a linear classifier on a pandas DataFrame with Series labels.

    Does nothing when ``HAS_PANDAS`` is falsy. Otherwise accuracy on the
    training data must exceed 0.5.
    """
    if not HAS_PANDAS:
        return

    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.Series(iris.target)

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(data, labels)

    predicted = classifier.predict(data)
    score = accuracy_score(labels, predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testNoCheckpoints(self):
    """Restoring a saved model whose 'checkpoint' file is gone must fail.

    A DNN classifier is fitted and saved, the ``checkpoint`` file inside the
    save directory is removed, and ``restore`` is expected to raise
    ``ValueError``.
    """
    path = tf.test.get_temp_dir() + '/tmp/tmp.saver4'
    random.seed(42)
    iris = datasets.load_iris()

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3)
    classifier.fit(iris.data, iris.target)
    classifier.save(path)

    # Delete the checkpoint file so the saved model can no longer be restored.
    checkpoint_file = os.path.join(path, 'checkpoint')
    os.remove(checkpoint_file)

    with self.assertRaises(ValueError):
        skflow.TensorFlowEstimator.restore(path)
def testDNN(self):
    """Save a fitted DNN classifier, restore it and predict with the copy.

    The restored estimator must have the same type as the original and reach
    an accuracy above 0.5 on the iris data.
    """
    path = tf.test.get_temp_dir() + '/tmp_saver3'
    random.seed(42)
    iris = datasets.load_iris()

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3)
    classifier.fit(iris.data, iris.target)
    classifier.save(path)

    restored = skflow.TensorFlowEstimator.restore(path)
    self.assertEqual(type(restored), type(classifier))

    predicted = restored.predict(iris.data)
    score = accuracy_score(iris.target, predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_pandas_dataframe(self):
    """Fit a linear classifier on pandas DataFrames for data and labels.

    When ``HAS_PANDAS`` is falsy a notice is printed and nothing is tested.
    Otherwise accuracy on the training data must exceed 0.5.
    """
    if not HAS_PANDAS:
        print("No pandas installed. pandas-related tests are skipped.")
        return

    random.seed(42)
    iris = datasets.load_iris()
    data = pd.DataFrame(iris.data)
    labels = pd.DataFrame(iris.target)

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(data, labels)

    # Column 0 of the labels frame holds the targets passed to DataFrame().
    predicted = classifier.predict(data)
    score = accuracy_score(labels[0], predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def test_dask_iris_classification(self):
    """Fit a linear classifier on dask DataFrames built from iris.

    Runs only when both ``HAS_DASK`` and ``HAS_PANDAS`` are truthy.
    Predictions are made per partition via ``map_partitions`` and the
    resulting accuracy must exceed 0.5.
    """
    if not (HAS_DASK and HAS_PANDAS):
        return

    random.seed(42)
    iris = datasets.load_iris()

    # Wrap the pandas frames into two-partition dask frames.
    data = dd.from_pandas(pd.DataFrame(iris.data), npartitions=2)
    labels = dd.from_pandas(pd.DataFrame(iris.target), npartitions=2)

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(data, labels)

    predictions = data.map_partitions(classifier.predict).compute()
    score = accuracy_score(labels.compute(), predictions)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testCustomModel(self):
    """Save and restore an estimator built from a custom ``model_fn``.

    The custom model delegates to ``skflow.models.logistic_regression``.
    The restored estimator must have the same type as the original and reach
    an accuracy above 0.5 on the iris data.
    """
    path = tf.test.get_temp_dir() + '/tmp.saver2'
    random.seed(42)
    iris = datasets.load_iris()

    def custom_model(X, y):
        return skflow.models.logistic_regression(X, y)

    classifier = skflow.TensorFlowEstimator(
        model_fn=custom_model, n_classes=3)
    classifier.fit(iris.data, iris.target)
    classifier.save(path)

    restored = skflow.TensorFlowEstimator.restore(path)
    self.assertEqual(type(restored), type(classifier))

    predicted = restored.predict(iris.data)
    score = accuracy_score(iris.target, predicted)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testIrisDNN(self):
    """Fit a [10, 20, 10] DNN classifier on iris and inspect its parameters.

    Accuracy on the training data must exceed 0.9. The first four entries of
    ``weights_`` must have the shapes (4, 10), (10, 20), (20, 10), (10, 3),
    and ``bias_`` must contain four entries.
    """
    random.seed(42)
    iris = datasets.load_iris()

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3)
    classifier.fit(iris.data, iris.target)

    predicted = classifier.predict(iris.data)
    score = accuracy_score(iris.target, predicted)
    self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))

    # Expected weight shapes, checked by index in this order.
    expected_shapes = [(4, 10), (10, 20), (20, 10), (10, 3)]
    weights = classifier.weights_
    for index, expected in enumerate(expected_shapes):
        self.assertEqual(weights[index].shape, expected)

    biases = classifier.bias_
    self.assertEqual(len(biases), 4)
def testIrisExponentialDecay(self):
    """Fit a DNN classifier whose learning rate is an exponential decay.

    The learning rate is passed as a callable of the global step. Accuracy on
    the held-out test split must exceed 0.7.
    """
    random.seed(42)
    iris = datasets.load_iris()
    split = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    # setup exponential decay function
    def exp_decay(global_step):
        return tf.train.exponential_decay(
            learning_rate=0.1,
            global_step=global_step,
            decay_steps=100,
            decay_rate=0.001)

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=800,
        learning_rate=exp_decay)
    classifier.fit(X_train, y_train)

    predicted = classifier.predict(X_test)
    score = accuracy_score(y_test, predicted)
    self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testIris(self):
    """Fit a linear classifier on iris with labels given as a list of floats.

    Accuracy against the original targets must exceed 0.7.
    """
    iris = datasets.load_iris()
    # Labels are deliberately converted to Python floats before fitting.
    float_labels = [float(label) for label in iris.target]

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(iris.data, float_labels)

    predicted = classifier.predict(iris.data)
    score = accuracy_score(iris.target, predicted)
    self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testIrisSummaries(self):
    """Fit a linear classifier on iris while passing a ``logdir`` to ``fit``.

    Accuracy on the training data must exceed 0.5.
    """
    iris = datasets.load_iris()
    # Use the TensorFlow test temp directory rather than a hard-coded
    # '/tmp/...' path, which is not portable and is shared between runs.
    logdir = os.path.join(tf.test.get_temp_dir(), 'skflow_tests')

    classifier = skflow.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target, logdir=logdir)

    score = accuracy_score(iris.target, classifier.predict(iris.data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))