def testIris(self):
    """Fit a 3-class linear classifier on iris and save it to a temp path."""
    save_path = tf.test.get_temp_dir() + '/tmp.saver'
    random.seed(42)
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    model.fit(iris.data, iris.target)
    model.save(save_path)
def testIrisClassWeight(self):
    """Fit with class weights [0.1, 0.8, 0.1]; accuracy must stay below 0.7."""
    iris = datasets.load_iris()
    weights = [0.1, 0.8, 0.1]
    model = learn.TensorFlowLinearClassifier(n_classes=3, class_weight=weights)
    model.fit(iris.data, iris.target)
    predicted = model.predict(iris.data)
    accuracy = accuracy_score(iris.target, predicted)
    self.assertLess(accuracy, 0.7, "Failed with score = {0}".format(accuracy))
def testIrisStreaming(self):
    """Fit from generators, then compare array and iterator predictions.

    The score on the full array must exceed 0.5, and the score obtained by
    predicting from a one-pass generator must equal it.
    """
    iris = datasets.load_iris()

    def iris_data():
        # Endless stream: restarts from the first feature row after the last.
        while True:
            for row in iris.data:
                yield row

    def iris_predict_data():
        # Single pass over the feature rows.
        for row in iris.data:
            yield row

    def iris_target():
        # Endless stream of labels, restarted the same way as iris_data().
        while True:
            for label in iris.target:
                yield label

    model = learn.TensorFlowLinearClassifier(n_classes=3, steps=100)
    model.fit(iris_data(), iris_target())

    array_predictions = model.predict(iris.data)
    score1 = accuracy_score(iris.target, array_predictions)
    stream_predictions = model.predict(iris_predict_data())
    score2 = accuracy_score(iris.target, stream_predictions)

    self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1))
    mismatch = ("Scores from {0} iterator doesn't match score {1} "
                "from full data.").format(score2, score1)
    self.assertEqual(score2, score1, mismatch)
def testIris(self):
    """Fit with inferred real-valued columns; accuracy must exceed 0.7."""
    iris = datasets.load_iris()
    columns = learn.infer_real_valued_columns_from_input(iris.data)
    model = learn.TensorFlowLinearClassifier(
        feature_columns=columns, n_classes=3)
    # Labels are handed over as a plain Python list, not the original array.
    model.fit(iris.data, list(iris.target))
    predicted = model.predict(iris.data)
    accuracy = accuracy_score(iris.target, predicted)
    self.assertGreater(accuracy, 0.7,
                       "Failed with score = {0}".format(accuracy))
def testIrisSummaries(self):
    """Fit on iris with `model_dir` set and check accuracy exceeds 0.5.

    The model directory is a `learn_tests` sub-directory of a freshly
    created temporary directory.
    """
    import os  # pylint: disable=g-import-not-at-top

    iris = datasets.load_iris()
    # Join with a real path separator. Plain string concatenation yielded a
    # sibling path such as "/tmp/tmpXYZlearn_tests/", i.e. outside the
    # directory that mkdtemp() created. The trailing empty component keeps
    # the trailing separator the original path string had.
    output_dir = os.path.join(tempfile.mkdtemp(), "learn_tests", "")
    classifier = learn.TensorFlowLinearClassifier(n_classes=3,
                                                  model_dir=output_dir)
    classifier.fit(iris.data, iris.target)
    score = accuracy_score(iris.target, classifier.predict(iris.data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testIris(self):
    """Fit with an explicit 4-dimensional real-valued column, then save."""
    save_path = tf.test.get_temp_dir() + '/tmp.saver'
    random.seed(42)
    iris = datasets.load_iris()
    # One unnamed (empty-string) real-valued column covering 4 dimensions.
    column = tf.contrib.layers.real_valued_column('', dimension=4)
    cont_features = [column]
    model = learn.TensorFlowLinearClassifier(feature_columns=cont_features,
                                             n_classes=3)
    model.fit(iris.data, iris.target)
    model.save(save_path)
def testIrisClassWeight(self):
    """Expect ValueError when a classifier is used with `class_weight`."""
    iris = datasets.load_iris()
    # class_weight is not supported anymore (use weight_column instead), so
    # a ValueError is expected from the block below. Statements that follow
    # the raising call inside the block are not executed.
    with self.assertRaises(ValueError):
        model = learn.TensorFlowLinearClassifier(
            n_classes=3, class_weight=[0.1, 0.8, 0.1])
        model.fit(iris.data, iris.target)
        predicted = model.predict(iris.data)
        accuracy = accuracy_score(iris.target, predicted)
        self.assertLess(accuracy, 0.7,
                        "Failed with score = {0}".format(accuracy))
def testIris(self):
    """Save a fitted classifier, restore it, and check type and accuracy."""
    save_path = tf.test.get_temp_dir() + '/tmp.saver'
    random.seed(42)
    iris = datasets.load_iris()

    original = learn.TensorFlowLinearClassifier(n_classes=3)
    original.fit(iris.data, iris.target)
    original.save(save_path)

    restored = learn.TensorFlowEstimator.restore(save_path)
    # The restored object must have the same type as the one that was saved.
    self.assertEqual(type(restored), type(original))
    predicted = restored.predict(iris.data)
    accuracy = accuracy_score(iris.target, predicted)
    self.assertGreater(accuracy, 0.5,
                       'Failed with score = {0}'.format(accuracy))
def test_pandas_series(self):
    """Fit on a pandas DataFrame with Series labels; accuracy must exceed 0.5.

    Does nothing when HAS_PANDAS is false.
    """
    if not HAS_PANDAS:
        return
    import pandas as pd
    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.Series(iris.target)
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    model.fit(features, targets)
    accuracy = accuracy_score(targets, model.predict(features))
    self.assertGreater(accuracy, 0.5,
                       "Failed with score = {0}".format(accuracy))
def testIrisAllVariables(self):
    """Fit with float labels and check the exact list of variable names."""
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    # Labels are converted to Python floats before fitting.
    float_labels = list(map(float, iris.target))
    model.fit(iris.data, float_labels)
    expected_names = [
        "OptimizeLoss/learning_rate",
        "OptimizeLoss/logistic_regression/bias/Adagrad",
        ("OptimizeLoss/logistic_regression/softmax_classifier/"
         "softmax_cross_entropy_loss/value/avg"),
        "OptimizeLoss/logistic_regression/weights/Adagrad",
        "global_step",
        "logistic_regression/bias",
        "logistic_regression/weights",
    ]
    self.assertEqual(model.get_variable_names(), expected_names)
def testIrisContinueTraining(self):
    """Fit twice with continue_training=True; the second score must be higher."""
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(
        n_classes=3, learning_rate=0.01, continue_training=True, steps=250)

    # First round of training and its accuracy on the training data.
    model.fit(iris.data, iris.target)
    score1 = accuracy_score(iris.target, model.predict(iris.data))

    # Second call to fit() on the same estimator, scored the same way.
    model.fit(iris.data, iris.target)
    score2 = accuracy_score(iris.target, model.predict(iris.data))

    self.assertGreater(score2, score1,
                       "Failed with score = {0}".format(score2))
def test_pandas_series(self):
    """Fit on a DataFrame with inferred columns and Series labels.

    Accuracy on the training data must exceed 0.5. Does nothing when
    HAS_PANDAS is false.
    """
    if not HAS_PANDAS:
        return
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.Series(iris.target)
    columns = learn.infer_real_valued_columns_from_input(features)
    model = learn.TensorFlowLinearClassifier(feature_columns=columns,
                                             n_classes=3)
    model.fit(features, targets)
    accuracy = accuracy_score(targets, model.predict(features))
    self.assertGreater(accuracy, 0.5,
                       "Failed with score = {0}".format(accuracy))
def test_pandas_dataframe(self):
    """Fit with DataFrame features and DataFrame labels; accuracy > 0.5.

    When HAS_PANDAS is false, prints a notice and returns without testing.
    """
    if not HAS_PANDAS:
        print("No pandas installed. pandas-related tests are skipped.")
        return
    import pandas as pd
    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    targets = pd.DataFrame(iris.target)
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    model.fit(features, targets)
    # Column 0 of the label frame is what gets compared to the predictions.
    accuracy = accuracy_score(targets[0], model.predict(features))
    self.assertGreater(accuracy, 0.5,
                       "Failed with score = {0}".format(accuracy))
def test_dask_iris_classification(self):
    """Fit on dask DataFrames built from iris; accuracy must exceed 0.5.

    Does nothing unless both HAS_DASK and HAS_PANDAS are true.
    """
    if not (HAS_DASK and HAS_PANDAS):
        return
    random.seed(42)
    iris = datasets.load_iris()
    # Wrap features and labels in pandas frames, then split each into two
    # dask partitions.
    features = dd.from_pandas(pd.DataFrame(iris.data), npartitions=2)
    targets = dd.from_pandas(pd.DataFrame(iris.target), npartitions=2)
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    model.fit(features, targets)
    # predict is applied to each partition; compute() materializes the result.
    predictions = features.map_partitions(model.predict).compute()
    accuracy = accuracy_score(targets.compute(), predictions)
    self.assertGreater(accuracy, 0.5,
                       "Failed with score = {0}".format(accuracy))
def testIrisAllVariables(self):
    """Fit on iris and check the exact list of variable names."""
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    # Labels are handed over as a plain Python list.
    model.fit(iris.data, list(iris.target))
    expected_names = [
        "centered_bias_weight",
        "centered_bias_weight/Adagrad",
        "global_step",
        "linear/_weight",
        "linear/_weight/Ftrl",
        "linear/_weight/Ftrl_1",
        "linear/bias_weight",
        "linear/bias_weight/Ftrl",
        "linear/bias_weight/Ftrl_1",
    ]
    self.assertEqual(model.get_variable_names(), expected_names)
def testIrisContinueTraining(self):
    """Fit, then fit again with steps=500; the second score must be higher."""
    iris = datasets.load_iris()
    columns = learn.infer_real_valued_columns_from_input(iris.data)
    model = learn.TensorFlowLinearClassifier(
        feature_columns=columns,
        n_classes=3,
        learning_rate=0.01,
        continue_training=True,
        steps=250)

    # First round, using the steps configured on the estimator.
    model.fit(iris.data, iris.target)
    score1 = accuracy_score(iris.target, model.predict(iris.data))

    # Second round on the same estimator with an explicit steps=500.
    model.fit(iris.data, iris.target, steps=500)
    score2 = accuracy_score(iris.target, model.predict(iris.data))

    failure = "Failed with score2 {0} <= score1 {1}".format(score2, score1)
    self.assertGreater(score2, score1, failure)
def test_dask_iris_classification(self):
    """Fit on dask DataFrames with inferred columns; accuracy must exceed 0.5.

    Does nothing unless both HAS_DASK and HAS_PANDAS are true.
    """
    if not (HAS_DASK and HAS_PANDAS):
        return
    import pandas as pd  # pylint: disable=g-import-not-at-top
    import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    # Wrap features and labels in pandas frames, then split each into two
    # dask partitions.
    features = dd.from_pandas(pd.DataFrame(iris.data), npartitions=2)
    targets = dd.from_pandas(pd.DataFrame(iris.target), npartitions=2)
    columns = learn.infer_real_valued_columns_from_input(features)
    model = learn.TensorFlowLinearClassifier(feature_columns=columns,
                                             n_classes=3)
    model.fit(features, targets)
    # predict is applied to each partition; compute() materializes the result.
    predictions = features.map_partitions(model.predict).compute()
    accuracy = accuracy_score(targets.compute(), predictions)
    self.assertGreater(accuracy, 0.5,
                       "Failed with score = {0}".format(accuracy))
def testIrisAllVariables(self):
    """Fit on iris and check the exact list of variable names."""
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    # Labels are handed over as a plain Python list.
    model.fit(iris.data, list(iris.target))
    expected_names = [
        "centered_bias_weight",
        "centered_bias_weight/Adagrad",
        "global_step",
        # Double slashes appear because the column name is empty. With a
        # non-empty name the variables would be "linear/column_name/_weight"
        # and so on.
        "linear//_weight",
        "linear//_weight/Ftrl",
        "linear//_weight/Ftrl_1",
        "linear/bias_weight",
        "linear/bias_weight/Ftrl",
        "linear/bias_weight/Ftrl_1",
    ]
    self.assertEqual(model.get_variable_names(), expected_names)
def testIris(self):
    """Fit a 3-class linear classifier on iris; accuracy must exceed 0.7."""
    iris = datasets.load_iris()
    model = learn.TensorFlowLinearClassifier(n_classes=3)
    # Labels are handed over as a plain Python list, not the original array.
    model.fit(iris.data, list(iris.target))
    predicted = model.predict(iris.data)
    accuracy = accuracy_score(iris.target, predicted)
    self.assertGreater(accuracy, 0.7,
                       "Failed with score = {0}".format(accuracy))
# Hold out 20% of the samples for evaluation, with a fixed split seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Baseline: LogisticRegression with default settings.
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_predictions = lr.predict(X_test)
print(accuracy_score(lr_predictions, y_test))

# Linear classifier.
if reset_seed:
    random.seed(42)
tflr = skflow.TensorFlowLinearClassifier(
    n_classes=2,
    batch_size=128,
    steps=500,
    learning_rate=learning_rate)
tflr.fit(X_train, y_train)
tflr_predictions = tflr.predict(X_test)
print(accuracy_score(tflr_predictions, y_test))

# 3 layer neural network with rectified linear activation.
if reset_seed:
    random.seed(42)
classifier = skflow.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=2,
    batch_size=128,
    steps=500,
    learning_rate=learning_rate)
classifier.fit(X_train, y_train)
dnn_predictions = classifier.predict(X_test)
print(accuracy_score(dnn_predictions, y_test))
def testIrisSummaries(self):
    """Fit on iris while passing a `logdir` and check accuracy exceeds 0.5.

    The log directory is a `learn_tests` sub-directory of a freshly created
    temporary directory, so each run gets its own location.
    """
    import os  # pylint: disable=g-import-not-at-top
    import tempfile  # pylint: disable=g-import-not-at-top

    iris = datasets.load_iris()
    # A fixed "/tmp/learn_tests/" is not portable and is shared between runs
    # and concurrent test processes; use a unique temp directory instead.
    # The trailing empty component keeps the trailing separator the original
    # path string had.
    logdir = os.path.join(tempfile.mkdtemp(), "learn_tests", "")
    classifier = learn.TensorFlowLinearClassifier(n_classes=3)
    classifier.fit(iris.data, iris.target, logdir=logdir)
    score = accuracy_score(iris.target, classifier.predict(iris.data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))