def testIrisStreaming(self):
    """Fit from endless generators; predictions from an iterator must match ndarray input."""
    iris = datasets.load_iris()

    def endless_features():
        # Cycle over feature rows forever so fit() can draw any number of steps.
        while True:
            for row in iris.data:
                yield row

    def endless_labels():
        # Matching endless stream of class labels.
        while True:
            for label in iris.target:
                yield label

    def one_pass_features():
        # Single pass over the features, used only for prediction.
        for row in iris.data:
            yield row

    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3,
        steps=100)
    classifier.fit(endless_features(), endless_labels())

    full_score = accuracy_score(iris.target, classifier.predict(iris.data))
    stream_score = accuracy_score(iris.target,
                                  classifier.predict(one_pass_features()))

    self.assertGreater(full_score, 0.5,
                       "Failed with score = {0}".format(full_score))
    self.assertEqual(
        stream_score, full_score,
        "Scores from {0} iterator doesn't match score {1} from full "
        "data.".format(stream_score, full_score))
def testIris(self):
    """Linear classifier should exceed 0.7 accuracy on Iris with list labels."""
    iris = datasets.load_iris()
    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3)
    # list(...) replaces the needless `[x for x in iris.target]` copy; the
    # intent (exercise fit() with a plain Python list of labels) is unchanged.
    classifier.fit(iris.data, list(iris.target))
    score = accuracy_score(iris.target, classifier.predict(iris.data))
    self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def testBoston(self):
    """Sanity check: linear regressor keeps Boston training MSE under 150."""
    random.seed(42)
    boston = datasets.load_boston()
    feature_columns = learn.infer_real_valued_columns_from_input(boston.data)
    regressor = learn.LinearRegressor(feature_columns=feature_columns)
    regressor.fit(boston.data, boston.target, max_steps=500)
    mse = mean_squared_error(boston.target, regressor.predict(boston.data))
    self.assertLess(mse, 150, "Failed with score = {0}".format(mse))
def testIrisSummaries(self):
    """Training with a model_dir should still learn Iris (summaries written there)."""
    iris = datasets.load_iris()
    # BUG FIX: the original `tempfile.mkdtemp() + "learn_tests/"` was missing a
    # path separator, producing a mashed sibling path such as
    # "/tmp/tmpXYZlearn_tests/" and leaking the empty directory mkdtemp made.
    output_dir = tempfile.mkdtemp() + "/learn_tests/"
    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3,
        model_dir=output_dir)
    classifier.fit(iris.data, iris.target)
    score = accuracy_score(iris.target, classifier.predict(iris.data))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testOneDim(self):
    """A 1-D linear relationship y = 2x + 3 should be fit almost exactly."""
    random.seed(42)
    features = np.random.rand(1000)
    targets = 2 * features + 3
    regressor = learn.TensorFlowLinearRegressor(
        feature_columns=learn.infer_real_valued_columns_from_input(features))
    regressor.fit(features, targets)
    mse = mean_squared_error(targets, regressor.predict(features))
    self.assertLess(mse, 1.0, "Failed with score = {0}".format(mse))
def get_classification_score(train_encodings, train_labels, test_encodings,
                             test_labels, steps):
    """Train a small DNN on the encodings and return its test accuracy.

    Args:
      train_encodings: training feature matrix.
      train_labels: training class labels.
      test_encodings: held-out feature matrix.
      test_labels: held-out class labels.
      steps: number of training steps to run.

    Returns:
      Accuracy of the trained classifier on the held-out set.
    """
    classifier = learn.DNNClassifier(
        hidden_units=[32, 16],
        n_classes=10,
        feature_columns=learn.infer_real_valued_columns_from_input(
            train_encodings))
    classifier.fit(train_encodings, train_labels, steps=steps, batch_size=32)
    # Materialize the prediction iterator before scoring.
    predictions = list(classifier.predict(test_encodings, as_iterable=True))
    return metrics.accuracy_score(test_labels, predictions)
def testIrisClassWeight(self): iris = datasets.load_iris() # Note, class_weight are not supported anymore :( Use weight_column. with self.assertRaises(ValueError): classifier = learn.TensorFlowLinearClassifier( feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3, class_weight=[0.1, 0.8, 0.1]) classifier.fit(iris.data, iris.target) score = accuracy_score(iris.target, classifier.predict(iris.data)) self.assertLess(score, 0.7, "Failed with score = {0}".format(score))
def testMultiRegression(self):
    """Two-output regression: fit sin/cos targets with one linear regressor."""
    random.seed(42)
    rng = np.random.RandomState(1)
    inputs = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
    targets = np.array(
        [np.pi * np.sin(inputs).ravel(), np.pi * np.cos(inputs).ravel()]).T
    regressor = learn.LinearRegressor(
        feature_columns=learn.infer_real_valued_columns_from_input(inputs),
        target_dimension=2)
    regressor.fit(inputs, targets, steps=100)
    mse = mean_squared_error(regressor.predict(inputs), targets)
    self.assertLess(mse, 10, "Failed with score = {0}".format(mse))
def testIris_proba(self):
    """predict_proba output should have low log-loss on Iris (needs sklearn)."""
    # Skip silently when sklearn's log_loss was not importable.
    if not log_loss:
        return
    random.seed(42)
    iris = datasets.load_iris()
    classifier = learn.TensorFlowClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3,
        steps=250)
    classifier.fit(iris.data, iris.target)
    loss = log_loss(iris.target, classifier.predict_proba(iris.data))
    self.assertLess(loss, 0.8, "Failed with score = {0}".format(loss))
def testBoston(self):
    """Full-batch linear regression on Boston with a small learning rate."""
    random.seed(42)
    boston = datasets.load_boston()
    num_examples = boston.data.shape[0]
    regressor = learn.TensorFlowLinearRegressor(
        feature_columns=learn.infer_real_valued_columns_from_input(boston.data),
        batch_size=num_examples,  # one batch == the whole dataset
        steps=500,
        learning_rate=0.001)
    regressor.fit(boston.data, boston.target)
    mse = mean_squared_error(boston.target, regressor.predict(boston.data))
    self.assertLess(mse, 150, "Failed with score = {0}".format(mse))
def testIrisContinueTraining(self):
    """A second fit() call must resume from the checkpoint and improve accuracy."""
    iris = datasets.load_iris()
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3)

    def train_and_score(steps):
        # Each call continues training from the previously saved state.
        classifier.fit(iris.data, iris.target, steps=steps)
        return accuracy_score(iris.target, classifier.predict(iris.data))

    score1 = train_and_score(100)
    score2 = train_and_score(500)
    self.assertGreater(
        score2, score1,
        "Failed with score2 {0} <= score1 {1}".format(score2, score1))
def testIrisES(self):
    # Early-stopping end-to-end test: trains one DNN without a validation
    # monitor (overfits, fixed summary count) and one with a ValidationMonitor
    # that may stop training early, then compares summary-event counts.
    random.seed(42)
    iris = datasets.load_iris()
    # 80/20 train/test split, then carve a validation set out of the train set.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    # NOTE(review): early_stopping_metric_minimize=False with metric 'loss'
    # means training stops when loss stops *increasing* — presumably
    # intentional to force an early stop; confirm against monitor docs.
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val,
        every_n_steps=50,
        early_stopping_rounds=100,
        early_stopping_metric='loss',
        early_stopping_metric_minimize=False)
    feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
    # classifier without early stopping - overfitting
    classifier1 = learn.DNNClassifier(
        feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)
    classifier1.fit(x_train, y_train, steps=1000)
    _ = accuracy_score(y_test, classifier1.predict(x_test))
    # Full 1000 steps, 19 summaries and no evaluation summary:
    # 1 summary of net at step 1
    # 9 x (1 summary of net and 1 summary of global step) for steps 101, 201,...
    self.assertEqual(19, len(_get_summary_events(classifier1.model_dir)))
    # No eval directory exists for classifier1, so reading it must raise.
    with self.assertRaises(ValueError):
        _get_summary_events(classifier1.model_dir + '/eval')
    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10], feature_columns=feature_columns, n_classes=3,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x_train, y_train, monitors=[val_monitor], steps=2000)
    _ = accuracy_score(y_val, classifier2.predict(x_val))
    _ = accuracy_score(y_test, classifier2.predict(x_test))
    # Note, this test is unstable, so not checking for equality.
    # See stability_test for examples of stability issues.
    if val_monitor.early_stopped:
        self.assertLess(val_monitor.best_step, 2000)
        # Note, due to validation monitor stopping after the best score occur,
        # the accuracy at current checkpoint is less.
        # TODO(ipolosukhin): Time machine for restoring old checkpoints?
        # flaky, still not always best_value better then score2 value.
        # self.assertGreater(val_monitor.best_value, score2_val)
        # Early stopped, unstable so checking only < then max.
        self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
        # Eval typically has ~6 events, but it varies based on the run.
        self.assertLess(len(_get_summary_events(
            classifier2.model_dir + '/eval')), 8)
def test_pandas_series(self):
    """fit/predict accept a pandas DataFrame of features and a Series of labels."""
    if HAS_PANDAS:
        import pandas as pd  # pylint: disable=g-import-not-at-top
        random.seed(42)
        iris = datasets.load_iris()
        data = pd.DataFrame(iris.data)
        labels = pd.Series(iris.target)
        classifier = learn.LinearClassifier(
            feature_columns=learn.infer_real_valued_columns_from_input(data),
            n_classes=3)
        classifier.fit(data, labels, steps=100)
        score = accuracy_score(labels, list(classifier.predict(data)))
        self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
    else:
        # Consistency fix: the sibling pandas test announces its skip; this one
        # previously passed silently when pandas was missing.
        print("No pandas installed. pandas-related tests are skipped.")
def test_pandas_dataframe(self):
    """fit/predict accept pandas DataFrames for both features and labels."""
    if not HAS_PANDAS:
        print("No pandas installed. pandas-related tests are skipped.")
        return
    import pandas as pd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    features = pd.DataFrame(iris.data)
    labels = pd.DataFrame(iris.target)
    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(features),
        n_classes=3)
    classifier.fit(features, labels)
    # labels[0] pulls the single column of the one-column label DataFrame.
    score = accuracy_score(labels[0], classifier.predict(features))
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testLinearRegression(self):
    """Weights recovered from a noisy linear model should match the truth."""
    rng = np.random.RandomState(67)
    num_examples, num_weights, bias = 1000, 10, 2
    # RNG call order is significant and mirrors the original exactly:
    # uniform features, then weights, then noise.
    features = rng.uniform(-1, 1, (num_examples, num_weights))
    true_weights = 10 * rng.randn(num_weights)
    labels = np.dot(features, true_weights)
    labels += rng.randn(len(features)) * 0.05 + rng.normal(bias, 0.01)
    regressor = learn.TensorFlowLinearRegressor(
        feature_columns=learn.infer_real_valued_columns_from_input(features),
        optimizer="SGD")
    regressor.fit(features, labels, steps=200)
    # Have to flatten weights since they come in (x, 1) shape.
    self.assertAllClose(true_weights, regressor.weights_.flatten(), rtol=0.01)
def testIrisDNN(self):
    # Verifies the estimator is sklearn-compatible enough to be driven by
    # GridSearchCV over hidden_units (requires sklearn; skipped otherwise).
    if HAS_SKLEARN:
        random.seed(42)
        iris = datasets.load_iris()
        feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
        classifier = learn.DNNClassifier(
            feature_columns=feature_columns,
            hidden_units=[10, 20, 10],
            n_classes=3)
        # NOTE(review): fit_params passes steps=[50] (a one-element list)
        # straight through to fit(); presumably fit() accepts a list here —
        # confirm against the estimator's fit signature.
        grid_search = GridSearchCV(classifier,
                                   {'hidden_units': [[5, 5], [10, 10]]},
                                   scoring='accuracy',
                                   fit_params={'steps': [50]})
        grid_search.fit(iris.data, iris.target)
        score = accuracy_score(iris.target, grid_search.predict(iris.data))
        self.assertGreater(score, 0.5, 'Failed with score = {0}'.format(score))
def testIrisAllVariables(self):
    # Pins the exact set of variable names TensorFlowLinearClassifier creates
    # during training; any change to the estimator's variable naming scheme
    # will break this list.
    iris = datasets.load_iris()
    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3)
    classifier.fit(iris.data, [x for x in iris.target])
    self.assertEqual(
        classifier.get_variable_names(),
        ["centered_bias_weight",
         "centered_bias_weight/Adagrad",
         "global_step",
         "linear/_weight",
         "linear/_weight/Ftrl",
         "linear/_weight/Ftrl_1",
         "linear/bias_weight",
         "linear/bias_weight/Ftrl",
         "linear/bias_weight/Ftrl_1"])
def test_dask_iris_classification(self):
    """Dask-partitioned DataFrames work for both training and prediction."""
    if not (HAS_DASK and HAS_PANDAS):
        return
    import pandas as pd  # pylint: disable=g-import-not-at-top
    import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
    random.seed(42)
    iris = datasets.load_iris()
    # Two partitions are enough to exercise the partitioned code path.
    data = dd.from_pandas(pd.DataFrame(iris.data), npartitions=2)
    labels = dd.from_pandas(pd.DataFrame(iris.target), npartitions=2)
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(data),
        n_classes=3)
    classifier.fit(data, labels, steps=100)
    # Predict per partition, then materialize for scoring.
    predictions = data.map_partitions(classifier.predict).compute()
    score = accuracy_score(labels.compute(), predictions)
    self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
def testIrisAllVariables(self):
    """Pins the exact variable names created by LinearClassifier training."""
    iris = datasets.load_iris()
    classifier = learn.LinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3)
    # list(...) replaces the needless `[x for x in iris.target]` copy; fit()
    # still receives a plain Python list of labels.
    classifier.fit(iris.data, list(iris.target), max_steps=100)
    self.assertEqual(
        classifier.get_variable_names(),
        ["centered_bias_weight",
         "centered_bias_weight/Adagrad",
         "global_step",
         # Double slashes appear because the column name is empty. If it was not
         # empty, the variable names would be "linear/column_name/weight" etc.
         "linear//weight",
         "linear//weight/Ftrl",
         "linear//weight/Ftrl_1",
         "linear/bias_weight",
         "linear/bias_weight/Ftrl",
         "linear/bias_weight/Ftrl_1"])
def testIrisMomentum(self):
    """A custom Momentum optimizer callable should train the DNN adequately."""
    random.seed(42)
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    def make_momentum():
        # Passed as a callable so the optimizer is constructed in-graph.
        return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

    classifier = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        n_classes=3,
        optimizer=make_momentum,
        config=learn.RunConfig(tf_random_seed=1))
    classifier.fit(x_train, y_train, steps=400)
    score = accuracy_score(y_test, classifier.predict(x_test))
    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
# Boston-housing linear regression demo using tf.contrib.learn.
#
# A large block of commented-out duplicate code was removed here; among other
# problems it called scaler.fit(X_test) where scaler.transform(X_test) was
# intended, and it duplicated the live demo below.
import tensorflow.contrib.learn.python.learn as learn
from sklearn import datasets, metrics, preprocessing

boston = datasets.load_boston()
# Standardize the features so the linear model trains stably.
x = preprocessing.StandardScaler().fit_transform(boston.data)

feature_columns = learn.infer_real_valued_columns_from_input(x)
regressor = learn.LinearRegressor(feature_columns=feature_columns)
regressor.fit(x, boston.target, steps=200, batch_size=32)

# Evaluate on the training data (this is a demo, not a proper holdout).
boston_predictions = list(regressor.predict(x, as_iterable=True))
score = metrics.mean_squared_error(boston_predictions, boston.target)
print("MSE: %f" % score)
# Split the atom data 60/20/20 into train / cross-validation / test rows.
n_train = int(0.6 * atom_data.shape[0])
n_CV = int(0.2 * atom_data.shape[0])
# For the test size, subtract to eliminate rounding issues.
n_test = int(atom_data.shape[0] - n_train - n_CV)

# Column 0 holds the target; columns 1:-1 hold the features — TODO confirm
# against the code that builds `x`.
y_train = x[0:n_train, 0]
x_train = x[0:n_train, 1:-1]
y_CV = x[n_train:n_train + n_CV, 0]
x_CV = x[n_train:n_train + n_CV, 1:-1]
# BUG FIX: the test-set slices used `:-1` and silently dropped the final row,
# so the test set held n_test - 1 examples; slice through to the end instead.
y_test = x[n_train + n_CV:, 0]
x_test = x[n_train + n_CV:, 1:-1]

# And finally convert to feature columns
features_train = learn.infer_real_valued_columns_from_input(x_train)
features_CV = learn.infer_real_valued_columns_from_input(x_CV)
features_test = learn.infer_real_valued_columns_from_input(x_test)

# Next we set up the regressor. This uses the much simplified approach
# of the learn contrib team. They have a lingo for creating custom
# regressors, so really that's probably the way for me to approach this
# to avoid as many errors as possible.
regressor = learn.DNNRegressor(feature_columns=features_train,
                               hidden_units=[1000, 100, 100, 100, 100, 100],
                               model_dir='model/')
#regressor = learn.LinearRegressor(feature_columns=features_train, model_dir = 'model/')

# And this is the section that actually runs the regressor. Note that I
# no longer need to bother with things like figuring out batching or
# Scale the features with the previously fitted scaler.
X = scaler.transform(X)

# Create results vector (a home win = 1, a home loss or tie = 0)
y = np.array(np.where(df['home_score'] > df['away_score'], 1, 0))

# Delete the dataframe to clear memory
del df

# Split out training and testing data sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2, random_state=42)

# Remove the 'week' 'home_team' and 'away_team' columns from matchups as they
# are not used in the algorithm
matchups.drop(['week', 'home_team', 'away_team'], axis=1, inplace=True)

# Build 3 layer fully connected DNN with 100, 100, 100 units respectively.
# (The original comment said "50, 50, 50" but the code has always used 100s.)
feature_columns = learn.infer_real_valued_columns_from_input(X_train)
regressor = learn.DNNRegressor(feature_columns=feature_columns,
                               hidden_units=[100, 100, 100])

# Fit
regressor.fit(X_train, y_train, steps=500)

# Predict and score
y_predicted = list(regressor.predict(x=scaler.transform(matchups),
                                     as_iterable=True))
# BUG FIX: use the print() function (the Python-2 `print y_predicted`
# statement is a syntax error on Python 3) and drop the bare `raw_input`
# expression, which only referenced the function without calling it (a no-op).
print(y_predicted)

# BUG FIX: convert to an ndarray before normalizing — subtracting a scalar
# from a plain Python list raises TypeError. Min-max scale into [0, 1].
min_val = min(y_predicted)
max_val = max(y_predicted)
y_predicted = (np.array(y_predicted) - min_val) / (max_val - min_val)
from sklearn import datasets, metrics, preprocessing, cross_validation
import numpy as np

boston = datasets.load_boston()
X, y = boston.data, boston.target
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.25, random_state=33)

# Standardize the features: fit the scaler on the training split only, then
# reuse it to transform the test split.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# skflow has been folded into TensorFlow as contrib.learn, hence this import
# path. (The original author noted this path still had issues.)
import tensorflow.contrib.learn.python.learn as learn
import tensorflow as tf

# LinearRegressor from contrib.learn. An explicit SGD optimizer with a small
# learning rate is supplied because the default rate of 0.2 makes the
# gradients explode on this data.
tf_lr = learn.LinearRegressor(
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.01))
tf_lr.fit(X_train, y_train, steps=1000, batch_size=50)

tf_lr_y_predict = tf_lr.predict(X_test)
tf_lr_y_predict = np.array(list(tf_lr_y_predict))

# BUG FIX: corrected the typo "absoluate" -> "absolute" in the printed label.
print('absolute error:',
      metrics.mean_absolute_error(tf_lr_y_predict, y_test), '\n')
print('mean squared error:',
      metrics.mean_squared_error(tf_lr_y_predict, y_test), '\n')
print('R-squared value:', metrics.r2_score(tf_lr_y_predict, y_test))