Ejemplo n.º 1
0
  def testIrisStreaming(self):
    """Fits from infinite generators; streaming prediction must match
    prediction on the full in-memory array."""
    iris = datasets.load_iris()

    def endless_features():
      # Loop over the feature rows forever so fit() never exhausts input.
      while True:
        for row in iris.data:
          yield row

    def one_pass_features():
      # Single pass over the features, used for streaming prediction.
      for row in iris.data:
        yield row

    def endless_labels():
      # Labels cycled in lockstep with endless_features().
      while True:
        for label in iris.target:
          yield label

    classifier = learn.TensorFlowLinearClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
        n_classes=3, steps=100)
    classifier.fit(endless_features(), endless_labels())
    accuracy_full = accuracy_score(iris.target, classifier.predict(iris.data))
    accuracy_stream = accuracy_score(
        iris.target, classifier.predict(one_pass_features()))
    self.assertGreater(accuracy_full, 0.5,
                       "Failed with score = {0}".format(accuracy_full))
    self.assertEqual(accuracy_stream, accuracy_full,
                     "Scores from {0} iterator doesn't "
                     "match score {1} from full "
                     "data.".format(accuracy_stream, accuracy_full))
Ejemplo n.º 2
0
 def testIris(self):
   """Sanity check: a linear classifier on iris reaches > 0.7 accuracy."""
   iris = datasets.load_iris()
   classifier = learn.TensorFlowLinearClassifier(
       feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
       n_classes=3)
   # list(...) replaces the needless identity comprehension while keeping
   # the original intent of passing labels as a plain Python list.
   classifier.fit(iris.data, list(iris.target))
   score = accuracy_score(iris.target, classifier.predict(iris.data))
   self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
Ejemplo n.º 3
0
 def testBoston(self):
   """Linear regression on the Boston housing data must reach MSE < 150."""
   random.seed(42)
   boston = datasets.load_boston()
   regressor = learn.LinearRegressor(
       feature_columns=learn.infer_real_valued_columns_from_input(boston.data))
   regressor.fit(boston.data, boston.target, max_steps=500)
   mse = mean_squared_error(boston.target, regressor.predict(boston.data))
   self.assertLess(mse, 150, "Failed with score = {0}".format(mse))
Ejemplo n.º 4
0
 def testIrisSummaries(self):
   """Trains with an explicit model_dir (so summaries are written) and
   sanity-checks accuracy."""
   iris = datasets.load_iris()
   # Insert the path separator: the original `mkdtemp() + "learn_tests/"`
   # fused the suffix into the temp dir's own name (e.g.
   # /tmp/tmpXYZlearn_tests/), a sibling of the temp dir instead of a
   # directory inside it.
   output_dir = tempfile.mkdtemp() + "/learn_tests/"
   classifier = learn.TensorFlowLinearClassifier(
       feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
       n_classes=3, model_dir=output_dir)
   classifier.fit(iris.data, iris.target)
   score = accuracy_score(iris.target, classifier.predict(iris.data))
   self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
Ejemplo n.º 5
0
 def testOneDim(self):
   """Fits y = 2x + 3 on 1-D inputs and checks the MSE is below 1.0."""
   # NOTE(review): random.seed seeds only the stdlib RNG; np.random.rand
   # below is unaffected — presumably np.random.seed was intended. Confirm.
   random.seed(42)
   inputs = np.random.rand(1000)
   targets = 2 * inputs + 3
   feature_columns = learn.infer_real_valued_columns_from_input(inputs)
   regressor = learn.TensorFlowLinearRegressor(feature_columns=feature_columns)
   regressor.fit(inputs, targets)
   mse = mean_squared_error(targets, regressor.predict(inputs))
   self.assertLess(mse, 1.0, "Failed with score = {0}".format(mse))
def get_classification_score(train_encodings, train_labels, test_encodings, test_labels, steps):
    """Train a small DNN classifier on the encodings and return test accuracy.

    Args:
      train_encodings: training feature matrix.
      train_labels: training labels (10 classes).
      test_encodings: held-out feature matrix.
      test_labels: held-out labels.
      steps: number of training steps for the classifier.

    Returns:
      Accuracy of the classifier on the held-out set.
    """
    feature_columns = learn.infer_real_valued_columns_from_input(train_encodings)
    model = learn.DNNClassifier(hidden_units=[32, 16], n_classes=10,
                                feature_columns=feature_columns)
    model.fit(train_encodings, train_labels, steps=steps, batch_size=32)

    # Materialize the prediction iterator so it can be scored.
    predictions = list(model.predict(test_encodings, as_iterable=True))
    return metrics.accuracy_score(test_labels, predictions)
Ejemplo n.º 7
0
 def testIrisClassWeight(self):
   """Passing the removed class_weight argument must raise ValueError."""
   iris = datasets.load_iris()
   # Note, class_weight are not supported anymore :( Use weight_column.
   with self.assertRaises(ValueError):
     classifier = learn.TensorFlowLinearClassifier(
         feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
         n_classes=3, class_weight=[0.1, 0.8, 0.1])
     # The lines below only run if the constructor did NOT raise.
     classifier.fit(iris.data, iris.target)
     accuracy = accuracy_score(iris.target, classifier.predict(iris.data))
     self.assertLess(accuracy, 0.7,
                     "Failed with score = {0}".format(accuracy))
Ejemplo n.º 8
0
 def testMultiRegression(self):
   """Two-target linear regression on sin/cos curves; MSE must be < 10."""
   random.seed(42)
   rng = np.random.RandomState(1)
   inputs = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
   # Two targets per sample: pi*sin(x) and pi*cos(x).
   targets = np.array(
       [np.pi * np.sin(inputs).ravel(), np.pi * np.cos(inputs).ravel()]).T
   regressor = learn.LinearRegressor(
       feature_columns=learn.infer_real_valued_columns_from_input(inputs),
       target_dimension=2)
   regressor.fit(inputs, targets, steps=100)
   mse = mean_squared_error(regressor.predict(inputs), targets)
   self.assertLess(mse, 10, "Failed with score = {0}".format(mse))
Ejemplo n.º 9
0
 def testIris_proba(self):
   """Checks predict_proba via log-loss (only when sklearn is available)."""
   # If sklearn available.
   if log_loss:
     random.seed(42)
     iris = datasets.load_iris()
     classifier = learn.TensorFlowClassifier(
         feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
         n_classes=3, steps=250)
     classifier.fit(iris.data, iris.target)
     loss = log_loss(iris.target, classifier.predict_proba(iris.data))
     self.assertLess(loss, 0.8, "Failed with score = {0}".format(loss))
Ejemplo n.º 10
0
 def testBoston(self):
   """Full-batch linear regression on Boston housing; MSE must be < 150."""
   random.seed(42)
   boston = datasets.load_boston()
   # One batch == the entire dataset; the small learning rate keeps the
   # full-batch updates stable.
   regressor = learn.TensorFlowLinearRegressor(
       feature_columns=learn.infer_real_valued_columns_from_input(boston.data),
       batch_size=boston.data.shape[0],
       steps=500,
       learning_rate=0.001)
   regressor.fit(boston.data, boston.target)
   mse = mean_squared_error(boston.target, regressor.predict(boston.data))
   self.assertLess(mse, 150, "Failed with score = {0}".format(mse))
Ejemplo n.º 11
0
 def testIrisContinueTraining(self):
   """A second fit() must resume from the first checkpoint and improve."""
   iris = datasets.load_iris()
   classifier = learn.LinearClassifier(
       feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
       n_classes=3)
   classifier.fit(iris.data, iris.target, steps=100)
   early_score = accuracy_score(iris.target, classifier.predict(iris.data))
   # Continue training: accuracy after more steps must exceed the first run.
   classifier.fit(iris.data, iris.target, steps=500)
   late_score = accuracy_score(iris.target, classifier.predict(iris.data))
   self.assertGreater(
       late_score, early_score,
       "Failed with score2 {0} <= score1 {1}".format(late_score, early_score))
Ejemplo n.º 12
0
  def testIrisES(self):
    """End-to-end check of ValidationMonitor-driven early stopping.

    Trains one DNN without early stopping (pinning the exact number of
    summary events written) and one with a ValidationMonitor; when the
    monitor stops early, both summary-event counts must stay below the
    no-early-stopping maxima.
    """
    random.seed(42)

    iris = datasets.load_iris()
    # 80/20 train/test split, then a further 80/20 train/validation split.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, every_n_steps=50, early_stopping_rounds=100,
        early_stopping_metric='loss', early_stopping_metric_minimize=False)

    feature_columns = learn.infer_real_valued_columns_from_input(iris.data)

    # classifier without early stopping - overfitting
    classifier1 = learn.DNNClassifier(
        feature_columns=feature_columns, hidden_units=[10, 20, 10], n_classes=3)
    classifier1.fit(x_train, y_train, steps=1000)
    _ = accuracy_score(y_test, classifier1.predict(x_test))

    # Full 1000 steps, 19 summaries and no evaluation summary:
    # 1 summary of net at step 1
    # 9 x (1 summary of net and 1 summary of global step) for steps 101, 201,...
    self.assertEqual(19, len(_get_summary_events(classifier1.model_dir)))
    # No eval directory exists for classifier1, so this must raise.
    with self.assertRaises(ValueError):
      _get_summary_events(classifier1.model_dir + '/eval')

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10], feature_columns=feature_columns, n_classes=3,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))

    classifier2.fit(x_train, y_train, monitors=[val_monitor], steps=2000)
    _ = accuracy_score(y_val, classifier2.predict(x_val))
    _ = accuracy_score(y_test, classifier2.predict(x_test))

    # Note, this test is unstable, so not checking for equality.
    # See stability_test for examples of stability issues.
    if val_monitor.early_stopped:
      self.assertLess(val_monitor.best_step, 2000)
      # Note, due to validation monitor stopping after the best score occur,
      # the accuracy at current checkpoint is less.
      # TODO(ipolosukhin): Time machine for restoring old checkpoints?
      # flaky, still not always best_value better then score2 value.
      # self.assertGreater(val_monitor.best_value, score2_val)

      # Early stopped, unstable so checking only < then max.
      self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
      # Eval typically has ~6 events, but it varies based on the run.
      self.assertLess(len(_get_summary_events(
          classifier2.model_dir + '/eval')), 8)
Ejemplo n.º 13
0
 def test_pandas_series(self):
   """LinearClassifier should accept a pandas DataFrame plus Series."""
   if HAS_PANDAS:
     import pandas as pd  # pylint: disable=g-import-not-at-top
     random.seed(42)
     iris = datasets.load_iris()
     features = pd.DataFrame(iris.data)
     labels = pd.Series(iris.target)
     classifier = learn.LinearClassifier(
         feature_columns=learn.infer_real_valued_columns_from_input(features),
         n_classes=3)
     classifier.fit(features, labels, steps=100)
     score = accuracy_score(labels, list(classifier.predict(features)))
     self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
Ejemplo n.º 14
0
 def test_pandas_dataframe(self):
   """TensorFlowLinearClassifier should accept DataFrames for features and
   labels alike."""
   if HAS_PANDAS:
     import pandas as pd  # pylint: disable=g-import-not-at-top
     random.seed(42)
     iris = datasets.load_iris()
     features = pd.DataFrame(iris.data)
     labels = pd.DataFrame(iris.target)
     classifier = learn.TensorFlowLinearClassifier(
         feature_columns=learn.infer_real_valued_columns_from_input(features),
         n_classes=3)
     classifier.fit(features, labels)
     # labels[0] selects the single column of the one-column label frame.
     score = accuracy_score(labels[0], classifier.predict(features))
     self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
   else:
     print("No pandas installed. pandas-related tests are skipped.")
Ejemplo n.º 15
0
 def testLinearRegression(self):
   """SGD linear regression must recover the known weights within 1%."""
   rng = np.random.RandomState(67)
   n_samples, n_features, bias = 1000, 10, 2
   features = rng.uniform(-1, 1, (n_samples, n_features))
   true_weights = 10 * rng.randn(n_features)
   targets = np.dot(features, true_weights)
   # Small Gaussian noise plus a noisy constant bias term.
   targets += rng.randn(len(features)) * 0.05 + rng.normal(bias, 0.01)
   regressor = learn.TensorFlowLinearRegressor(
       feature_columns=learn.infer_real_valued_columns_from_input(features),
       optimizer="SGD")
   regressor.fit(features, targets, steps=200)
   # Have to flatten weights since they come in (x, 1) shape.
   self.assertAllClose(true_weights, regressor.weights_.flatten(), rtol=0.01)
Ejemplo n.º 16
0
 def testIrisDNN(self):
   """Grid-searches DNNClassifier hidden_units via sklearn's GridSearchCV."""
   if HAS_SKLEARN:
     random.seed(42)
     iris = datasets.load_iris()
     feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
     base_classifier = learn.DNNClassifier(
         feature_columns=feature_columns, hidden_units=[10, 20, 10],
         n_classes=3)
     param_grid = {'hidden_units': [[5, 5], [10, 10]]}
     grid_search = GridSearchCV(base_classifier, param_grid,
                                scoring='accuracy',
                                fit_params={'steps': [50]})
     grid_search.fit(iris.data, iris.target)
     accuracy = accuracy_score(iris.target, grid_search.predict(iris.data))
     self.assertGreater(accuracy, 0.5,
                        'Failed with score = {0}'.format(accuracy))
Ejemplo n.º 17
0
 def testIrisAllVariables(self):
   """Trains a linear classifier and pins the exact set of model variables."""
   iris = datasets.load_iris()
   classifier = learn.TensorFlowLinearClassifier(
       feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
       n_classes=3)
   # list(...) replaces the needless identity comprehension.
   classifier.fit(iris.data, list(iris.target))
   self.assertEqual(
       classifier.get_variable_names(),
       ["centered_bias_weight",
        "centered_bias_weight/Adagrad",
        "global_step",
        "linear/_weight",
        "linear/_weight/Ftrl",
        "linear/_weight/Ftrl_1",
        "linear/bias_weight",
        "linear/bias_weight/Ftrl",
        "linear/bias_weight/Ftrl_1"])
Ejemplo n.º 18
0
 def test_dask_iris_classification(self):
   """LinearClassifier should accept dask dataframes for fit and predict."""
   if HAS_DASK and HAS_PANDAS:
     import pandas as pd  # pylint: disable=g-import-not-at-top
     import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
     random.seed(42)
     iris = datasets.load_iris()
     features = dd.from_pandas(pd.DataFrame(iris.data), npartitions=2)
     labels = dd.from_pandas(pd.DataFrame(iris.target), npartitions=2)
     classifier = learn.LinearClassifier(
         feature_columns=learn.infer_real_valued_columns_from_input(features),
         n_classes=3)
     classifier.fit(features, labels, steps=100)
     # Predict partition-by-partition, then materialize for scoring.
     predictions = features.map_partitions(classifier.predict).compute()
     score = accuracy_score(labels.compute(), predictions)
     self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
Ejemplo n.º 19
0
 def testIrisAllVariables(self):
   """Trains a LinearClassifier and pins the exact variable names created."""
   iris = datasets.load_iris()
   classifier = learn.LinearClassifier(
       feature_columns=learn.infer_real_valued_columns_from_input(iris.data),
       n_classes=3)
   # list(...) replaces the needless identity comprehension.
   classifier.fit(iris.data, list(iris.target), max_steps=100)
   self.assertEqual(
       classifier.get_variable_names(),
       ["centered_bias_weight",
        "centered_bias_weight/Adagrad",
        "global_step",
        # Double slashes appear because the column name is empty. If it was not
        # empty, the variable names would be "linear/column_name/weight" etc.
        "linear//weight",
        "linear//weight/Ftrl",
        "linear//weight/Ftrl_1",
        "linear/bias_weight",
        "linear/bias_weight/Ftrl",
        "linear/bias_weight/Ftrl_1"])
Ejemplo n.º 20
0
    def testIrisMomentum(self):
        """A DNN trained with a Momentum optimizer must beat 0.65 accuracy."""
        random.seed(42)

        iris = datasets.load_iris()
        x_train, x_test, y_train, y_test = train_test_split(
            iris.data, iris.target, test_size=0.2, random_state=42)

        def make_momentum_optimizer():
            # Passed as a callable so the optimizer is constructed inside the
            # estimator's own graph.
            return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

        classifier = learn.DNNClassifier(
            hidden_units=[10, 20, 10],
            feature_columns=learn.infer_real_valued_columns_from_input(x_train),
            n_classes=3,
            optimizer=make_momentum_optimizer,
            config=learn.RunConfig(tf_random_seed=1),
        )
        classifier.fit(x_train, y_train, steps=400)
        accuracy = accuracy_score(y_test, classifier.predict(x_test))

        self.assertGreater(accuracy, 0.65,
                           "Failed with score = {0}".format(accuracy))
#
# boston = datasets.load_boston()
#
# X, y = boston.data, boston.target
#
# X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.25, random_state=33)
#
# scaler = preprocessing.StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)
#
# feature_columns = learn.infer_real_valued_columns_from_input(X_train)
# tf_lr = learn.LinearRegressor(feature_columns=feature_columns)
# tf_lr.fit(X_train, y_train, steps=10000, batch_size=50)
#
# tf_lr_y_predict = tf_lr.predict(X_test)

# print(metrics.mean_absolute_error(tf_lr_y_predict, y_test))

# Boston-housing linear-regression example using tf.contrib.learn.
import tensorflow.contrib.learn.python.learn as learn
from sklearn import datasets, metrics, preprocessing

boston = datasets.load_boston()
# Standardize the features to zero mean / unit variance before fitting.
x = preprocessing.StandardScaler().fit_transform(boston.data)
feature_columns = learn.infer_real_valued_columns_from_input(x)
regressor = learn.LinearRegressor(feature_columns=feature_columns)
regressor.fit(x, boston.target, steps=200, batch_size=32)
# as_iterable=True yields predictions one at a time; materialize into a list.
boston_predictions = list(regressor.predict(x, as_iterable=True))
# NOTE(review): this is training-set MSE — there is no held-out split here.
score = metrics.mean_squared_error(boston_predictions, boston.target)
print("MSE: %f" % score)
Ejemplo n.º 22
0
# 60/20/20 train / cross-validation / test split by row count.
# (atom_data and x are defined earlier in the file, outside this view.)
n_train = int(0.6 * atom_data.shape[0])
n_CV = int(0.2 * atom_data.shape[0])

# For the test size, subtract to eliminate rounding issues.
n_test = int(atom_data.shape[0] - n_train - n_CV)

# Column 0 is the regression target; columns 1:-1 are the features
# (presumably the last column is dropped deliberately — TODO confirm).
y_train = x[0:n_train, 0]
x_train = x[0:n_train, 1:-1]
y_CV = x[n_train:n_train + n_CV, 0]
x_CV = x[n_train:n_train + n_CV, 1:-1]
# NOTE(review): the ':-1' row slice drops the final sample, so the test set
# has n_test - 1 rows; 'x[n_train + n_CV:, ...]' was probably intended.
y_test = x[n_train + n_CV:-1, 0]
x_test = x[n_train + n_CV:-1, 1:-1]

# And finally convert to feature columns

features_train = learn.infer_real_valued_columns_from_input(x_train)
features_CV = learn.infer_real_valued_columns_from_input(x_CV)
features_test = learn.infer_real_valued_columns_from_input(x_test)

# Next we set up the regressor. This uses the much simplified approach
# of the learn contrib team. They have a lingo for creating custom
# regressors, so really that's probably the way for me to approach this
# to avoid as many errors as possible.

regressor = learn.DNNRegressor(feature_columns=features_train,
                               hidden_units=[1000, 100, 100, 100, 100, 100],
                               model_dir='model/')
#regressor = learn.LinearRegressor(feature_columns=features_train, model_dir = 'model/')

# And this is the section that actually runs the regressor. Note that I
# no longer need to bother with things like figuring out batching or
X = scaler.transform(X)

# Create results vector (a home win = 1, a home loss or tie = 0)
y = np.array(np.where(df['home_score'] > df['away_score'], 1, 0))

# Delete the dataframe to clear memory
del df

# Split out training and testing data sets
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X,y,test_size=0.2,random_state=42)

# Remove the 'week' 'home_team' and 'away_team' columns from matchups as they are not used in the algorithm
matchups.drop(['week', 'home_team', 'away_team'], axis=1, inplace=True)

# Build 3 layer fully connected DNN with 50, 50, 50 units respectively.
feature_columns = learn.infer_real_valued_columns_from_input(X_train)
regressor = learn.DNNRegressor(feature_columns=feature_columns, hidden_units=[100, 100, 100])

# Fit
regressor.fit(X_train, y_train, steps=500)

# Predict and score
y_predicted = list(regressor.predict(x=scaler.transform(matchups), as_iterable=True))

print y_predicted
raw_input

min_val = min(y_predicted)
max_val = max(y_predicted)
y_predicted = (y_predicted - min_val) / (max_val - min_val)
Ejemplo n.º 24
0
# Boston-housing linear regression with tf.contrib.learn's LinearRegressor.
from sklearn import datasets, metrics, preprocessing, cross_validation
import numpy as np

boston = datasets.load_boston()
X, y = boston.data, boston.target

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, \
                            test_size = 0.25, random_state =33)
# Standardize the features (fit on train, reuse the same scaler on test).
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# skflow has been folded into tensorflow.contrib.learn; use that namespace.
import tensorflow.contrib.learn.python.learn as learn
import tensorflow as tf
# Plain gradient descent: the default learning rate (0.2) makes the
# gradients explode, so set 0.01 explicitly.
tf_lr = learn.LinearRegressor(feature_columns=learn.infer_real_valued_columns_from_input(X_train), \
                optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.01))

tf_lr.fit(X_train, y_train, steps=1000, batch_size=50)
tf_lr_y_predict = tf_lr.predict(X_test)
tf_lr_y_predict = np.array(list(tf_lr_y_predict))

# sklearn metrics take (y_true, y_pred); MAE/MSE are symmetric but follow
# the convention anyway. Also fixed the "absoluate" typo in the label.
print('absolute error:', metrics.mean_absolute_error(y_test, tf_lr_y_predict),
      '\n')
print('mean squared error:',
      metrics.mean_squared_error(y_test, tf_lr_y_predict), '\n')
# BUG FIX: r2_score is NOT symmetric — the ground truth must come first.
# The original call passed (predictions, truth) and reported a wrong R².
print('R-squared value:', metrics.r2_score(y_test, tf_lr_y_predict))