コード例 #1
0
def main(unused_argv):
  """Compare an iris DNN fitted without a monitor against one fitted with it.

  The iris samples are split 80/20 into a fitting part and a hold-out part,
  and the fitting part is split 80/20 again so that a ValidationMonitor
  (created with early_stopping_rounds=200) gets its own samples. Both
  classifiers share the same layer sizes, class count and step budget. The
  function prints whether the monitored model scored higher on the hold-out
  part.

  Args:
    unused_argv: Ignored.
  """
  dataset = datasets.load_iris()
  fit_x, holdout_x, fit_y, holdout_y = train_test_split(
      dataset.data, dataset.target, test_size=0.2, random_state=42)

  # Second split: the monitor is scored on samples the model is not fitted on.
  fit_x, monitor_x, fit_y, monitor_y = train_test_split(
      fit_x, fit_y, test_size=0.2, random_state=42)
  validation_monitor = learn.monitors.ValidationMonitor(
      monitor_x, monitor_y, early_stopping_rounds=200)

  # Baseline: fitted for the full step budget with no monitor attached.
  plain_model = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10],
      n_classes=3,
      model_dir='/tmp/iris_model/')
  plain_model.fit(x=fit_x, y=fit_y, steps=2000)
  plain_predictions = plain_model.predict(holdout_x)
  plain_score = metrics.accuracy_score(holdout_y, plain_predictions)

  # Monitored model: checkpoints are saved every second so that the monitor
  # has fresh checkpoints to pick up (per the original author's note).
  frequent_checkpoints = tf.contrib.learn.RunConfig(save_checkpoints_secs=1)
  monitored_model = learn.TensorFlowDNNClassifier(
      hidden_units=[10, 20, 10],
      n_classes=3,
      model_dir='/tmp/iris_model_val/',
      config=frequent_checkpoints)
  monitored_model.fit(
      x=fit_x, y=fit_y, steps=2000, monitors=[validation_monitor])
  monitored_predictions = monitored_model.predict(holdout_x)
  monitored_score = metrics.accuracy_score(holdout_y, monitored_predictions)

  # In many applications, the score is improved by using early stopping.
  print(monitored_score > plain_score)
コード例 #2
0
    def train(self, train, *args, **kwargs):
        """Fit a TensorFlowDNNClassifier on vectorized questions.

        ``hidden_units``, ``n_classes`` and ``steps`` are filled in with
        [10, 20, 10], 5 and 5000 when absent from ``kwargs``; the resulting
        ``kwargs`` are forwarded unchanged to the classifier constructor.
        ``*args`` is accepted but not used.

        Each item of ``train`` is turned into a vector with
        ``self.create_vector`` and the labels from ``train.target()`` are
        encoded with ``self.encoder``.

        Returns:
            The fitted classifier.
        """
        import numpy as np
        from tensorflow.contrib import learn as skflow

        # (keyword, fallback value, log template), applied in this order.
        fallbacks = (
            ('hidden_units', [10, 20, 10], 'Hidden Units = {}'),
            ('n_classes', 5, 'n_classes = {}'),
            ('steps', 5000, 'Number of steps = {}'),
        )
        for keyword, fallback, template in fallbacks:
            kwargs.setdefault(keyword, fallback)
            self.logger.info(template.format(kwargs[keyword]))

        raw_labels = train.target()
        self.logger.info('Loading dictionary from {}'.format(self.glove_data))

        self.logger.info('Creating vectors for each question')
        question_vectors = [self.create_vector(item) for item in train]
        x_train = np.asarray(question_vectors)
        y_train = self.encoder.transform(raw_labels)
        self.logger.info('Encoded classes = {}'.format(self.encoder.classes_))

        model = skflow.TensorFlowDNNClassifier(**kwargs)

        self.logger.info('Fitting model')
        model.fit(x_train, y_train)
        return model
コード例 #3
0
def run_training():
    """Read one batch of CSV rows through input queues and fit a DNN on it.

    Every CSV row is parsed as 13 float columns followed by one integer
    column. The 13 floats are packed into a feature vector and the integer,
    shifted down by one, is used as the label. One batch of ``batch_size``
    rows is pulled from the training file and handed to
    ``TensorFlowDNNClassifier.fit``.

    Uses the names ``file_train``, ``file_test``, ``FEATURES`` and
    ``batch_size`` from the enclosing module.
    """
    # 13 float feature columns followed by one integer label column.
    record_defaults = [[1.] for _ in range(13)] + [[1]]

    def _read_row(csv_path):
        """Return (feature_vector, label) tensors for one row of csv_path."""
        filename_queue = tf.train.string_input_producer([csv_path],
                                                        num_epochs=1,
                                                        shuffle=False)
        reader = tf.TextLineReader(skip_header_lines=0)
        _, csv_row = reader.read(filename_queue)
        columns = tf.decode_csv(csv_row, record_defaults=record_defaults)
        return tf.pack(columns[:-1]), columns[-1]

    features, label = _read_row(file_train)
    # NOTE(review): the test pipeline is built, as in the original, but
    # nothing consumes it yet.
    features_test, label_test = _read_row(file_test)

    min_after_dequeue = 100000
    capacity = min_after_dequeue + FEATURES * batch_size

    images_batch, label_batch = tf.train.batch([features, label - 1],
                                               batch_size=batch_size,
                                               capacity=capacity,
                                               num_threads=1)

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=4,
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))

    init = tf.initialize_all_variables()
    # string_input_producer(num_epochs=...) keeps its epoch counter in a
    # local variable, which initialize_all_variables() does not cover.
    init_local = tf.initialize_local_variables()
    with tf.Session() as sess:
        sess.run(init)
        sess.run(init_local)

        # The input queues are only filled once their runner threads are
        # started; without them the sess.run() below would block forever.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            print('hola')
            for _ in range(1):
                example, label_values = sess.run([images_batch, label_batch])
                classifier.fit(example, label_values)
                print('hola')
        finally:
            # Always stop and join the runner threads, even if fitting fails.
            coord.request_stop()
            coord.join(threads)
コード例 #4
0
ファイル: classifiers.py プロジェクト: fawind/run-dmc
    def __init__(self, X: np.array, Y: np.array, tune_parameters=False):
        """Set up a skflow DNN classifier sized to the labels found in ``Y``.

        ``X`` is stored on the instance in dense form. The number of output
        classes is the number of distinct values in ``Y``. When
        ``tune_parameters`` is true, a random-search parameter distribution
        is stored in ``self.param_dist_random``.

        The network is built from ``self.hidden_units``, ``self.steps``,
        ``self.learning_rate`` and ``self.optimizer``, which this method
        reads but does not assign.
        """
        # NOTE(review): the parent always receives tune_parameters=False,
        # whatever the caller passed -- presumably intentional; confirm.
        super().__init__(X, Y, tune_parameters=False)

        # TensorFlow/Skflow doesn't support sparse matrices.
        self.X = X.todense()
        distinct_label_count = len(np.unique(Y))

        if tune_parameters:
            search_space = {
                'learning_rate': random.random(100),
                'optimizer': ['Adam'],
                'hidden_units': [sp_randint(50, 500), sp_randint(50, 500)],
            }
            self.param_dist_random = search_space

        network_options = dict(
            hidden_units=self.hidden_units,
            n_classes=distinct_label_count,
            steps=self.steps,
            learning_rate=self.learning_rate,
            verbose=0,
            optimizer=self.optimizer,
        )
        self.clf = skflow.TensorFlowDNNClassifier(**network_options)
コード例 #5
0
def question2():
    """Fit a DNN classifier on the ``weight`` column and print its RMSE.

    Reads ``./data/submit-hanmingli-prog2q1.csv``, index-encodes the
    ``weight`` column, splits the data 75/25 and fits a 10-20-10 network
    with a ValidationMonitor attached. The monitor is fed the same test
    split that is used for the final score.
    """
    print()
    print("***Question 2***")

    path = "./data/"

    # Load the dataset; 'NA' and '?' are read as missing values.
    source_csv = os.path.join(path, "submit-hanmingli-prog2q1.csv")
    df = pd.read_csv(source_csv, na_values=['NA', '?'])

    encoded_weight = encode_text_index(df, "weight")
    # Predictors (x) and expected outcome (y).
    x, y = to_xy(df, 'weight')

    num_classes = len(encoded_weight)

    # Hold out a quarter of the rows.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=45)

    # Three hidden layers of 10, 20 and 10 units.
    network = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=num_classes,
        steps=10000)

    # Validation monitor used for early stopping.
    stop_monitor = skflow.monitors.ValidationMonitor(
        x_test,
        y_test,
        early_stopping_rounds=10000,
        print_steps=100,
        n_classes=num_classes)

    network.fit(x_train, y_train, monitor=stop_monitor)

    # Root mean squared error between predictions and the held-out labels.
    predictions = network.predict(x_test)
    mean_squared = metrics.mean_squared_error(predictions, y_test)
    score = np.sqrt(mean_squared)
    print("Final score (RMSE): {}".format(score))
コード例 #6
0
def main():
    """Cross-validate a DNN classifier on ``letter-recognition.csv``.

    The first CSV column holds the letter label and the remaining columns
    hold the features. Letters are mapped to class indices ('A' -> 0,
    'B' -> 1, and so on). A 3x100-unit network is fitted on each of 10
    stratified folds; the per-fold accuracies and their mean, as a
    percentage, are printed.
    """
    # Read the input data (named ``frame`` to avoid shadowing ``input``).
    frame = pd.read_csv('letter-recognition.csv', header=None)
    data = np.array(frame.values[:, 1:])

    # Transform letters to numbers ('A'->0, 'B'->1, and so on).
    labels = np.array(
        [ord(letter) - ord('A') for letter in frame.values[:, 0]])
    n_classes = 26

    # Classifier with 3 layers (100 units on each layer) and 20000 steps.
    classifier = skflow.TensorFlowDNNClassifier(hidden_units=[100, 100, 100],
                                                n_classes=n_classes,
                                                learning_rate=0.05,
                                                steps=20000)

    scores = []
    # 10-fold stratified cross validation.
    skf = StratifiedKFold(labels, n_folds=10)
    for train_index, test_index in skf:
        # Data and labels for the training part and the test part.
        train_data = data[train_index]
        train_labels = labels[train_index]
        test_data = data[test_index]
        test_labels = labels[test_index]

        # Fit on the training part, score on the test part.
        classifier.fit(train_data, train_labels)
        predictions = classifier.predict(test_data)
        scores.append(metrics.accuracy_score(test_labels, predictions))

    # Per-fold scores followed by their mean as a percentage. The mean is
    # taken over the folds actually scored, not over a hard-coded count.
    print(scores)
    mean_accuracy = sum(scores) / len(scores)
    print("Accuracy: %.5f%%" % (mean_accuracy * 100))
コード例 #7
0
def train():
    """Train the titanic DNN classifier, print its accuracy and save it.

    Reads the CSV at ``data_path``, uses the Sex, Age, SibSp and Fare
    columns to predict Survived, and writes the fitted classifier and the
    label encoder to ``model_path`` after removing that folder if present.
    """
    print("Training dnn_titanic")

    # Load the dataset and pick the feature and target columns.
    frame = pandas.read_csv(data_path)
    feature_names = ['Sex', 'Age', 'SibSp', 'Fare']
    X = frame[feature_names]
    y = frame['Survived']

    # Original author's note: fills missing strings with 'NA', numbers with 0.
    X = preprocess.MultiFillna(X)
    # Original author's note: string columns are always encoded to integers.
    mle = preprocess.MultiLabelEncoder(columns=[])
    X = mle.fit_transform(X)

    # Random split: 80% for training, 20% for testing.
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Three hidden layers of 10, 20 and 10 units; continue_training=True
    # lets the model be trained further later on.
    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=2,
        steps=500,
        learning_rate=0.01,
        continue_training=True)

    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))  # should be around 0.74

    # Best-effort removal of a previous checkpoint folder.
    try:
        shutil.rmtree(model_path)
    except OSError:
        pass

    # Persist both the model and the label encoder for later use.
    classifier.save(model_path)
    mle.save(model_path)
    print('Model saved to', model_path)
コード例 #8
0
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sklearn import metrics, cross_validation

from tensorflow.contrib import learn

# Load the iris dataset and hold out 20% of it for scoring.
iris = learn.datasets.load_dataset('iris')
split_parts = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Three hidden layers with 10, 20 and 10 units respectively.
classifier = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    steps=200)

# Fit on the training part, then report accuracy on the held-out part.
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
score = metrics.accuracy_score(y_test, predictions)
print('Accuracy: {0:f}'.format(score))
コード例 #9
0
#  http://terrytangyuan.github.io/2016/03/14/scikit-flow-intro/

import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

# Fit a 10-20-10 DNN on the full iris dataset and score it on the same data.
iris = datasets.load_iris()
classifier = skflow.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3)
classifier.fit(iris.data, iris.target)
predicted = classifier.predict(iris.data)
score = metrics.accuracy_score(iris.target, predicted)
print("Accuracy: %f" % score)
コード例 #10
0
    sess.run(v2.initializer)
    # Has the same effect as the two lines above.
    sess.run(tf.global_variables_initializer())
    # Running a variable means running the tensor operation inside the variable.
    print(sess.run([v1, v2]))
    print(sess.run([v1._variable, v2._variable]))




import tensorflow as tf
from tensorflow.contrib import learn as skflow

# Binary classifier with three hidden layers of 10, 20 and 10 units.
# The layer sizes are passed through the ``hidden_units`` keyword.
classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=2,
        batch_size=128,
        steps=500,
        learning_rate=0.05)



import tensorflow as tf

# A single-row matrix multiplied by a three-row, single-column matrix.
x = tf.constant([[1.0, 2.0, 3.0]])
w = tf.constant([[2.0], [2.0], [2.0]])
y = tf.matmul(x, w)
print(x.get_shape())

# Open a session and run the global variable initializer in it.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
コード例 #11
0
# Load iris and keep 20% of it aside for the final accuracy comparison.
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# Split the training part again so the monitor has its own validation samples.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)
val_monitor = learn.monitors.ValidationMonitor(
    X_val, y_val, early_stopping_rounds=200)

# Baseline classifier: fitted with no monitor attached.
classifier1 = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/tmp/iris_model/')
classifier1.fit(X_train, y_train, steps=2000)
predictions1 = classifier1.predict(X_test)
score1 = metrics.accuracy_score(y_test, predictions1)

# Second classifier: same network, fitted with the validation monitor.
classifier2 = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/tmp/iris_model_val/')
classifier2.fit(X_train, y_train, val_monitor, steps=2000)
predictions2 = classifier2.predict(X_test)
score2 = metrics.accuracy_score(y_test, predictions2)

# In many applications, the score is improved by using early stopping.
print(score2 > score1)
コード例 #12
0
def question5():
    """Cross-validate a DNN that predicts ``cylinders`` on the auto-mpg data.

    Reads ``auto-mpg.csv`` from the directory named by ``path`` (a name
    taken from the enclosing module), fills missing horsepower values,
    z-score encodes the numeric columns, shuffles the rows with a fixed
    seed and runs 5-fold cross-validation. The out-of-sample labels and
    predictions are written next to the input data in
    ``submit-hanmingli-prog2q5.csv``.
    """
    print()
    print("***Question 5***")
    filename_read = os.path.join(path, "auto-mpg.csv")
    filename_write = os.path.join(path, "submit-hanmingli-prog2q5.csv")
    df = pd.read_csv(filename_read, na_values=['NA', '?'])

    # Create the feature vector.
    missing_median(df, 'horsepower')
    for column in ('mpg', 'horsepower', 'weight', 'displacement',
                   'acceleration', 'origin'):
        encode_numeric_zscore(df, column)

    # Shuffle with a fixed seed, then renumber the rows 0..n-1.
    np.random.seed(42)
    df = df.reindex(np.random.permutation(df.index))
    df.reset_index(inplace=True, drop=True)

    # Take the name column out only AFTER the shuffle and index reset, so
    # that it shares the index of the shuffled frame. pd.concat below
    # aligns on the index; names taken before the shuffle would be attached
    # to the wrong rows.
    tem = df['name']
    df.drop('name', axis=1, inplace=True)

    # Encode to a 2D matrix for training.
    x, y = to_xy(df, 'cylinders')

    # Cross validate.
    kf = KFold(len(x), n_folds=5)

    oos_y = []
    oos_pred = []
    fold = 1
    for train, test in kf:
        print("Fold #{}".format(fold))
        fold += 1

        x_train = x[train]
        y_train = y[train]
        x_test = x[test]
        y_test = y[test]

        # Deep neural network with 3 hidden layers of 10, 20, 10 units.
        classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                    n_classes=9,
                                                    steps=500)

        # Early stopping, monitored on this fold's test part.
        early_stop = skflow.monitors.ValidationMonitor(
            x_test,
            y_test,
            early_stopping_rounds=200,
            print_steps=50,
            n_classes=9)

        # Fit/train the neural network.
        classifier.fit(x_train, y_train, monitor=early_stop)

        # Add the predictions to the out-of-sample prediction list.
        pred = classifier.predict(x_test)

        oos_y.append(y_test)
        oos_pred.append(pred)

        # Per-fold root mean squared error.
        score = np.sqrt(metrics.mean_squared_error(pred, y_test))
        print("Fold score: {}".format(score))

    # Build the out-of-sample prediction list and calculate the error.
    oos_y = np.concatenate(oos_y)
    oos_pred = np.concatenate(oos_pred)
    score = np.sqrt(metrics.mean_squared_error(oos_pred, oos_y))
    print("Final, out of sample score: {}".format(score))

    # Write the cross-validated prediction.
    oos_y = pd.DataFrame(oos_y)
    oos_pred = pd.DataFrame(oos_pred)
    oos_y.columns = ['ideal']
    oos_pred.columns = ['predict']
    oosDF = pd.concat([df, tem, oos_y, oos_pred], axis=1)
    oosDF.to_csv(filename_write, index=False)
fold_recall = []
fold_MSE = []
fold_RMSE = []
fold_MAE = []
fold_confusion_matrix = np.array([[0, 0], [0, 0]])
fold_number = 1
for train_index, test_index in skf:
    print("Fold Number:", fold_number)
    fold_number += 1
    x_train1, x_test1 = df_data_NM[train_index], df_data_NM[test_index]
    y_train1, y_test1 = df_label_NM_1D[train_index], df_label_NM_1D[test_index]

    print(len(y_test1))
    # Build a DNN with a single hidden layer of 30 units.
    classifier = skflow.TensorFlowDNNClassifier(hidden_units=[30],
                                                n_classes=2,
                                                steps=50000)

    # Fit
    classifier.fit(x_train1, y_train1)

    # Predict once per fold and reuse the result for every metric instead
    # of running the network over the test part three times.
    fold_predictions = classifier.predict(x_test1)

    score_accracy = metrics.accuracy_score(y_test1, fold_predictions)
    fold_accuracy.append(score_accracy)

    score_precision = metrics.precision_score(y_test1, fold_predictions)
    fold_precision.append(score_precision)

    score_recall = metrics.recall_score(y_test1, fold_predictions)
    fold_recall.append(score_recall)
コード例 #14
0
 def get_classifier(self, X, y):
     """Return a new two-class DNN classifier with hidden layers of 5 and 3 units.

     ``X`` and ``y`` are accepted but not used.
     """
     layer_sizes = [5, 3]
     return skflow.TensorFlowDNNClassifier(
         hidden_units=layer_sizes,
         n_classes=2)
コード例 #15
0
iris = learn.datasets.load_dataset('iris')
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# Note that we are saving and loading the iris data in h5 format as a simple
# demonstration here. The context manager closes the file even if one of
# the writes fails.
with h5py.File('test_hdf5.h5', 'w') as h5f:
    h5f.create_dataset('X_train', data=x_train)
    h5f.create_dataset('X_test', data=x_test)
    h5f.create_dataset('y_train', data=y_train)
    h5f.create_dataset('y_test', data=y_test)

# The datasets read below are handles into the open file, so the file has to
# stay open while they are used and is closed once scoring is done.
h5f = h5py.File('test_hdf5.h5', 'r')
try:
    x_train = h5f['X_train']
    x_test = h5f['X_test']
    y_train = h5f['y_train']
    y_test = h5f['y_test']

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.TensorFlowDNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=200)

    # Fit and predict.
    classifier.fit(x_train, y_train)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
finally:
    h5f.close()
print('Accuracy: {0:f}'.format(score))
コード例 #16
0
ファイル: box_search.py プロジェクト: tobby2002/portfolio-1
                                                  labels,
                                                  test_size=0.2,
                                                  random_state=42)

# Remove this exit to move on
# exit()
'''
Part 4 : Simple DNN
------------------------------------------------------------------------------------------------------------------
'''
# Build a simple deep neural network classifier (three hidden layers of
# 10, 20 and 10 units). For regression, learn.TensorFlowDNNRegressor can be
# used instead.
# NOTE: an earlier variant also passed feature_columns=list(range(88)).
dnn_options = dict(
    hidden_units=[10, 20, 10],
    n_classes=y_classes,
    batch_size=1,
    steps=100,
    optimizer="Adam",
    learning_rate=0.01,
    dropout=0.6,
)
classifier = learn.TensorFlowDNNClassifier(**dnn_options)

# Train the network, passing '/tmp/tf_learn/' as the log directory.
classifier.fit(X_train, y_train, logdir='/tmp/tf_learn/')

# Evaluate on the dev data.
predictions = classifier.predict(X_dev)
score = metrics.accuracy_score(y_dev, predictions)
print("Accuracy: %f" % score)