Example #1
# Assumed imports for this snippet (not shown in the original):
import numpy as np
from sklearn import metrics
from tensorflow.contrib import learn


def main(unused_args):
    ### Download and load MNIST dataset.
    mnist = learn.datasets.load_dataset('mnist')

    ### Linear classifier.
    feature_columns = learn.infer_real_valued_columns_from_input(
        mnist.train.images)
    classifier = learn.LinearClassifier(feature_columns=feature_columns,
                                        n_classes=10)
    classifier.fit(mnist.train.images,
                   mnist.train.labels.astype(np.int32),
                   batch_size=100,
                   steps=1000)
    score = metrics.accuracy_score(mnist.test.labels,
                                   list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))

    ### Convolutional network
    classifier = learn.Estimator(model_fn=conv_model)
    classifier.fit(mnist.train.images,
                   mnist.train.labels,
                   batch_size=100,
                   steps=20000)
    score = metrics.accuracy_score(mnist.test.labels,
                                   list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))
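The convolutional branch above calls a conv_model function that is not included in the snippet. A rough sketch of what such a model_fn for learn.Estimator might look like (an assumption modeled on the classic contrib.learn MNIST example, not the original code):

import tensorflow as tf
from tensorflow.contrib import layers

def conv_model(feature, target, mode):
    # Reshape flat 784-pixel rows into 28x28x1 images.
    feature = tf.reshape(feature, [-1, 28, 28, 1])
    # Two conv + max-pool stages.
    conv1 = layers.convolution2d(feature, 32, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    conv2 = layers.convolution2d(pool1, 64, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='SAME')
    # Dense softmax readout over the 10 digit classes.
    flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    logits = layers.fully_connected(flat, 10, activation_fn=None)
    onehot = tf.one_hot(tf.cast(target, tf.int32), 10)
    loss = tf.losses.softmax_cross_entropy(onehot, logits)
    train_op = layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        optimizer='SGD', learning_rate=0.001)
    return tf.argmax(logits, 1), loss, train_op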
def build_lr_estimator(model_dir, feature_count):
    return estimator.SKCompat(
        learn.LinearClassifier(feature_columns=[
            tf.contrib.layers.real_valued_column("", dimension=feature_count)
        ],
                               n_classes=2,
                               model_dir=model_dir))
Example #3
def linear_model(output_dir):
    real, sparse = get_features()
    all_cols = {}  # renamed from `all`, which shadows the builtin
    all_cols.update(real)
    all_cols.update(sparse)
    estimator = tflearn.LinearClassifier(model_dir=output_dir,
                                         feature_columns=all_cols.values())
    estimator.params["head"]._thresholds = [0.7]  # FIXME: hack; pokes a private attribute
    return estimator
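The _thresholds assignment above pokes a private attribute of the estimator's head. A less fragile sketch, assuming the custom threshold is only needed at prediction time, applies it to the predicted probabilities instead:

def predict_with_threshold(estimator, input_fn, threshold=0.7):
    # Hypothetical helper: classify as positive when the positive-class
    # probability clears the threshold, instead of patching the head.
    probabilities = estimator.predict_proba(input_fn=input_fn)
    return [int(p[1] >= threshold) for p in probabilities]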
Example #4
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

  Args:
    working_dir: Directory to read transformed data and metadata from and to
        write exported model to.
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]

    # Wrap categorical columns.
    one_hot_columns = [
        tf.feature_column.categorical_column_with_vocabulary_file(
            key=key,
            vocabulary_file=tf_transform_output.vocabulary_file_by_name(
                vocab_filename=key)) for key in CATEGORICAL_FEATURE_KEYS
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  # integer division so max_steps is an int
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on test dataset.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
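_make_training_input_fn and _make_serving_input_fn are helpers defined elsewhere in the original module. A minimal sketch of the training input_fn, assuming the transformed data are TFRecord files of tf.Examples and that a hypothetical LABEL_KEY constant names the label feature:

import tensorflow as tf

LABEL_KEY = 'label'  # hypothetical; the original module defines its own label key

def _make_training_input_fn(tf_transform_output, transformed_examples,
                            batch_size):
    def input_fn():
        # Parse batches of transformed tf.Examples using the feature spec
        # recorded by tf.Transform.
        dataset = tf.contrib.data.make_batched_features_dataset(
            file_pattern=transformed_examples,
            batch_size=batch_size,
            features=tf_transform_output.transformed_feature_spec(),
            shuffle=True)
        features = dataset.make_one_shot_iterator().get_next()
        label = features.pop(LABEL_KEY)
        return features, label
    return input_fn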
Example #5
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern, transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
  """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
        shards
    transformed_test_filepattern: File pattern for transformed test data shards
    transformed_metadata_dir: Directory containing transformed data metadata
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """

  # Wrap scalars as real valued columns.
  real_valued_columns = [feature_column.real_valued_column(key)
                         for key in NUMERIC_COLUMNS]

  # Wrap categorical columns.  Note the combiner is irrelevant since the input
  # only has one value set per feature per instance.
  one_hot_columns = [
      feature_column.sparse_column_with_integerized_feature(
          key, bucket_size=bucket_size, combiner='sum')
      for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)]

  estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

  transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
  train_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_train_filepattern,
      training_batch_size=TRAIN_BATCH_SIZE,
      label_keys=[LABEL_COLUMN])

  # Estimate the model using the default optimizer.
  estimator.fit(
      input_fn=train_input_fn,
      max_steps=TRAIN_NUM_EPOCHS * num_train_instances // TRAIN_BATCH_SIZE)

  # Evaluate model on test dataset.
  eval_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_test_filepattern,
      training_batch_size=1,
      label_keys=[LABEL_COLUMN])

  return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
Example #6
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on evaluation data.

  Args:
    transformed_train_filepattern: Base filename for transformed training data
        shards
    transformed_test_filepattern: Base filename for transformed evaluation data
        shards
    transformed_metadata_dir: Directory containing transformed data metadata
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """
    # Unrecognized tokens are represented by -1, but
    # sparse_column_with_integerized_feature uses the mod operator to map integers
    # to the range [0, bucket_size).  By choosing bucket_size=VOCAB_SIZE + 1, we
    # represent unrecognized tokens as VOCAB_SIZE.
    review_column = feature_column.sparse_column_with_integerized_feature(
        REVIEW_COLUMN, bucket_size=VOCAB_SIZE + 1, combiner='sum')
    weighted_reviews = feature_column.weighted_sparse_column(
        review_column, REVIEW_WEIGHT)

    estimator = learn.LinearClassifier([weighted_reviews])

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
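The mod-based out-of-vocabulary mapping described in the comment above is easy to check in plain Python (the vocabulary size here is a made-up value):

VOCAB_SIZE = 10000          # made-up value for illustration
bucket_size = VOCAB_SIZE + 1
print(-1 % bucket_size)     # -> 10000 == VOCAB_SIZE: OOV tokens get their own bucket
print(0 % bucket_size, (VOCAB_SIZE - 1) % bucket_size)  # in-vocab ids map to themselves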
Example #7
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

  Args:
    working_dir: Directory to read transformed data and metadata from and to
        write exported model to.
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]

    # Wrap categorical columns.  Note the combiner is irrelevant since the input
    # only has one value set per feature per instance.
    one_hot_columns = [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets)
        for key, num_buckets in zip(CATEGORICAL_FEATURE_KEYS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(working_dir,
                                             TRANSFORMED_TRAIN_DATA_FILEBASE,
                                             batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on test dataset.
    eval_input_fn = _make_training_input_fn(working_dir,
                                            TRANSFORMED_TEST_DATA_FILEBASE,
                                            batch_size=1)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(working_dir)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
Example #8
def Linearsklearn():
    NUM_STEPS = 200
    MINIBATCH_SIZE = 506

    # Assumes boston = datasets.load_boston() and x_data = boston.data
    # are prepared earlier in the original script.
    feature_columns = learn.infer_real_valued_columns_from_input(x_data)

    # The Boston target is continuous and the result is reported as MSE, so a
    # LinearRegressor (the original said LinearClassifier, which cannot fit
    # float labels) is the appropriate estimator here.
    reg = learn.LinearRegressor(
        feature_columns=feature_columns,
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))
    reg.fit(x_data, boston.target, steps=NUM_STEPS, batch_size=MINIBATCH_SIZE)

    MSE = reg.evaluate(x_data, boston.target, steps=1)
    print(MSE)
Example #9
def build_estimator(model_dir, model_type):
    """build an estimator"""

    # base sparse feature process
    gender = layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(column_name='native_country', hash_bucket_size=1000)

    # base continuous feature
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # Transformation: bucketization turns the continuous variable into
    # categorical buckets, which can improve accuracy.
    age_bucket = layers.bucketized_column(source_column=age,
                                          boundaries=[18, 25, 30, 35, 40, 45,
                                                      50, 55, 60, 65])

    # Wide columns and deep columns:
    # the features used by the deep model vs. the wide model.
    # The wide model uses only the categorical features (and their crosses).
    wide_columns = [gender, native_country, education, relationship, workclass, occupation, age_bucket,
                    layers.crossed_column(columns=[education, occupation], hash_bucket_size=int(1e4)),
                    layers.crossed_column(columns=[age_bucket, education, occupation], hash_bucket_size=int(1e6)),
                    layers.crossed_column(columns=[native_country, occupation], hash_bucket_size=int(1e4))]

    deep_columns = [layers.embedding_column(workclass, dimension=8),
                    layers.embedding_column(education, dimension=8),
                    layers.embedding_column(gender, dimension=8),
                    layers.embedding_column(relationship, dimension=8),
                    layers.embedding_column(native_country, dimension=8),
                    layers.embedding_column(occupation, dimension=8),
                    age, education_num, capital_gain, capital_loss, hours_per_week]

    if model_type == "wide":
        m=learn.LinearClassifier(feature_columns=wide_columns, model_dir=model_dir)
    elif model_type == "deep":
        m=learn.DNNClassifier(feature_columns=deep_columns, model_dir=model_dir, hidden_units=[100, 50])
    else:
        m=learn.DNNLinearCombinedClassifier(model_dir=model_dir,
                                            linear_feature_columns=wide_columns,
                                            dnn_feature_columns=deep_columns,
                                            dnn_hidden_units=[256, 128, 64],
                                            dnn_activation_fn=tf.nn.relu)
    return m
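A hypothetical way to drive build_estimator above, feeding a pandas DataFrame through a hand-rolled input_fn in the wide-and-deep tutorial style (train_df and the exact column lists are assumptions):

import tensorflow as tf

CONTINUOUS = ['age', 'education_num', 'capital_gain', 'capital_loss',
              'hours_per_week']
CATEGORICAL = ['gender', 'education', 'relationship', 'workclass',
               'occupation', 'native_country']

def make_input_fn(df, label_name='label'):
    def input_fn():
        # Dense tensors for the continuous columns.
        features = {k: tf.constant(df[k].values) for k in CONTINUOUS}
        # Sparse one-value-per-row tensors for the categorical columns.
        features.update({
            k: tf.SparseTensor(indices=[[i, 0] for i in range(df[k].size)],
                               values=df[k].values,
                               dense_shape=[df[k].size, 1])
            for k in CATEGORICAL})
        return features, tf.constant(df[label_name].values)
    return input_fn

# train_df: a pandas DataFrame with the columns above (assumption).
m = build_estimator('/tmp/census_model', 'wide')
m.fit(input_fn=make_input_fn(train_df), steps=200)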
Example #10
def train_and_evaluate(transformed_train_data_base, transformed_eval_data_base,
                       transformed_metadata_dir):
    """Train the model on training data and evaluate on evaluation data.

  Args:
    transformed_train_data_base: Base filename for transformed training data
        shards
    transformed_eval_data_base: Base filename for cleaned evaluation data
        shards
    transformed_metadata_dir: Directory containing transformed data metadata.

  Returns:
    The results from the estimator's 'evaluate' method.
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size)
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_data_base + '*',
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=['label'])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_eval_data_base + '*',
        training_batch_size=1,
        label_keys=['label'])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_EVAL_INSTANCES)
Example #11
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on evaluation data.

  Args:
    working_dir: Directory to read transformed data and metadata from.
    num_train_instances: Number of instances in train set
    num_test_instances: Number of instances in test set

  Returns:
    The results from the estimator's 'evaluate' method
  """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    # Unrecognized tokens are represented by -1, but
    # categorical_column_with_identity uses the mod operator to map integers
    # to the range [0, bucket_size).  By choosing bucket_size=VOCAB_SIZE + 1, we
    # represent unrecognized tokens as VOCAB_SIZE.
    review_column = tf.feature_column.categorical_column_with_identity(
        REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
    weighted_reviews = tf.feature_column.weighted_categorical_column(
        review_column, REVIEW_WEIGHT_KEY)

    estimator = learn.LinearClassifier([weighted_reviews])

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances //
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1)
    result = estimator.evaluate(input_fn=eval_input_fn,
                                steps=num_test_instances)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return result
Example #12
    def train_and_evaluate(output_dir):
        review_column = feature_column.sparse_column_with_integerized_feature(
            const.REVIEW_COLUMN, bucket_size=vocab_size + 1, combiner='sum')
        weighted_reviews = feature_column.weighted_sparse_column(
            review_column, const.REVIEW_WEIGHT)

        estimator = learn.LinearClassifier(
            feature_columns=[weighted_reviews],
            n_classes=2,
            model_dir=output_dir,
            config=tf.contrib.learn.RunConfig(save_checkpoints_secs=30))

        transformed_metadata = metadata_io.read_metadata(
            transformed_metadata_dir)
        raw_metadata = metadata_io.read_metadata(raw_metadata_dir)

        train_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_train_file_pattern,
            training_batch_size=train_batch_size,
            label_keys=[const.LABEL_COLUMN])

        eval_input_fn = input_fn_maker.build_training_input_fn(
            transformed_metadata,
            transformed_test_file_pattern,
            training_batch_size=1,
            label_keys=[const.LABEL_COLUMN])

        serving_input_fn = input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            transform_savedmodel_dir=output_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=[const.REVIEW_COLUMN])

        export_strategy = saved_model_export_utils.make_export_strategy(
            serving_input_fn,
            exports_to_keep=5,
            default_output_alternative_key=None)

        return tf.contrib.learn.Experiment(
            estimator=estimator,
            train_steps=(train_num_epochs * num_train_instances //
                         train_batch_size),
            eval_steps=num_test_instances,
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            export_strategies=export_strategy,
            min_eval_frequency=500)
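The Experiment returned by train_and_evaluate would typically be handed to learn_runner, which calls the function with an output directory and runs the train/eval schedule. A minimal sketch (the output path is an assumption):

import tensorflow as tf

# learn_runner invokes train_and_evaluate(output_dir) and runs the Experiment.
tf.contrib.learn.learn_runner.run(experiment_fn=train_and_evaluate,
                                  output_dir='/tmp/sentiment_model')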
Example #13
def run7():
    # Assumes numpy as np, matplotlib.pyplot as plt and
    # tensorflow.contrib.learn as learn are imported in the original module.
    mnist = learn.datasets.load_dataset('mnist')
    data = mnist.train.images
    labels = np.asarray(mnist.train.labels, dtype=np.int32)
    test_data = mnist.test.images
    test_labels = np.asarray(mnist.test.labels, dtype=np.int32)

    max_examples = 1000
    data = data[:max_examples]
    labels = labels[:max_examples]

    def display(i):
        img = test_data[i]
        plt.title('Example {}, Label: {}'.format(i, test_labels[i]))
        plt.imshow(img.reshape((28, 28)), cmap=plt.cm.gray)
        plt.show()

    # display(0)

    feature_columns = learn.infer_real_valued_columns_from_input(data)
    clf = learn.LinearClassifier(feature_columns=feature_columns, n_classes=10)
    clf.fit(data, labels, batch_size=100, steps=1000)

    "0.8607"
    print(clf.evaluate(test_data, test_labels)['accuracy'])

    # print('Predicted {}, Label: {}'.format(clf.predict(test_data[0]), test_labels[0]))

    weights = clf.weights_

    f, axes = plt.subplots(2, 5, figsize=(10, 4))
    axes = axes.reshape(-1)
    for i in range(len(axes)):
        a = axes[i]
        a.imshow(weights.T[i].reshape(28, 28), cmap=plt.cm.seismic)
        a.set_title(i)
        a.set_xticks(())
        a.set_yticks(())

    plt.show()
Example #14
    def trainClassifier(self):
        features = []
        featureExtraction = featureExtractor
        self.sensorData = np.array(self.sensorData)
        self.sensorData = self.sensorData[0:, 0:, 1:]
        self.soundData = np.array(self.soundData)
        self.labels = np.array(self.labels)

        for x in range(len(self.sensorData)):
            features.append(
                featureExtraction.extract(featureExtraction,
                                          self.sensorData[x],
                                          self.soundData[x, 0:]))
        features = np.array(features)
        # The original used features[0:] here, which overlaps the test and
        # validation slices; [0:500] matches the apparent intent.
        training = features[0:500]
        test = features[500:550]
        validation = features[550:]
        training_labels = self.labels[0:500]
        test_labels = self.labels[500:550]
        validation_labels = self.labels[550:]
        train_dataset, train_labels = self.randomize(training, training_labels)
        test_dataset, test_labels = self.randomize(test, test_labels)
        validation_dataset, validation_labels = self.randomize(
            validation, validation_labels)
        feature_columns = learn.infer_real_valued_columns_from_input(
            train_dataset)
        print("loading...")
        classifier = learn.LinearClassifier(
            n_classes=5,
            feature_columns=feature_columns,
            optimizer=tf.train.FtrlOptimizer(learning_rate_power=-0.69,
                                             learning_rate=0.00001,
                                             l1_regularization_strength=0.1))
        classifier.fit(train_dataset, train_labels, steps=30000)
        print("done")
        self.linearClassifier = classifier
Example #15
# (the top of this snippet is truncated in the original; the split presumably
# began as follows)
test, train = train[test_idx, :], \
              train[training_idx, :]
test_labels, train_labels = labels[test_idx], \
                            labels[training_idx]

train = np.array(train, dtype=np.float32)
test = np.array(test, dtype=np.float32)
train_labels = np.array(train_labels, dtype=np.int32)
test_labels = np.array(test_labels, dtype=np.int32)

# Convert features to learn style
feature_columns = learn.infer_real_valued_columns_from_input(
    train.reshape([-1, 36 * 36]))

# Logistic Regression
classifier = estimator.SKCompat(
    learn.LinearClassifier(feature_columns=feature_columns, n_classes=5))

# One line training
# steps is number of total batches
# steps*batch_size/len(train) = num_epochs
classifier.fit(train.reshape([-1, 36 * 36]),
               train_labels,
               steps=1024,
               batch_size=32)

# sklearn compatible accuracy
test_probs = classifier.predict(test.reshape([-1, 36 * 36]))
sklearn.metrics.accuracy_score(test_labels, test_probs['classes'])
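The steps/epochs relation in the comment above can be turned around to pick a step count for a target number of epochs (sizes here are made up):

num_train = 2000      # assumed training-set size
batch_size = 32
target_epochs = 16
steps = target_epochs * num_train // batch_size   # 1000 steps ~= 16 epochs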

# Dense neural net (the snippet is cut off here in the original; a plausible
# completion, assuming a small DNN on the same features, might be:)
classifier = estimator.SKCompat(
    learn.DNNClassifier(feature_columns=feature_columns,
                        hidden_units=[64, 32],
                        n_classes=5))
Example #16
# Assumed setup for this snippet (not shown in the original):
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

digits = datasets.load_digits()
#plt.imshow(digits['images'][66], cmap="gray", interpolation='none')

#plt.show()

from sklearn import svm

classifier = svm.SVC(gamma=0.001)
classifier.fit(digits.data, digits.target)
predicted = classifier.predict(digits.data)

print(np.mean(digits.target == predicted))

X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target)

import tensorflow as tf
from tensorflow.contrib import learn

n_classes = len(set(y_train))

classifier = learn.LinearClassifier(feature_columns=[
    tf.contrib.layers.real_valued_column("", dimension=X_train.shape[1])
],
                                    n_classes=n_classes)
classifier.fit(X_train, y_train, steps=10)

y_pred = list(classifier.predict(X_test))

from sklearn import metrics

print(metrics.classification_report(y_true=y_test, y_pred=y_pred))
Example #17
# (top of the snippet truncated in the original; presumably a display helper
# like the following, with image_width/image_height defined in the script)
def display(img):
    one_image = img.reshape(image_width, image_height)
    plt.axis('off')
    plt.imshow(one_image, cmap=cm.binary)
    plt.show()
    #pylab.show()


# output image
display(X_train[1:2].values)

mnist = learn.datasets.load_dataset('mnist')
feature_columns = learn.infer_real_valued_columns_from_input(
    mnist.train.images)

classifier = learn.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.01))
classifier.fit(X_train, y_train, steps=1000, batch_size=100)
linear_y_predict = list(classifier.predict(X_test))
accuracy_score = classifier.evaluate(X_train, y_train)["accuracy"]

print(accuracy_score)
print(linear_y_predict[:100])
linear_submission = pd.DataFrame({
    'ImageId': range(1, 28001),
    'Label': linear_y_predict
})
linear_submission.to_csv('linear_submission.csv', index=False)
print('linear done')

# classifier = learn.DNNClassifier(...)  # the snippet is cut off here in the original
Example #18
def build_estimator(model_dir, classifier):

  # Categorical columns
  sex = tf.contrib.layers.sparse_column_with_keys(column_name="Sex",
                                                     keys=["female", "male"])
  family = tf.contrib.layers.sparse_column_with_keys(column_name="Family",
                                                       keys=["Large", "Nuclear", "Solo"])
  child = tf.contrib.layers.sparse_column_with_keys(column_name="Child",
                                                       keys=["0", "1"])
  ageknown = tf.contrib.layers.sparse_column_with_keys(column_name="AgeKnown",
                                                       keys=["0", "1"])
  embarked = tf.contrib.layers.sparse_column_with_keys(column_name="Embarked",
                                                       keys=["C", "S", "Q"])
  young = tf.contrib.layers.sparse_column_with_keys(column_name="Young",
                                                       keys=["0", "1"])
  malebadticket = tf.contrib.layers.sparse_column_with_keys(column_name="MaleBadTicket",
                                                       keys=["0", "1"])
  cab = tf.contrib.layers.sparse_column_with_hash_bucket(
      "Cab", hash_bucket_size=10)
  namet = tf.contrib.layers.sparse_column_with_hash_bucket(
      "NameT", hash_bucket_size=20)

  # Continuous columns
  age = tf.contrib.layers.real_valued_column("Age")
  namelength = tf.contrib.layers.real_valued_column("NameLength")
  fare = tf.contrib.layers.real_valued_column("Fare")
  p_class = tf.contrib.layers.real_valued_column("Pclass")

  # Transformations.
  fare_buckets = tf.contrib.layers.bucketized_column(
      fare,
      boundaries=[5, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
  age_buckets = tf.contrib.layers.bucketized_column(
      age,
      boundaries=[5, 18, 25, 30, 35, 40, 45, 50, 55, 65])
  pclass_buckets = tf.contrib.layers.bucketized_column(
      p_class, boundaries=[1, 2, 3])

  # Wide columns and deep columns.
  wide_columns = [sex, cab, namet, child, ageknown, embarked, young, family,
                  tf.contrib.layers.crossed_column(
                      [age_buckets, sex],
                      hash_bucket_size=int(1e3)),
                  tf.contrib.layers.crossed_column(
                      [pclass_buckets, sex],
                      hash_bucket_size=int(1e3)),
                  tf.contrib.layers.crossed_column(
                      [fare_buckets, pclass_buckets],
                      hash_bucket_size=int(1e3)),
                  tf.contrib.layers.crossed_column(
                      [embarked, pclass_buckets],
                      hash_bucket_size=int(1e3)),
                  tf.contrib.layers.crossed_column(
                      [embarked, sex],
                      hash_bucket_size=int(1e3))]


  deep_columns = [
      namelength,
      fare,
      p_class,
      tf.contrib.layers.embedding_column(sex, dimension=8),
      tf.contrib.layers.embedding_column(child, dimension=8),
      tf.contrib.layers.embedding_column(family, dimension=8),
      tf.contrib.layers.embedding_column(cab, dimension=8),
      tf.contrib.layers.embedding_column(namet, dimension=8),
      tf.contrib.layers.embedding_column(ageknown, dimension=8),
      tf.contrib.layers.embedding_column(embarked, dimension=8),
      tf.contrib.layers.embedding_column(young, dimension=8),
      tf.contrib.layers.embedding_column(malebadticket, dimension=8)
  ]

  if classifier == "deep":
    return Learn.DNNClassifier(model_dir=model_dir,
                               feature_columns=deep_columns,
                               hidden_units=[32, 16],
                               optimizer=tf.train.ProximalAdagradOptimizer(
                               learning_rate=0.1,
                               l2_regularization_strength=0.001))
  elif classifier == "wide":
    return Learn.LinearClassifier(
            feature_columns=wide_columns,
            optimizer=tf.train.FtrlOptimizer(
                    learning_rate=5,
                    l1_regularization_strength=1000.0,
                    l2_regularization_strength=1000.0),
                    model_dir=model_dir)
  else:
    return Learn.DNNLinearCombinedClassifier(
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=[32, 16],
        model_dir=model_dir,
        linear_optimizer=tf.train.FtrlOptimizer(
            learning_rate=10,
            l1_regularization_strength=100.0,
            l2_regularization_strength=100.0),
        dnn_optimizer=tf.train.ProximalAdagradOptimizer(
            learning_rate=0.1,
            l2_regularization_strength=0.001))
Example #19
# The above code is equivalent to the commented Python code below, but the
# 'learn' version is recommended since it can run on any device (CPU, GPU,
# mobile), whereas the plain Python code runs only on the CPU.
#    x = sample[['x1','x2']].as_matrix()
#    y = sample[['label']].as_matrix()

# Divide the input data into train and validation
# (x and y are prepared earlier in the original script)
x_train, x_validate, y_train, y_validate = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=100)
type(x_train)

#feature engineering
feature_cols = [layers.real_valued_column("", dimension=2)]

#build the model configuration              
classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/home/algo/Algorithmica/tmp")

#build the model
classifier.fit(x=x_train, y=y_train, steps=1000)
classifier.weights_
classifier.bias_

# By default, enable_centered_bias = True in learn.LinearClassifier
centered_bias_weight = classifier.get_variable_value("centered_bias_weight")

#evaluate the model using validation set
results = classifier.evaluate(x=x_validate, y=y_validate, steps=1)
type(results)
for key in sorted(results):
    print "%s:%s" % (key, results[key])
Example #20
# (top of the snippet truncated in the original; presumably a CSV load such as
# the following, with a hypothetical filename)
import numpy as np
from sklearn import model_selection
from tensorflow.contrib import layers, learn

sample = learn.datasets.base.load_csv_with_header(filename='data.csv',
                                                  target_dtype=np.int32,
                                                  features_dtype=np.float32,
                                                  target_column=-1)
X = sample.data
y = sample.target

# Divide the input data into train and validation
X_train, X_validation, y_train, y_validation = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=100)
type(X_train)

#feature engineering
feature_cols = [layers.real_valued_column("", dimension=2)]

#build the model configuration
classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/home/algo/m2")

#build the model
classifier.fit(x=X_train, y=y_train, steps=1000)
classifier.weights_
classifier.bias_

#evaluate the model using validation set
results = classifier.evaluate(x=X_validation, y=y_validation, steps=1)
type(results)
for key in sorted(results):
    print "%s:%s" % (key, results[key])

# Predict the outcome of test data using model
X_test = np.array([[100.4, 21.5], [200.1, 26.1]])
predictions = list(classifier.predict(X_test))  # completes the truncated snippet
Example #21
#
#script harvested from:
#https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/learn/python/learn
#
# skflow intro classifiers
#---------------------------------------
#
import tensorflow.contrib.learn as learn
from sklearn import datasets, metrics, preprocessing
import tensorflow as tf
import tensorflow.contrib.layers as layers

#Linear Classifier
iris = datasets.load_iris()
feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
classifier = learn.LinearClassifier(n_classes=3,
                                    feature_columns=feature_columns)
classifier.fit(iris.data, iris.target, steps=200, batch_size=32)
iris_predictions = list(classifier.predict(iris.data, as_iterable=True))
score = metrics.accuracy_score(iris.target, iris_predictions)
print("Accuracy: %f" % score)

#Linear Regression
boston = datasets.load_boston()
x = preprocessing.StandardScaler().fit_transform(boston.data)
feature_columns = learn.infer_real_valued_columns_from_input(x)
regressor = learn.LinearRegressor(feature_columns=feature_columns)
regressor.fit(x, boston.target, steps=200, batch_size=32)
boston_predictions = list(regressor.predict(x, as_iterable=True))
score = metrics.mean_squared_error(boston_predictions, boston.target)
print("MSE: %f" % score)
Example #22
# (top of the snippet truncated in the original; presumably a CSV load such as
# the following, with a hypothetical filename. STEPS, get_features and
# get_targets are defined elsewhere in the original script.)
import numpy
import pandas
import pprint
import re
from tensorflow.contrib import learn

data = pandas.read_csv('data.csv',
                       dtype={
                           'value1': numpy.float32,
                           'value2': numpy.float32,
                           'positive': bool
                       },
                       sep=',')
randomized_data = data.reindex(numpy.random.permutation(data.index))

training_examples = get_features(randomized_data.head(900000))
training_targets = get_targets(randomized_data.head(900000))
# Use the tail for validation; the original took head(100000), which is a
# subset of the 900000 training rows.
validation_examples = get_features(randomized_data.tail(100000))
validation_targets = get_targets(randomized_data.tail(100000))

feature_columns = learn.infer_real_valued_columns_from_input(training_examples)

linear_classifier = learn.LinearClassifier(feature_columns=feature_columns)

for step in range(STEPS):
    linear_classifier.fit(training_examples, training_targets, steps=1)
    e = linear_classifier.evaluate(validation_examples, validation_targets)
    print()
    print('Evaluation Results [step: %d]' % step)
    print('----------------------------')
    print()
    pprint.pprint(e)
    print()

while True:
    values = input('Enter two numbers: ')
    value1, value2 = [
        float(v) for v in re.findall(r'[-+]?[0-9]*\.?[0-9]+', values)
    ]  # the original snippet is cut off here
Example #23
from sklearn.neural_network import MLPClassifier
import numpy as np
from scipy import optimize
import sqlite3
from sklearn.preprocessing import StandardScaler
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
feature_columns = skflow.infer_real_valued_columns_from_input(iris.data)
classifier = skflow.LinearClassifier(n_classes=3,
                                     feature_columns=feature_columns)
classifier.fit(iris.data, iris.target, steps=200, batch_size=64)
iris_predictions = list(classifier.predict(iris.data, as_iterable=True))
score = metrics.accuracy_score(iris.target, iris_predictions)

print(iris.target.shape)
print("")
print(len(iris_predictions))
Example #24
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern, transformed_metadata_dir,
                       serving_graph_dir):
    """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
        shards
    transformed_test_filepattern: File pattern for transformed test data shards
    transformed_metadata_dir: Directory containing transformed data metadata
    serving_graph_dir: Directory to save the serving graph

  Returns:
    The results from the estimator's 'evaluate' method
  """

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.  Note the combiner is irrelevant since the input
    # only has one value set per feature per instance.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size, combiner='sum')
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES //
                  TRAIN_BATCH_SIZE)

    # Write the serving graph to disk for use in tf.serving
    in_columns = [
        'age', 'workclass', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain',
        'capital-loss', 'hours-per-week', 'native-country'
    ]

    if serving_graph_dir is not None:
        # raw_data_metadata is defined elsewhere in the original module.
        serving_input_fn = input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_data_metadata,
            transform_savedmodel_dir=serving_graph_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=in_columns)
        estimator.export_savedmodel(serving_graph_dir, serving_input_fn)

    # Evaluate model on test dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_TEST_INSTANCES)
Example #25
    def run_Linear_SVM(self, x_training, y_training, x_testing, y_testing):
        print(
            "|-------------------------------------------------------------------------------|"
        )
        print(
            "|---------------------------------Linear SVM------------------------------------|"
        )
        print(
            "|-------------------------------------------------------------------------------|"
        )
        print()

        y_train = y_training[:, 0]

        y_test = y_testing[:, 0]

        n_classes = len(set(y_train))

        Liner_SVM = learn.LinearClassifier(
            feature_columns=[
                tf.contrib.layers.real_valued_column(
                    "", dimension=x_training.shape[1])
            ],
            n_classes=n_classes,
            optimizer=tf.train.FtrlOptimizer(learning_rate=0.1,
                                             l1_regularization_strength=0.001))

        # Set the parameters by cross-validation
        learning_rate = [0.1]
        l1_regularization_strength = [0.1]

        best_rate = 0
        best_reg = 0
        best_accuracy = 0  # the original started at 100 and kept the lowest accuracy
        scores = ['accuracy']  # metric for testing
        print("# Tuning hyper-parameters for %s" % scores[0])
        for rate in learning_rate:
            for reg in l1_regularization_strength:
                print()
                clf = learn.LinearClassifier(
                    feature_columns=[
                        tf.contrib.layers.real_valued_column(
                            "", dimension=x_training.shape[1])
                    ],
                    n_classes=n_classes,
                    optimizer=tf.train.FtrlOptimizer(
                        learning_rate=rate, l1_regularization_strength=reg))
                # fit model
                clf.fit(x_training, y_train)
                # Predict with the freshly fitted clf (the original predicted
                # with the unfitted Liner_SVM here).
                y_pred = list(clf.predict(x_testing))
                acc = sklearn.metrics.accuracy_score(y_test, y_pred)
                if acc > best_accuracy:
                    best_accuracy = acc
                    best_rate = rate
                    best_reg = reg

                print('The accuracy obtained for learning rate:' + str(rate) +
                      ' l1_regularization_strength:' + str(reg) + ' is:' +
                      str(acc))

        Liner_SVM = learn.LinearClassifier(
            feature_columns=[
                tf.contrib.layers.real_valued_column(
                    "", dimension=x_training.shape[1])
            ],
            n_classes=n_classes,
            optimizer=tf.train.FtrlOptimizer(
                learning_rate=best_rate, l1_regularization_strength=best_reg))
        accuracy_store = []
        # Fit the tuned classifier on sector 0 before reporting its accuracy
        # (the original reused stale y_pred from the tuning loop here).
        Liner_SVM.fit(x_training, y_train, steps=2000)
        y_pred = list(Liner_SVM.predict(x_testing))
        print("Accuracy for sector " + str(0) + " : " +
              str(sklearn.metrics.accuracy_score(y_test, y_pred)) +
              " and % of 1's in Test Data : " + str(y_test.mean()))
        accuracy_store.append(sklearn.metrics.accuracy_score(y_test, y_pred))
        for i in range(1, y_training.shape[1]):
            y_train = y_training[:, i]

            y_test = y_testing[:, i]

            n_classes = len(set(y_train))

            Liner_SVM.fit(x_training, y_train, steps=2000)

            y_pred = list(Liner_SVM.predict(x_testing))

            print("Accuracy for sector " + str(i) + " : " +
                  str(sklearn.metrics.accuracy_score(y_test, y_pred)) +
                  " and % of 1's in Test Data : " + str(y_test.mean()))
            accuracy_store.append(
                sklearn.metrics.accuracy_score(y_test, y_pred))
            # Evaluate and report metrics.
            #eval_metrics = classifier.evaluate(input_fn=y_test, steps=1)
            #print(eval_metrics)
        print("The average accuracy is : " + str(np.mean(accuracy_store)))
Example #26
from __future__ import division
from __future__ import print_function

import shutil

from sklearn import cross_validation
from sklearn import datasets
from sklearn import metrics
from tensorflow.contrib import learn

iris = datasets.load_iris()
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

classifier = learn.LinearClassifier(
    feature_columns=learn.infer_real_valued_columns_from_input(x_train),
    n_classes=3)
classifier.fit(x_train, y_train, steps=200)
score = metrics.accuracy_score(y_test, list(classifier.predict(x_test)))
print('Accuracy: {0:f}'.format(score))

# Clean checkpoint folder if exists
try:
    shutil.rmtree('/tmp/skflow_examples/iris_custom_model')
except OSError:
    pass

# Save model, parameters and learned variables.
classifier.save('/tmp/skflow_examples/iris_custom_model')
classifier = None
Example #27
# Assumed imports for this snippet (not shown in the original); FEATURES,
# LABEL and sample come from the original script's context.
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.contrib import layers, learn


def normalize(x):
    # (the enclosing def line was truncated in the original; reconstructed
    # from the calls in input_function below)
    return (x - np.mean(x)) / (np.max(x) - np.min(x))


def input_function(dataset, train=False):
    dataset.x1 = normalize(dataset.x1)
    dataset.x2 = normalize(dataset.x2)
    feature_cols = {k: tf.constant(dataset[k].values) for k in FEATURES}
    if train:
        labels = tf.constant(dataset[LABEL].values)
        return feature_cols, labels
    return feature_cols


# Build the model with the right feature transformation
feature_cols = [layers.real_valued_column(k) for k in FEATURES]

classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/tmp/model1")
classifier.fit(input_fn=lambda: input_function(sample, True), steps=1000)

classifier.weights_
classifier.bias_

# Predict the outcome using model
test_dict = {'x1': [10.4, 21.5, 10.5], 'x2': [22.1, 26.1, 2.7]}
test = pd.DataFrame.from_dict(test_dict)

predictions = classifier.predict(input_fn=lambda: input_function(test, False))
predictions
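One caveat in input_function above: normalize recomputes the mean and range on whatever frame it receives, so the three-row test frame is scaled with different statistics than the training data. A sketch of freezing the training-set statistics instead (reusing the imports above):

# Compute normalization statistics once, on the training sample.
train_stats = {k: (np.mean(sample[k]), np.max(sample[k]) - np.min(sample[k]))
               for k in FEATURES}

def normalize_with(x, stats):
    mean, value_range = stats
    return (x - mean) / value_range

# Scale serving-time inputs with the frozen training statistics.
test = test.copy()
for k in FEATURES:
    test[k] = normalize_with(test[k], train_stats[k])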
Example #28
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
# feature_columns is required by LinearClassifier in tf.contrib.learn;
# the original omitted it.
feature_columns = skflow.infer_real_valued_columns_from_input(iris.data)
classifier = skflow.LinearClassifier(n_classes=3,
                                     feature_columns=feature_columns)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(iris.target,
                               list(classifier.predict(iris.data)))
print("Accuracy: %f" % score)
Example #29

# Assumed imports for this snippet (not shown in the original):
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn

tf.logging.set_verbosity(tf.logging.WARN)

print("Loading mnist database")

# Training data (55k images)
mnist = learn.datasets.load_dataset("mnist")
images_training = mnist.train.images
labels_training = np.asarray(mnist.train.labels, dtype=np.int32)

# Test data (10k images)
images_test = mnist.test.images
labels_test = np.asarray(mnist.test.labels, dtype=np.int32)

# You can print some of the test images using display_test_image
# display_test_image(0)

# Build and train our classifier
feature_columns = learn.infer_real_valued_columns_from_input(images_training)
tensorflow_classifier = learn.LinearClassifier(n_classes=10,
                                               feature_columns=feature_columns)

classifier = learn.SKCompat(tensorflow_classifier)

# Train on the training set (the original mistakenly fit on the test images).
classifier.fit(x=images_training, y=labels_training, batch_size=100, steps=1000)

# Evaluate accuracy
prediction = classifier.score(images_test, labels_test)
print("Accuracy: %f" % prediction['accuracy'])
Example #30
# Assumed imports for this snippet (not shown in the original):
import random
import pandas
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.contrib import learn

train = pandas.read_csv('data/titanic_train.csv')
y, X = train['Survived'], train[['Age', 'SibSp', 'Fare']].fillna(0)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

lr = LogisticRegression()
lr.fit(X_train, y_train)
print(accuracy_score(lr.predict(X_test), y_test))

# Linear classifier.

random.seed(42)
tflr = learn.LinearClassifier(
    n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
tflr.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(y_test, list(tflr.predict(X_test))))

# 3 layer neural network with rectified linear activation.

random.seed(42)
classifier = learn.DNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
classifier.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(y_test, list(classifier.predict(X_test))))