def main(unused_args):
    ### Download and load MNIST dataset.
    mnist = learn.datasets.load_dataset('mnist')

    ### Linear classifier.
    feature_columns = learn.infer_real_valued_columns_from_input(
        mnist.train.images)
    classifier = learn.LinearClassifier(feature_columns=feature_columns,
                                        n_classes=10)
    classifier.fit(mnist.train.images,
                   mnist.train.labels.astype(np.int32),
                   batch_size=100,
                   steps=1000)
    score = metrics.accuracy_score(
        mnist.test.labels, list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))

    ### Convolutional network.
    classifier = learn.Estimator(model_fn=conv_model)
    classifier.fit(mnist.train.images,
                   mnist.train.labels,
                   batch_size=100,
                   steps=20000)
    score = metrics.accuracy_score(
        mnist.test.labels, list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))
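# Note: conv_model is referenced above but not defined in this snippet. Below
# is a minimal sketch compatible with the old tf.contrib.learn.Estimator
# model_fn contract (return predictions, loss, train_op); the layer sizes and
# learning rate are illustrative assumptions, not the original architecture.
def conv_model(feature, target, mode):
    # One-hot encode the integer labels for the softmax loss.
    target = tf.one_hot(tf.cast(target, tf.int32), 10, 1, 0)
    # Reshape flat 784-pixel rows into NHWC image tensors.
    feature = tf.reshape(feature, [-1, 28, 28, 1])
    # Two conv + pool blocks followed by a dense layer.
    h = tf.contrib.layers.conv2d(feature, 32, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h = tf.contrib.layers.max_pool2d(h, kernel_size=2, stride=2)
    h = tf.contrib.layers.conv2d(h, 64, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h = tf.contrib.layers.max_pool2d(h, kernel_size=2, stride=2)
    h = tf.contrib.layers.flatten(h)
    h = tf.contrib.layers.fully_connected(h, 1024, activation_fn=tf.nn.relu)
    logits = tf.contrib.layers.fully_connected(h, 10, activation_fn=None)
    loss = tf.losses.softmax_cross_entropy(target, logits)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        learning_rate=0.001, optimizer='SGD')
    return tf.argmax(logits, 1), loss, train_op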
def build_lr_estimator(model_dir, feature_count):
    return estimator.SKCompat(
        learn.LinearClassifier(
            feature_columns=[
                tf.contrib.layers.real_valued_column("",
                                                     dimension=feature_count)
            ],
            n_classes=2,
            model_dir=model_dir))
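# A hedged usage sketch for build_lr_estimator above; the model directory and
# the random data are illustrative placeholders, not from the original source.
import numpy as np

est = build_lr_estimator('/tmp/lr_model', feature_count=4)
x = np.random.rand(100, 4).astype(np.float32)
y = np.random.randint(0, 2, size=100)
est.fit(x, y, batch_size=32, steps=100)  # SKCompat exposes the sklearn-style API
print(est.score(x, y))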
def linear_model(output_dir):
    real, sparse = get_features()
    # Merge both feature dicts (renamed from `all` to avoid shadowing the
    # builtin).
    all_columns = {}
    all_columns.update(real)
    all_columns.update(sparse)
    estimator = tflearn.LinearClassifier(model_dir=output_dir,
                                         feature_columns=all_columns.values())
    estimator.params["head"]._thresholds = [0.7]  # FIXME: hack
    return estimator
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

    Args:
      working_dir: Directory to read transformed data and metadata from and to
        write exported model to.
      num_train_instances: Number of instances in train set
      num_test_instances: Number of instances in test set

    Returns:
      The results from the estimator's 'evaluate' method
    """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    # Wrap scalars as real valued columns.
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]

    # Wrap categorical columns.
    one_hot_columns = [
        tf.feature_column.categorical_column_with_vocabulary_file(
            key=key,
            vocabulary_file=tf_transform_output.vocabulary_file_by_name(
                vocab_filename=key))
        for key in CATEGORICAL_FEATURE_KEYS
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances /
                  TRAIN_BATCH_SIZE)

    # Evaluate model on test dataset.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

    Args:
      transformed_train_filepattern: File pattern for transformed training
        data shards
      transformed_test_filepattern: File pattern for transformed test data
        shards
      transformed_metadata_dir: Directory containing transformed data metadata
      num_train_instances: Number of instances in train set
      num_test_instances: Number of instances in test set

    Returns:
      The results from the estimator's 'evaluate' method
    """
    # Wrap scalars as real valued columns.
    real_valued_columns = [feature_column.real_valued_column(key)
                           for key in NUMERIC_COLUMNS]

    # Wrap categorical columns. Note the combiner is irrelevant since the
    # input only has one value set per feature per instance.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size, combiner='sum')
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(
        input_fn=train_input_fn,
        max_steps=TRAIN_NUM_EPOCHS * num_train_instances / TRAIN_BATCH_SIZE)

    # Evaluate model on test dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on evaluation data.

    Args:
      transformed_train_filepattern: Base filename for transformed training
        data shards
      transformed_test_filepattern: Base filename for transformed evaluation
        data shards
      transformed_metadata_dir: Directory containing transformed data metadata
      num_train_instances: Number of instances in train set
      num_test_instances: Number of instances in test set

    Returns:
      The results from the estimator's 'evaluate' method
    """
    # Unrecognized tokens are represented by -1, but
    # sparse_column_with_integerized_feature uses the mod operator to map
    # integers to the range [0, bucket_size). By choosing
    # bucket_size=VOCAB_SIZE + 1, we represent unrecognized tokens as
    # VOCAB_SIZE.
    review_column = feature_column.sparse_column_with_integerized_feature(
        REVIEW_COLUMN, bucket_size=VOCAB_SIZE + 1, combiner='sum')
    weighted_reviews = feature_column.weighted_sparse_column(
        review_column, REVIEW_WEIGHT)

    estimator = learn.LinearClassifier([weighted_reviews])

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(
        input_fn=train_input_fn,
        max_steps=TRAIN_NUM_EPOCHS * num_train_instances / TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
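# Why bucket_size=VOCAB_SIZE + 1 catches out-of-vocabulary tokens: the column
# maps ids with mod semantics, so the OOV id -1 lands in the last bucket. A
# quick check (the VOCAB_SIZE value here is illustrative):
VOCAB_SIZE = 10000
assert -1 % (VOCAB_SIZE + 1) == VOCAB_SIZE  # OOV id -1 -> bucket VOCAB_SIZE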
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on test data.

    Args:
      working_dir: Directory to read transformed data and metadata from and to
        write exported model to.
      num_train_instances: Number of instances in train set
      num_test_instances: Number of instances in test set

    Returns:
      The results from the estimator's 'evaluate' method
    """
    # Wrap scalars as real valued columns.
    real_valued_columns = [
        tf.feature_column.numeric_column(key, shape=())
        for key in NUMERIC_FEATURE_KEYS
    ]

    # Wrap categorical columns. Note the combiner is irrelevant since the
    # input only has one value set per feature per instance.
    one_hot_columns = [
        tf.feature_column.categorical_column_with_identity(
            key, num_buckets=num_buckets)
        for key, num_buckets in zip(CATEGORICAL_FEATURE_KEYS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(working_dir,
                                             TRANSFORMED_TRAIN_DATA_FILEBASE,
                                             batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances /
                  TRAIN_BATCH_SIZE)

    # Evaluate model on test dataset.
    eval_input_fn = _make_training_input_fn(working_dir,
                                            TRANSFORMED_TEST_DATA_FILEBASE,
                                            batch_size=1)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(working_dir)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return estimator.evaluate(input_fn=eval_input_fn, steps=num_test_instances)
def Linearsklearn():
    NUM_STEPS = 200
    MINIBATCH_SIZE = 506
    feature_columns = learn.infer_real_valued_columns_from_input(x_data)
    # boston.target is a continuous house price, so a regressor (not a
    # classifier) is the appropriate estimator here.
    reg = learn.LinearRegressor(
        feature_columns=feature_columns,
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))
    reg.fit(x_data, boston.target, steps=NUM_STEPS, batch_size=MINIBATCH_SIZE)
    MSE = reg.evaluate(x_data, boston.target, steps=1)
    print(MSE)
def build_estimator(model_dir, model_type):
    """Build an estimator."""
    # Base sparse (categorical) features.
    gender = layers.sparse_column_with_keys(column_name='gender',
                                            keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(
        column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(
        column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(
        column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(
        column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(
        column_name='native_country', hash_bucket_size=1000)

    # Base continuous features.
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # Transformation: bucketization turns the continuous variable into
    # categorical labels, which can improve accuracy.
    age_bucket = layers.bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

    # Wide columns and deep columns.
    # The wide model uses only categorical features (including crosses); the
    # deep model embeds the categorical features and adds the continuous ones.
    wide_columns = [gender, native_country, education, relationship,
                    workclass, occupation, age_bucket,
                    layers.crossed_column(columns=[education, occupation],
                                          hash_bucket_size=int(1e4)),
                    layers.crossed_column(
                        columns=[age_bucket, education, occupation],
                        hash_bucket_size=int(1e6)),
                    layers.crossed_column(columns=[native_country, occupation],
                                          hash_bucket_size=int(1e4))]
    deep_columns = [layers.embedding_column(workclass, dimension=8),
                    layers.embedding_column(education, dimension=8),
                    layers.embedding_column(gender, dimension=8),
                    layers.embedding_column(relationship, dimension=8),
                    layers.embedding_column(native_country, dimension=8),
                    layers.embedding_column(occupation, dimension=8),
                    age, education_num, capital_gain, capital_loss,
                    hours_per_week]

    if model_type == "wide":
        m = learn.LinearClassifier(feature_columns=wide_columns,
                                   model_dir=model_dir)
    elif model_type == "deep":
        m = learn.DNNClassifier(feature_columns=deep_columns,
                                model_dir=model_dir,
                                hidden_units=[100, 50])
    else:
        m = learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[256, 128, 64],
            dnn_activation_fn=tf.nn.relu)
    return m
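# Hedged usage sketch for build_estimator above; the model_dir paths and the
# commented fit call are illustrative placeholders, not from the original
# source.
for model_type in ("wide", "deep", "wide_n_deep"):
    m = build_estimator(model_dir="/tmp/census_" + model_type,
                        model_type=model_type)
    # m.fit(input_fn=train_input_fn, steps=2000)  # input_fn assumed elsewhere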
def train_and_evaluate(transformed_train_data_base,
                       transformed_eval_data_base,
                       transformed_metadata_dir):
    """Train the model on training data and evaluate on evaluation data.

    Args:
      transformed_train_data_base: Base filename for transformed training data
        shards
      transformed_eval_data_base: Base filename for cleaned evaluation data
        shards
      transformed_metadata_dir: Directory containing transformed data metadata.

    Returns:
      The results from the estimator's 'evaluate' method.
    """
    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size)
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_data_base + '*',
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=['label'])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES /
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_eval_data_base + '*',
        training_batch_size=1,
        label_keys=['label'])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_EVAL_INSTANCES)
def train_and_evaluate(working_dir,
                       num_train_instances=NUM_TRAIN_INSTANCES,
                       num_test_instances=NUM_TEST_INSTANCES):
    """Train the model on training data and evaluate on evaluation data.

    Args:
      working_dir: Directory to read transformed data and metadata from.
      num_train_instances: Number of instances in train set
      num_test_instances: Number of instances in test set

    Returns:
      The results from the estimator's 'evaluate' method
    """
    tf_transform_output = tft.TFTransformOutput(working_dir)

    # Unrecognized tokens are represented by -1, but
    # categorical_column_with_identity uses the mod operator to map integers
    # to the range [0, bucket_size). By choosing bucket_size=VOCAB_SIZE + 1,
    # we represent unrecognized tokens as VOCAB_SIZE.
    review_column = tf.feature_column.categorical_column_with_identity(
        REVIEW_KEY, num_buckets=VOCAB_SIZE + 1)
    weighted_reviews = tf.feature_column.weighted_categorical_column(
        review_column, REVIEW_WEIGHT_KEY)

    estimator = learn.LinearClassifier([weighted_reviews])

    # Fit the model using the default optimizer.
    train_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TRAIN_DATA_FILEBASE + '*'),
        batch_size=TRAIN_BATCH_SIZE)
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * num_train_instances /
                  TRAIN_BATCH_SIZE)

    # Evaluate model on eval dataset.
    eval_input_fn = _make_training_input_fn(
        tf_transform_output,
        os.path.join(working_dir, TRANSFORMED_TEST_DATA_FILEBASE + '*'),
        batch_size=1)
    result = estimator.evaluate(input_fn=eval_input_fn,
                                steps=num_test_instances)

    # Export the model.
    serving_input_fn = _make_serving_input_fn(tf_transform_output)
    exported_model_dir = os.path.join(working_dir, EXPORTED_MODEL_DIR)
    estimator.export_savedmodel(exported_model_dir, serving_input_fn)

    return result
def train_and_evaluate(output_dir):
    review_column = feature_column.sparse_column_with_integerized_feature(
        const.REVIEW_COLUMN, bucket_size=vocab_size + 1, combiner='sum')
    weighted_reviews = feature_column.weighted_sparse_column(
        review_column, const.REVIEW_WEIGHT)

    estimator = learn.LinearClassifier(
        feature_columns=[weighted_reviews],
        n_classes=2,
        model_dir=output_dir,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=30))

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    raw_metadata = metadata_io.read_metadata(raw_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_file_pattern,
        training_batch_size=train_batch_size,
        label_keys=[const.LABEL_COLUMN])
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_file_pattern,
        training_batch_size=1,
        label_keys=[const.LABEL_COLUMN])
    serving_input_fn = (
        input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_metadata,
            transform_savedmodel_dir=output_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=[const.REVIEW_COLUMN]))
    export_strategy = saved_model_export_utils.make_export_strategy(
        serving_input_fn,
        exports_to_keep=5,
        default_output_alternative_key=None)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_steps=train_num_epochs * num_train_instances / train_batch_size,
        eval_steps=num_test_instances,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        export_strategies=export_strategy,
        min_eval_frequency=500)
def run7():
    mnist = learn.datasets.load_dataset('mnist')
    data = mnist.train.images
    labels = np.asarray(mnist.train.labels, dtype=np.int32)
    test_data = mnist.test.images
    test_labels = np.asarray(mnist.test.labels, dtype=np.int32)
    max_examples = 1000
    data = data[:max_examples]
    labels = labels[:max_examples]

    def display(i):
        img = test_data[i]
        plt.title('Example {}, Label: {}'.format(i, test_labels[i]))
        plt.imshow(img.reshape((28, 28)), cmap=plt.cm.gray)
        plt.show()

    # display(0)

    feature_columns = learn.infer_real_valued_columns_from_input(data)
    clf = learn.LinearClassifier(feature_columns=feature_columns, n_classes=10)
    clf.fit(data, labels, batch_size=100, steps=1000)
    # Accuracy is roughly 0.8607 on this 1000-example training subset.
    print(clf.evaluate(test_data, test_labels)['accuracy'])
    # print('Predicted {}, Label: {}'.format(clf.predict(test_data[0]),
    #                                        test_labels[0]))

    # Visualize the learned per-class weights as 28x28 images.
    weights = clf.weights_
    f, axes = plt.subplots(2, 5, figsize=(10, 4))
    axes = axes.reshape(-1)
    for i in range(len(axes)):
        a = axes[i]
        a.imshow(weights.T[i].reshape(28, 28), cmap=plt.cm.seismic)
        a.set_title(i)
        a.set_xticks(())
        a.set_yticks(())
    plt.show()
def trainClassifier(self):
    features = []
    featureExtraction = featureExtractor
    self.sensorData = np.array(self.sensorData)
    self.sensorData = self.sensorData[0:, 0:, 1:]
    self.soundData = np.array(self.soundData)
    self.labels = np.array(self.labels)
    for x in range(len(self.sensorData)):
        features.append(
            featureExtraction.extract(featureExtraction, self.sensorData[x],
                                      self.soundData[x, 0:]))
    features = np.array(features)

    # Slice into train / test / validation sets; the first 500 examples are
    # used for training so the splits do not overlap.
    training = features[:500]
    test = features[500:550]
    validation = features[550:]
    training_labels = self.labels[:500]
    test_labels = self.labels[500:550]
    validation_labels = self.labels[550:]

    train_dataset, train_labels = self.randomize(training, training_labels)
    test_dataset, test_labels = self.randomize(test, test_labels)
    validation_dataset, validation_labels = self.randomize(
        validation, validation_labels)

    feature_columns = learn.infer_real_valued_columns_from_input(train_dataset)
    print("loading...")
    classifier = learn.LinearClassifier(
        n_classes=5,
        feature_columns=feature_columns,
        optimizer=tf.train.FtrlOptimizer(learning_rate_power=-0.69,
                                         learning_rate=0.00001,
                                         l1_regularization_strength=0.1))
    classifier.fit(train_dataset, train_labels, steps=30000)
    print("done")
    self.linearClassifier = classifier
# The leading split statement appears truncated in the source; a plausible
# completion, mirroring the label split below:
test, train = train[test_idx, :], train[training_idx, :]
test_labels, train_labels = labels[test_idx], \
    labels[training_idx]

train = np.array(train, dtype=np.float32)
test = np.array(test, dtype=np.float32)
train_labels = np.array(train_labels, dtype=np.int32)
test_labels = np.array(test_labels, dtype=np.int32)

# Convert features to learn style
feature_columns = learn.infer_real_valued_columns_from_input(
    train.reshape([-1, 36 * 36]))

# Logistic Regression
classifier = estimator.SKCompat(
    learn.LinearClassifier(feature_columns=feature_columns, n_classes=5))

# One line training
# steps is number of total batches
# steps * batch_size / len(train) = num_epochs
classifier.fit(train.reshape([-1, 36 * 36]),
               train_labels,
               steps=1024,
               batch_size=32)

# sklearn compatible accuracy
test_probs = classifier.predict(test.reshape([-1, 36 * 36]))
sklearn.metrics.accuracy_score(test_labels, test_probs['classes'])

# Dense neural net (truncated in the source)
classifier = estimator.SKCompat(
#plt.imshow(digits['images'][66], cmap="gray", interpolation='none')
#plt.show()

from sklearn import svm
classifier = svm.SVC(gamma=0.001)
classifier.fit(digits.data, digits.target)
predicted = classifier.predict(digits.data)
print(np.mean(digits.target == predicted))

X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target)

import tensorflow as tf
from tensorflow.contrib import learn

n_classes = len(set(y_train))
classifier = learn.LinearClassifier(
    feature_columns=[
        tf.contrib.layers.real_valued_column("", dimension=X_train.shape[1])
    ],
    n_classes=n_classes)
classifier.fit(X_train, y_train, steps=10)
# predict() may return a generator; materialize it for sklearn metrics.
y_pred = list(classifier.predict(X_test))

from sklearn import metrics
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))
one_image = img.reshape(image_width, image_height)
plt.axis('off')
plt.imshow(one_image, cmap=cm.binary)
plt.show()
# pylab.show()

# Output image.
display(X_train[1:2].values)

mnist = learn.datasets.load_dataset('mnist')
feature_columns = learn.infer_real_valued_columns_from_input(
    mnist.train.images)
classifier = learn.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=10,
    optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.01))
classifier.fit(X_train, y_train, steps=1000, batch_size=100)
linear_y_predict = classifier.predict(X_test)
accuracy_score = classifier.evaluate(X_train, y_train)["accuracy"]
print(accuracy_score)
print(linear_y_predict[:100])

linear_submission = pd.DataFrame({
    'ImageId': range(1, 28001),
    'Label': linear_y_predict
})
linear_submission.to_csv('linear_submission.csv', index=False)
print('linear done')

classifier = learn.DNNClassifier(
def build_estimator(model_dir, classifier):
    # Categorical columns
    sex = tf.contrib.layers.sparse_column_with_keys(column_name="Sex",
                                                    keys=["female", "male"])
    family = tf.contrib.layers.sparse_column_with_keys(
        column_name="Family", keys=["Large", "Nuclear", "Solo"])
    child = tf.contrib.layers.sparse_column_with_keys(column_name="Child",
                                                      keys=["0", "1"])
    ageknown = tf.contrib.layers.sparse_column_with_keys(
        column_name="AgeKnown", keys=["0", "1"])
    embarked = tf.contrib.layers.sparse_column_with_keys(
        column_name="Embarked", keys=["C", "S", "Q"])
    young = tf.contrib.layers.sparse_column_with_keys(column_name="Young",
                                                      keys=["0", "1"])
    malebadticket = tf.contrib.layers.sparse_column_with_keys(
        column_name="MaleBadTicket", keys=["0", "1"])
    cab = tf.contrib.layers.sparse_column_with_hash_bucket(
        "Cab", hash_bucket_size=10)
    namet = tf.contrib.layers.sparse_column_with_hash_bucket(
        "NameT", hash_bucket_size=20)

    # Continuous columns
    age = tf.contrib.layers.real_valued_column("Age")
    namelength = tf.contrib.layers.real_valued_column("NameLength")
    fare = tf.contrib.layers.real_valued_column("Fare")
    p_class = tf.contrib.layers.real_valued_column("Pclass")

    # Transformations.
    fare_buckets = tf.contrib.layers.bucketized_column(
        fare,
        boundaries=[5, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500, 550])
    age_buckets = tf.contrib.layers.bucketized_column(
        age, boundaries=[5, 18, 25, 30, 35, 40, 45, 50, 55, 65])
    pclass_buckets = tf.contrib.layers.bucketized_column(p_class,
                                                         boundaries=[1, 2, 3])

    # Wide columns and deep columns.
    wide_columns = [sex, cab, namet, child, ageknown, embarked, young, family,
                    tf.contrib.layers.crossed_column(
                        [age_buckets, sex], hash_bucket_size=int(1e3)),
                    tf.contrib.layers.crossed_column(
                        [pclass_buckets, sex], hash_bucket_size=int(1e3)),
                    tf.contrib.layers.crossed_column(
                        [fare_buckets, pclass_buckets],
                        hash_bucket_size=int(1e3)),
                    tf.contrib.layers.crossed_column(
                        [embarked, pclass_buckets], hash_bucket_size=int(1e3)),
                    tf.contrib.layers.crossed_column(
                        [embarked, sex], hash_bucket_size=int(1e3))]
    deep_columns = [
        namelength, fare, p_class,
        tf.contrib.layers.embedding_column(sex, dimension=8),
        tf.contrib.layers.embedding_column(child, dimension=8),
        tf.contrib.layers.embedding_column(family, dimension=8),
        tf.contrib.layers.embedding_column(cab, dimension=8),
        tf.contrib.layers.embedding_column(namet, dimension=8),
        tf.contrib.layers.embedding_column(ageknown, dimension=8),
        tf.contrib.layers.embedding_column(embarked, dimension=8),
        tf.contrib.layers.embedding_column(young, dimension=8),
        tf.contrib.layers.embedding_column(malebadticket, dimension=8)
    ]

    if classifier == "deep":
        return Learn.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns,
            hidden_units=[32, 16],
            optimizer=tf.train.ProximalAdagradOptimizer(
                learning_rate=0.1, l2_regularization_strength=0.001))
    elif classifier == "wide":
        return Learn.LinearClassifier(
            feature_columns=wide_columns,
            optimizer=tf.train.FtrlOptimizer(
                learning_rate=5,
                l1_regularization_strength=1000.0,
                l2_regularization_strength=1000.0),
            model_dir=model_dir)
    else:
        return Learn.DNNLinearCombinedClassifier(
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[32, 16],
            model_dir=model_dir,
            linear_optimizer=tf.train.FtrlOptimizer(
                learning_rate=10,
                l1_regularization_strength=100.0,
                l2_regularization_strength=100.0),
            dnn_optimizer=tf.train.ProximalAdagradOptimizer(
                learning_rate=0.1, l2_regularization_strength=0.001))
# The above code is equivalent to the commented Python code below, but the
# 'learn' version is recommended since it can run on any device - CPU, GPU,
# or mobile - whereas the plain Python code will run only on CPU.
# x = sample[['x1','x2']].as_matrix()
# y = sample[['label']].as_matrix()

# Divide the input data into train and validation.
x_train, x_validate, y_train, y_validate = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=100)
type(x_train)

# Feature engineering.
feature_cols = [layers.real_valued_column("", dimension=2)]

# Build the model configuration.
classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/home/algo/Algorithmica/tmp")

# Build the model.
classifier.fit(x=x_train, y=y_train, steps=1000)
classifier.weights_
classifier.bias_
# By default, enable_centered_bias = True in learn.LinearClassifier.
centered_bias_weight = classifier.get_variable_value("centered_bias_weight")

# Evaluate the model using the validation set.
results = classifier.evaluate(x=x_validate, y=y_validate, steps=1)
type(results)
for key in sorted(results):
    print("%s:%s" % (key, results[key]))
    features_dtype=np.float32, target_column=-1)
X = sample.data
y = sample.target

# Divide the input data into train and validation.
X_train, X_validation, y_train, y_validation = model_selection.train_test_split(
    X, y, test_size=0.2, random_state=100)
type(X_train)

# Feature engineering.
feature_cols = [layers.real_valued_column("", dimension=2)]

# Build the model configuration.
classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/home/algo/m2")

# Build the model.
classifier.fit(x=X_train, y=y_train, steps=1000)
classifier.weights_
classifier.bias_

# Evaluate the model using the validation set.
results = classifier.evaluate(x=X_validation, y=y_validation, steps=1)
type(results)
for key in sorted(results):
    print("%s:%s" % (key, results[key]))

# Predict the outcome of test data using the model.
X_test = np.array([[100.4, 21.5], [200.1, 26.1]])
#
# Script harvested from:
# https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/learn/python/learn
#
# skflow intro classifiers
# ---------------------------------------
#
import tensorflow.contrib.learn as learn
from sklearn import datasets, metrics, preprocessing
import tensorflow as tf
import tensorflow.contrib.layers as layers

# Linear Classifier
iris = datasets.load_iris()
feature_columns = learn.infer_real_valued_columns_from_input(iris.data)
classifier = learn.LinearClassifier(n_classes=3,
                                    feature_columns=feature_columns)
classifier.fit(iris.data, iris.target, steps=200, batch_size=32)
iris_predictions = list(classifier.predict(iris.data, as_iterable=True))
score = metrics.accuracy_score(iris.target, iris_predictions)
print("Accuracy: %f" % score)

# Linear Regression
boston = datasets.load_boston()
x = preprocessing.StandardScaler().fit_transform(boston.data)
feature_columns = learn.infer_real_valued_columns_from_input(x)
regressor = learn.LinearRegressor(feature_columns=feature_columns)
regressor.fit(x, boston.target, steps=200, batch_size=32)
boston_predictions = list(regressor.predict(x, as_iterable=True))
score = metrics.mean_squared_error(boston_predictions, boston.target)
print("MSE: %f" % score)
    dtype={
        'value1': numpy.float32,
        'value2': numpy.float32,
        'positive': bool
    },
    sep=',')

randomized_data = data.reindex(numpy.random.permutation(data.index))
training_examples = get_features(randomized_data.head(900000))
training_targets = get_targets(randomized_data.head(900000))
# Use the last 100k rows for validation so the two sets do not overlap.
validation_examples = get_features(randomized_data.tail(100000))
validation_targets = get_targets(randomized_data.tail(100000))

feature_columns = learn.infer_real_valued_columns_from_input(training_examples)
linear_classifier = learn.LinearClassifier(feature_columns=feature_columns)

for step in range(STEPS):
    linear_classifier.fit(training_examples, training_targets, steps=1)
    e = linear_classifier.evaluate(validation_examples, validation_targets)
    print()
    print('Evaluation Results [step: %d]' % step)
    print('----------------------------')
    print()
    pprint.pprint(e)
    print()

while True:
    values = input('Enter two numbers: ')
    value1, value2 = [
        float(v) for v in re.findall('[-+]?[0-9]*\.?[0-9]+', values)
from sklearn.neural_network import MLPClassifier
import numpy as np
from scipy import optimize
import sqlite3
from sklearn.preprocessing import StandardScaler
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
feature_columns = skflow.infer_real_valued_columns_from_input(iris.data)
classifier = skflow.LinearClassifier(n_classes=3,
                                     feature_columns=feature_columns)
classifier.fit(iris.data, iris.target, steps=200, batch_size=64)
iris_predictions = list(classifier.predict(iris.data, as_iterable=True))
score = metrics.accuracy_score(iris.target, iris_predictions)
print(iris.target.shape)
print("")
print(len(iris_predictions))
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       serving_graph_dir):
    """Train the model on training data and evaluate on test data.

    Args:
      transformed_train_filepattern: File pattern for transformed training
        data shards
      transformed_test_filepattern: File pattern for transformed test data
        shards
      transformed_metadata_dir: Directory containing transformed data metadata
      serving_graph_dir: Directory to save the serving graph

    Returns:
      The results from the estimator's 'evaluate' method
    """
    # Wrap scalars as real valued columns.
    real_valued_columns = [
        feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
    ]

    # Wrap categorical columns. Note the combiner is irrelevant since the
    # input only has one value set per feature per instance.
    one_hot_columns = [
        feature_column.sparse_column_with_integerized_feature(
            key, bucket_size=bucket_size, combiner='sum')
        for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
    ]

    estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

    transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
    train_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_train_filepattern,
        training_batch_size=TRAIN_BATCH_SIZE,
        label_keys=[LABEL_COLUMN])

    # Estimate the model using the default optimizer.
    estimator.fit(input_fn=train_input_fn,
                  max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES /
                  TRAIN_BATCH_SIZE)

    # Write the serving graph to disk for use in tf.serving.
    in_columns = [
        'age', 'workclass', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain',
        'capital-loss', 'hours-per-week', 'native-country'
    ]
    if serving_graph_dir is not None:
        serving_input_fn = (
            input_fn_maker.build_default_transforming_serving_input_fn(
                raw_metadata=raw_data_metadata,
                transform_savedmodel_dir=serving_graph_dir + '/transform_fn',
                raw_label_keys=[],
                raw_feature_keys=in_columns))
        estimator.export_savedmodel(serving_graph_dir, serving_input_fn)

    # Evaluate model on test dataset.
    eval_input_fn = input_fn_maker.build_training_input_fn(
        transformed_metadata,
        transformed_test_filepattern,
        training_batch_size=1,
        label_keys=[LABEL_COLUMN])

    return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_TEST_INSTANCES)
def run_Linear_SVM(self, x_training, y_training, x_testing, y_testing):
    print("|-------------------------------------------------------------------------------|")
    print("|---------------------------------Linear SVM------------------------------------|")
    print("|-------------------------------------------------------------------------------|")
    print()

    y_train = y_training[:, 0]
    y_test = y_testing[:, 0]
    n_classes = len(set(y_train))
    linear_svm = learn.LinearClassifier(
        feature_columns=[
            tf.contrib.layers.real_valued_column(
                "", dimension=x_training.shape[1])
        ],
        n_classes=n_classes,
        optimizer=tf.train.FtrlOptimizer(learning_rate=0.1,
                                         l1_regularization_strength=0.001))

    # Set the parameters by cross-validation.
    learning_rate = [0.1]
    l1_regularization_strength = [0.1]
    best_rate = 0
    best_reg = 0
    best_accuracy = 0
    scores = ['accuracy']  # metric for testing
    print("# Tuning hyper-parameters for %s" % scores[0])
    for rate in learning_rate:
        for reg in l1_regularization_strength:
            print()
            clf = learn.LinearClassifier(
                feature_columns=[
                    tf.contrib.layers.real_valued_column(
                        "", dimension=x_training.shape[1])
                ],
                n_classes=n_classes,
                optimizer=tf.train.FtrlOptimizer(
                    learning_rate=rate, l1_regularization_strength=reg))
            # Fit the candidate model and score it on the test set.
            clf.fit(x_training, y_train)
            y_pred = list(clf.predict(x_testing))
            acc = sklearn.metrics.accuracy_score(y_test, y_pred)
            # Keep the parameter pair with the highest accuracy.
            if acc > best_accuracy:
                best_accuracy = acc
                best_rate = rate
                best_reg = reg
            print('The accuracy obtained for learning rate:' + str(rate) +
                  ' l1_regularization_strength:' + str(reg) + ' is:' +
                  str(acc))

    # Rebuild the classifier with the best parameters and refit it on
    # sector 0 before reporting its accuracy.
    linear_svm = learn.LinearClassifier(
        feature_columns=[
            tf.contrib.layers.real_valued_column(
                "", dimension=x_training.shape[1])
        ],
        n_classes=n_classes,
        optimizer=tf.train.FtrlOptimizer(
            learning_rate=best_rate,
            l1_regularization_strength=best_reg))
    linear_svm.fit(x_training, y_train, steps=2000)
    y_pred = list(linear_svm.predict(x_testing))

    accuracy_store = []
    print("Accuracy for sector " + str(0) + " : " +
          str(sklearn.metrics.accuracy_score(y_test, y_pred)) +
          " and % of 1's in Test Data : " + str(y_test.mean()))
    for i in range(1, y_training.shape[1]):
        y_train = y_training[:, i]
        y_test = y_testing[:, i]
        n_classes = len(set(y_train))
        linear_svm.fit(x_training, y_train, steps=2000)
        y_pred = list(linear_svm.predict(x_testing))
        print("Accuracy for sector " + str(i) + " : " +
              str(sklearn.metrics.accuracy_score(y_test, y_pred)) +
              " and % of 1's in Test Data : " + str(y_test.mean()))
        accuracy_store.append(sklearn.metrics.accuracy_score(y_test, y_pred))

    # Evaluate and report metrics.
    # eval_metrics = classifier.evaluate(input_fn=y_test, steps=1)
    # print(eval_metrics)
    print("The average accuracy is : " + str(np.mean(accuracy_store)))
from __future__ import division
from __future__ import print_function

import shutil

from sklearn import cross_validation
from sklearn import datasets
from sklearn import metrics
from tensorflow.contrib import learn

iris = datasets.load_iris()
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

classifier = learn.LinearClassifier(
    feature_columns=learn.infer_real_valued_columns_from_input(x_train),
    n_classes=3)
classifier.fit(x_train, y_train, steps=200)
score = metrics.accuracy_score(y_test, classifier.predict(x_test))
print('Accuracy: {0:f}'.format(score))

# Clean checkpoint folder if exists.
try:
    shutil.rmtree('/tmp/skflow_examples/iris_custom_model')
except OSError:
    pass

# Save model, parameters and learned variables.
classifier.save('/tmp/skflow_examples/iris_custom_model')
classifier = None
def normalize(x):
    return (x - np.mean(x)) / (np.max(x) - np.min(x))


def input_function(dataset, train=False):
    dataset.x1 = normalize(dataset.x1)
    dataset.x2 = normalize(dataset.x2)
    feature_cols = {k: tf.constant(dataset[k].values) for k in FEATURES}
    if train:
        labels = tf.constant(dataset[LABEL].values)
        return feature_cols, labels
    return feature_cols


# Build the model with the right feature transformation.
feature_cols = [layers.real_valued_column(k) for k in FEATURES]
classifier = learn.LinearClassifier(feature_columns=feature_cols,
                                    n_classes=2,
                                    model_dir="/tmp/model1")
classifier.fit(input_fn=lambda: input_function(sample, True), steps=1000)
classifier.weights_
classifier.bias_

# Predict the outcome using the model (renamed from `dict` to avoid
# shadowing the builtin).
test_dict = {'x1': [10.4, 21.5, 10.5], 'x2': [22.1, 26.1, 2.7]}
test = pd.DataFrame.from_dict(test_dict)
predictions = classifier.predict(input_fn=lambda: input_function(test, False))
predictions
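# Depending on the tf.contrib.learn version, predict() called with an
# input_fn may return a generator rather than an array; materializing it with
# list() makes the predicted classes printable either way (a minimal sketch):
print(list(predictions))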
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()
# LinearClassifier requires feature_columns; infer them from the input data.
feature_columns = skflow.infer_real_valued_columns_from_input(iris.data)
classifier = skflow.LinearClassifier(n_classes=3,
                                     feature_columns=feature_columns)
classifier.fit(iris.data, iris.target)
score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
print("Accuracy: %f" % score)
tf.logging.set_verbosity(tf.logging.WARN)

print("Loading mnist database")

# Training data (55k images)
mnist = learn.datasets.load_dataset("mnist")
images_training = mnist.train.images
labels_training = np.asarray(mnist.train.labels, dtype=np.int32)

# Test data (10k images)
images_test = mnist.test.images
labels_test = np.asarray(mnist.test.labels, dtype=np.int32)

# You can print some of the test images using display_test_image
# display_test_image(0)

# Build and train our classifier (fit on the training set, not the test set).
feature_columns = learn.infer_real_valued_columns_from_input(images_training)
tensorflow_classifier = learn.LinearClassifier(n_classes=10,
                                               feature_columns=feature_columns)
classifier = learn.SKCompat(tensorflow_classifier)
classifier.fit(x=images_training, y=labels_training, batch_size=100,
               steps=1000)

# Evaluate accuracy
prediction = classifier.score(images_test, labels_test)
print("Accuracy: %f" % prediction['accuracy'])
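# A hedged follow-up sketch: SKCompat.predict returns a dict of numpy arrays
# (key names can vary across tf.contrib.learn versions; 'classes' is typical,
# matching the test_probs['classes'] usage earlier in this collection).
preds = classifier.predict(images_test[:5])
print(preds['classes'])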
train = pandas.read_csv('data/titanic_train.csv')
y, X = train['Survived'], train[['Age', 'SibSp', 'Fare']].fillna(0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

lr = LogisticRegression()
lr.fit(X_train, y_train)
print(accuracy_score(lr.predict(X_test), y_test))

# Linear classifier.
random.seed(42)
tflr = learn.LinearClassifier(
    n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
tflr.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(tflr.predict(X_test), y_test))

# 3 layer neural network with rectified linear activation.
random.seed(42)
classifier = learn.DNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=2,
    feature_columns=learn.infer_real_valued_columns_from_input(X_train),
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.05))
classifier.fit(X_train, y_train, batch_size=128, steps=500)
print(accuracy_score(classifier.predict(X_test), y_test))