def loadDNNonly(modeldir=os.getcwd() + os.sep + "DNN" + os.sep):
    """Build a DNNClassifier wired to a saved model directory.

    Args:
        modeldir: directory holding the trained DNN checkpoint
            (defaults to ./DNN/ under the process cwd, evaluated at import).

    Returns:
        A tf.contrib.learn DNNClassifier restored from ``modeldir``.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)
    # Feature columns are inferred from the training CSV; labels are unused.
    features, _labels = getXYDNN("refined_turning_data.csv")
    columns = tf.contrib.learn.infer_real_valued_columns_from_input(features)
    return skflow.DNNClassifier(
        feature_columns=columns,
        hidden_units=[128, 128],
        n_classes=3,
        model_dir=modeldir,
    )
def train_and_eval(train_steps, log_dir, training_set, validation_set, testing_set, ):
    """Train a DNNClassifier with validation-based early stopping, then
    evaluate on the test set and print every resulting metric.

    Args:
        train_steps: maximum number of training steps.
        log_dir: model/checkpoint directory.
        training_set, validation_set, testing_set: datasets consumed by
            the module-level ``input_fn``.
    """
    # Embed each categorical attribute (8 dims) on top of its sparse column.
    sparse_cols = [
        layers.sparse_column_with_keys(attr, training_set[attr].unique())
        for attr in FEATURE_ATTRIBUTES
    ]
    deep_cols = [layers.embedding_column(col, dimension=8) for col in sparse_cols]

    estimator = learn.DNNClassifier(
        hidden_units=[10, 50, ],
        feature_columns=deep_cols,
        model_dir=log_dir,
        config=learn.RunConfig(save_checkpoints_secs=1, ),
    )

    # Streaming metrics are computed over the predicted classes.
    eval_metrics = {
        "accuracy": learn.MetricSpec(metric_fn=metrics.streaming_accuracy,
                                     prediction_key="classes"),
        "precision": learn.MetricSpec(metric_fn=metrics.streaming_precision,
                                      prediction_key="classes"),
        "recall": learn.MetricSpec(metric_fn=metrics.streaming_recall,
                                   prediction_key="classes"),
    }
    early_stop = learn.monitors.ValidationMonitor(
        input_fn=lambda: input_fn(validation_set),
        every_n_steps=1000,
        metrics=eval_metrics,
        early_stopping_rounds=1,
    )

    estimator.fit(input_fn=lambda: input_fn(training_set),
                  steps=train_steps,
                  monitors=[early_stop])

    results = estimator.evaluate(input_fn=lambda: input_fn(testing_set), steps=1)
    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
def main(unused_argv):
    """Round-trip iris through HDF5, then train and evaluate a small DNN.

    The HDF5 save/load is purely a demonstration of storing numpy splits.
    """
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Note that we are saving and loading iris data as h5 format as a simple
    # demonstration here.
    # BUG FIX: the read-mode file handle was never closed; use context
    # managers so both handles are released deterministically.
    with h5py.File('/tmp/test_hdf5.h5', 'w') as h5f:
        h5f.create_dataset('X_train', data=x_train)
        h5f.create_dataset('X_test', data=x_test)
        h5f.create_dataset('y_train', data=y_train)
        h5f.create_dataset('y_test', data=y_test)

    with h5py.File('/tmp/test_hdf5.h5', 'r') as h5f:
        x_train = np.array(h5f['X_train'])
        x_test = np.array(h5f['X_test'])
        y_train = np.array(h5f['y_train'])
        y_test = np.array(h5f['y_test'])

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
def part3():
    """Train a one-hidden-layer DNN on MNIST and print a confusion matrix."""
    global boston, x_data, y_data
    import sys
    import numpy as np
    from tensorflow.examples.tutorials.mnist import input_data

    DATA_DIR = 'c:\\tmp\\data'
    mnist = input_data.read_data_sets(DATA_DIR, one_hot=False)
    x_data, y_data = mnist.train.images, mnist.train.labels.astype(np.int32)
    x_test, y_test = mnist.test.images, mnist.test.labels.astype(np.int32)

    NUM_STEPS = 2000
    MINIBATCH_SIZE = 128

    feature_columns = learn.infer_real_valued_columns_from_input(x_data)
    dnn = learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[200],
        n_classes=10,
        optimizer=tf.train.ProximalAdagradOptimizer(learning_rate=0.2))
    dnn.fit(x=x_data, y=y_data, steps=NUM_STEPS, batch_size=MINIBATCH_SIZE)

    test_acc = dnn.evaluate(x=x_test, y=y_test, steps=1)["accuracy"]
    print(f"test accuracy {test_acc}")

    from sklearn.metrics import confusion_matrix
    y_pred = dnn.predict(x=x_test, as_iterable=False)
    class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    cnf_matrix = confusion_matrix(y_test, y_pred)
    print(cnf_matrix)
def analyze_model(test_inters, testtype, model):
    """Train ``model`` on all intersections except ``test_inters``, predict
    on synthetic analysis data, and save the class-probability predictions.

    Args:
        test_inters: iterable of intersection ids held out of training.
        testtype: sub-folder name identifying the experiment.
        model: model identifier; anything containing "LSTM" uses the LSTM
            pipeline, otherwise a 2x128 DNN is trained from scratch.

    Returns:
        The predicted probabilities that were written to disk.
    """
    path_to_load = c.PATH_TO_RESULTS + "ByIntersection" + os.sep
    load_folder = path_to_load + testtype + os.sep
    save_folder = load_folder + "TestOn" + ",".join(
        [str(i) for i in test_inters]) + os.sep
    Ypred = None
    if "LSTM" in model:
        Xtrain, Ytrain = du.getFeaturesLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[2]
        Xtest, Ytest = createAnalysisTestData(numFeatures,
                                              traj_len=Xtrain.shape[1])
        # Re-train the LSTM from scratch; saved models are not reloaded here.
        Ypred, timeFit, timePred, all_tests_x, all_tests_y = LSTM.run_LSTM(
            (Xtrain, Ytrain), (Xtest, Ytest), model=model,
            save_path="ignore.out")
    else:
        Xtrain, Ytrain = du.getFeaturesnonLSTM(
            load_folder, testtype,
            list({1, 2, 3, 4, 5, 6, 7, 8, 9} - set(test_inters)))
        means, stddevs = du.normalize_get_params(Xtrain)
        Xtrain = du.normalize(Xtrain, means, stddevs)
        numFeatures = Xtrain.shape[1]
        Xtest, _ = createAnalysisTestData(numFeatures, traj_len=1)
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.
            infer_real_valued_columns_from_input(Xtrain),
            hidden_units=[128, 128], n_classes=3)
        print("Could not load saved model, re-training :(.")
        # Labels arrive 1-based; the classifier expects 0-based class ids.
        Ytrain = [int(i - 1) for i in Ytrain]
        # BUG FIX: time.clock() was removed in Python 3.8; perf_counter()
        # is the documented replacement for interval timing.
        start = time.perf_counter()
        max_epochs = 10
        if max_epochs:
            start2 = time.perf_counter()
            for epoch in range(max_epochs):
                classifier.fit(Xtrain, Ytrain, steps=1000)
                end2 = time.perf_counter()
                # BUG FIX: the original had an unterminated string literal
                # here ("Done. <newline> Took:"); restored to match the
                # equivalent per-epoch message in trainDNN.
                print("Epoch", epoch, "Done. Took:", end2 - start2)
                start2 = end2
        else:
            classifier.fit(Xtrain, Ytrain)
        Ypred = classifier.predict_proba(Xtest)
        end = time.perf_counter()
        timeFit = end - start
        print("Done fitting, time spent:", timeFit)
    np.savetxt(save_folder + "analysis_Ypred_" + model, np.array(Ypred))
    print(model, "analysis predictions saved, test", testtype, save_folder,
          "analysis_Ypred_", model)
    return Ypred
def validateDNN(test_folder, Xtrain, Ytrain, Xtest, Ytest):
    """Standardize features, train a 2x128 DNN, and report test accuracy.

    Writes the predicted probability distribution to
    ``test_folder + "DNN_validation_p_dist"``.
    """
    # Fit the scaler on the training split only, then apply to both splits
    # in place (copy=False mutates the caller's arrays).
    scaler = preprocessing.StandardScaler(copy=False).fit(Xtrain)
    scaler.transform(Xtrain)
    scaler.transform(Xtest)

    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain),
        hidden_units=[128, 128],
        n_classes=3)

    # Labels are 1-based on disk; shift to the 0-based ids the DNN expects.
    Ytrain = [i - 1 for i in Ytrain]
    Ytest = [i - 1 for i in Ytest]

    num_batches = 10
    batch_size = int(len(Ytrain) / num_batches)
    # Held-out slice kept around for optional partial_fit experiments.
    XValid = Xtest[0:batch_size]
    YValid = Ytest[0:batch_size]

    # BUG FIX: time.clock() was removed in Python 3.8; use perf_counter().
    start = time.perf_counter()
    classifier.fit(Xtrain, Ytrain)
    end = time.perf_counter()
    print(end - start)

    predictions = classifier.predict(Xtest)
    probs = classifier.predict_proba(Xtest)
    np.savetxt(test_folder + "DNN_validation_p_dist", probs)
    score = metrics.accuracy_score(Ytest, predictions)
    print("Accuracy: %f" % score)
def main(unused_argv):
    """Train a DNN on iris; print [P(class=1), true_label] pairs and accuracy."""
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    predictions = list(classifier.predict(x_test, as_iterable=True))
    score = metrics.accuracy_score(y_test, predictions)

    # Pair the probability of class 1 with the true label for each sample.
    probabilities = list(classifier.predict_proba(x_test, as_iterable=True))
    result = []
    for idx, prob in enumerate(probabilities):
        result.append([prob[1], y_test[idx]])
    print(result)
    print(score)
def dnnclassifier():
    """Train a DNN on one 5-fold CV split and dump predictions for R.

    Relies on outer-scope ``n`` (fold index) and ``k`` (repeat index) to
    locate the fold CSVs and name the output file.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    traindata = pd.read_csv("./classifier.trainset.5fold" + str(n) + ".csv")
    y_train = traindata['txoutcome']
    X_train = traindata[list(range(2, len(traindata.columns)))]

    testdata = pd.read_csv("./classifier.testset.5fold" + str(n) + ".csv")
    y_test = testdata['txoutcome']
    # BUG FIX: the test features were sliced with traindata's column count;
    # use testdata's own columns so a mismatched file cannot misalign them.
    X_test = testdata[list(range(2, len(testdata.columns)))]

    feature_columns = learn.infer_real_valued_columns_from_input(X_train)
    dnn_classifier = learn.DNNClassifier(
        hidden_units=[20, 40, 20],
        n_classes=5,
        feature_columns=feature_columns)
    dnn_classifier.fit(X_train, y_train, steps=100000)

    dnn_prediction = dnn_classifier.predict(X_test)
    # accuracy_score is symmetric, but (y_true, y_pred) is the documented order.
    print('DNN Prediction Score: {0}'.format(
        accuracy_score(y_test, dnn_prediction)))
    print(len(dnn_prediction))
    print(len(y_test))
    print(dnn_prediction[4])
    print(y_test[4])

    # Save the predicted value for the next step of C-index calculation by R.
    # BUG FIX: the output file handle was never closed; use a context manager.
    with open("./dnn_classifier.txoutcome.5fold" + str(5 * k + n) + ".txt",
              "w") as fout:
        for j in range(len(dnn_prediction)):
            fout.write(str(y_test[j]) + '\t' + str(dnn_prediction[j]) + '\n')
def main(unused_argv):
    """Scale + DNN pipeline on custom data; evaluate on the split and on
    ten hand-written new samples."""
    qhdatas, qh_target = loadDatas()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        qhdatas, qh_target, test_size=0.2, random_state=42)

    # It's useful to scale to ensure Stochastic Gradient Descent
    # will do the right thing.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('DNNclassifier', learn.DNNClassifier(
            feature_columns=learn.infer_real_valued_columns_from_input(x_train),
            hidden_units=[20, 10],
            n_classes=3)),
    ])
    pipeline.fit(x_train, y_train, DNNclassifier__steps=20)

    score = accuracy_score(y_test, list(pipeline.predict(x_test)))
    print('Accuracy: {0:f}'.format(score))

    # The expected labels are: [0,0,1,2,0,2,1,1,2,0]
    original_labels = np.array([0, 0, 1, 2, 0, 2, 1, 1, 2, 0])
    new_samples = np.array(
        [[14, 5, 30], [24, 5, 30], [4, 15, 45], [4, 15, 60], [23, 5, 30],
         [14, 5, 60], [24, 5, 45], [4, 15, 45], [4, 15, 60], [23, 5, 30]],
        dtype=int)
    output = list(pipeline.predict(new_samples))
    # We get [0,0,2,2,0,2,1,2,2,0]: the new-samples accuracy is 0.6.
    score = accuracy_score(original_labels, list(output))
    print('New samples accuracy: {0:f}'.format(score))
def trainDNN(Xtrain, Ytrain, model="DNN"):
    """Train a 2x128 DNN for 10 epochs of 1000 steps, logging loss per epoch.

    Checkpoints are written to ./<model>/ via model_dir, which is how the
    model is persisted; the trained classifier itself is not returned.
    """
    tf.logging.set_verbosity(tf.logging.ERROR)
    modeldir = os.getcwd() + os.sep + model + os.sep
    check_make_paths([modeldir])
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(Xtrain),
        hidden_units=[128, 128],
        n_classes=3,
        model_dir=modeldir)

    Ytrain = [int(i) for i in Ytrain]
    # BUG FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    max_epochs = 10
    start2 = time.perf_counter()
    for epoch in range(max_epochs):
        classifier.fit(input_fn=lambda: input_fn(Xtrain, Ytrain), steps=1000)
        # Evaluate on the training data after each epoch to track loss.
        loss = testDNN(Xtrain, classifier=classifier, Y=Ytrain)
        end2 = time.perf_counter()
        print("Epoch", epoch, "Done. Took:", end2 - start2, "loss of:", loss)
        start2 = end2
    end = time.perf_counter()
    timeFit = end - start
    print("Done fitting, time spent:", timeFit)
    print("Done saving the model")
    testDNN(Xtrain, classifier=classifier, Y=Ytrain)
def main(unused_argv):
    """Compare early stopping on training vs. validation data for iris."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, early_stopping_rounds=200)

    model_dir = '/tmp/iris_model'
    clean_folder(model_dir)
    # Classifier with early stopping on training data.
    classifier1 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir=model_dir)
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    score1 = metrics.accuracy_score(
        y_test, list(classifier1.predict(x_test, as_iterable=True)))

    model_dir = '/tmp/iris_model_val'
    clean_folder(model_dir)
    # Classifier with early stopping on validation data; checkpoint every
    # second so the monitor can pick up new checkpoints.
    classifier2 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir=model_dir,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    score2 = metrics.accuracy_score(
        y_test, list(classifier2.predict(x_test, as_iterable=True)))

    # In many applications, the score is improved by using early stopping.
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
def getBeliefDNN(X, classifier=None):
    """Return per-class probabilities for X as a list.

    If no classifier is supplied, one is restored from ./DNN/.
    """
    if not classifier:
        modeldir = os.getcwd() + os.sep + "DNN" + os.sep
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
            hidden_units=[128, 128],
            n_classes=3,
            model_dir=modeldir)
    # predict_proba may return a generator; materialize it for callers.
    return list(classifier.predict_proba(X))
def trainDNNTF(A, Cl, A_test, Cl_test, Root):
    """Train a tf.contrib.learn DNNClassifier and print its test metrics.

    Args:
        A, Cl: training features and labels.
        A_test, Cl_test: test features and labels.
        Root: path prefix for the model directory.

    Returns:
        (clf, le): the trained classifier and the fitted LabelEncoder.
    """
    import tensorflow as tf
    import tensorflow.contrib.learn as skflow
    from sklearn import preprocessing

    model_directory = Root + "/DNN-TF_"
    print("\n Training model saved in: ", model_directory, "\n")

    # Initialize estimator and training data.
    print(' Initializing TensorFlow...')
    tf.reset_default_graph()

    # Encode labels over the union of train+test so every class is known.
    totA = np.vstack((A, A_test))
    totCl = np.append(Cl, Cl_test)
    numTotClasses = np.unique(totCl).size
    le = preprocessing.LabelEncoder()
    totCl2 = le.fit_transform(totCl)
    Cl2 = le.transform(Cl)
    Cl2_test = le.transform(Cl_test)

    feature_columns = skflow.infer_real_valued_columns_from_input(
        totA.astype(np.float32))
    clf = skflow.DNNClassifier(feature_columns=feature_columns,
                               hidden_units=[20],
                               optimizer="Adagrad",
                               n_classes=numTotClasses,
                               activation_fn="tanh",
                               model_dir=model_directory)

    # Train, then evaluate on the held-out set.
    clf.fit(input_fn=lambda: input_fn(A, Cl2), steps=100)
    accuracy_score = clf.evaluate(input_fn=lambda: input_fn(A_test, Cl2_test),
                                  steps=1)

    print('\n ================================')
    print(' \033[1mDNN-TF\033[0m - Accuracy')
    print(' ================================')
    print("\n Accuracy: {:.2f}%".format(100 * accuracy_score["accuracy"]))
    print(" Loss: {:.2f}".format(accuracy_score["loss"]))
    print(" Global step: {:.2f}\n".format(accuracy_score["global_step"]))
    print(' ================================\n')
    return clf, le
def predict_batch(features):
    """Yield predictions for ``features`` from the saved DNN in MODEL_DIR."""
    print("predicting dataset")
    # Infer the feature columns from a single reshaped sample.
    # NOTE(review): assumes f_nodes[0][0:context] flattens to feature_size
    # elements — confirm against the producer of f_nodes.
    sample = np.float32(np.reshape(f_nodes[0][0:context], (1, feature_size)))
    columns = learn.infer_real_valued_columns_from_input(sample)
    model = learn.DNNClassifier(
        feature_columns=columns,
        hidden_units=[1024, 1024, 1024],
        n_classes=2,
        optimizer=tf.train.AdamOptimizer(1E-4),
        model_dir=MODEL_DIR)
    return model.predict(x=features, as_iterable=True)
def main(unused_in):
    """Load the 'CERN' dataset, train a 3-layer DNN, and print accuracy."""
    # Load dataset.
    # BUG FIX: the original loaded into `iris` but then referenced an
    # undefined global `CERN`; use the loaded dataset object directly.
    dataset = learn.datasets.load_dataset('CERN')
    # BUG FIX: train_test_split returns (x_train, x_test, y_train, y_test);
    # the original unpacked it as (x_train, y_train, x_test, y_test), which
    # silently swapped labels and features for half the variables.
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        dataset.data, dataset.target, test_size=.2, random_state=42)

    # Build 3 layer DNN with 10, 20, 10 hidden units.
    clasifier = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes=3)

    # Fit and predict.
    clasifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, clasifier.predict(x_test))
    print("accuracy:{0:f}".format(score))
def DNNgetAccuracy(X, Y_for_score):
    """Restore the saved DNN from ./DNN/ and score its predictions on X.

    Returns:
        Fraction of correct predictions according to countWrongLinear.
    """
    modeldir = os.getcwd() + os.sep + "DNN" + os.sep
    check_make_paths([modeldir])
    classifier = skflow.DNNClassifier(
        feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
        hidden_units=[128, 128],
        n_classes=3,
        model_dir=modeldir)
    probs_list = list(classifier.predict_proba(X))
    print(probs_list[0])
    numWrong, n = countWrongLinear(probs_list, Y_for_score)
    accuracy = 1 - (float(numWrong) / n)
    print(numWrong, "wrong /", n, "== accuracy of:", accuracy)
    return accuracy
def run5():
    """Train a 10-20-10 DNN on iris and print its test accuracy."""
    iris = learn.datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    feature_columns = [tf.contrib.layers.real_valued_column('', dimension=4)]
    clf = learn.DNNClassifier(hidden_units=[10, 20, 10],
                              n_classes=3,
                              feature_columns=feature_columns)
    clf.fit(X_train, y_train, steps=200)
    score = accuracy_score(y_test, clf.predict(X_test))
    print('Accuracy: {0:f}'.format(score))
def main(unused_argv):
    """Fit a 10-20-10 DNN on iris and report accuracy."""
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # Build 3 layer DNN with 10, 20, 10 units respectively.
    classifier = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print('Accuracy: {0:}'.format(score))
def __init__(self, X, Y, tune_parameters=False):
    """Wrap a skflow DNNClassifier over (X, Y).

    Args:
        X: feature matrix (dense; TensorFlow/Skflow doesn't support
            sparse matrices here).
        Y: labels; string labels are factorized to integer codes.
        tune_parameters: when True, a random-search parameter grid is
            prepared in ``self.param_dist_random``.
    """
    # BUG FIX: the caller's tune_parameters flag was hardcoded to False in
    # the super() call; forward it so base-class state stays consistent.
    super(TensorFlowNeuralNetwork, self).__init__(
        X, Y, tune_parameters=tune_parameters)
    self.X = X  # .todense() # TensorFlow/Skflow doesn't support sparse matrices
    # Convert string labels into numerical labels.
    self.Y = pd.factorize(Y)[0]
    output_layer = len(np.unique(Y))
    if tune_parameters:
        # NOTE(review): stdlib random.random() takes no argument — this
        # presumably relies on a numpy-style `random.random(100)`; confirm
        # which `random` is imported at module level.
        self.param_dist_random = {'learning_rate': random.random(100),
                                  'optimizer': ['Adam'],
                                  'hidden_units': [sp_randint(50, 500),
                                                   sp_randint(50, 500)]}
    feature_columns = [tf.contrib.layers.real_valued_column(
        "", dimension=self.X.shape[1])]
    self.clf = sklearn.DNNClassifier(hidden_units=self.hidden_units,
                                     feature_columns=feature_columns,
                                     n_classes=output_layer,
                                     optimizer='Adam',
                                     model_dir="log/dnn/")
def build_estimator(model_dir, model_type):
    """build an estimator"""
    # Base sparse (categorical) features.
    gender = layers.sparse_column_with_keys(column_name='gender', keys=['female', 'male'])
    education = layers.sparse_column_with_hash_bucket(column_name='education', hash_bucket_size=1000)
    relationship = layers.sparse_column_with_hash_bucket(column_name='relationship', hash_bucket_size=100)
    workclass = layers.sparse_column_with_hash_bucket(column_name='workclass', hash_bucket_size=100)
    occupation = layers.sparse_column_with_hash_bucket(column_name='occupation', hash_bucket_size=1000)
    native_country = layers.sparse_column_with_hash_bucket(column_name='native_country', hash_bucket_size=1000)

    # Base continuous features.
    age = layers.real_valued_column(column_name='age')
    education_num = layers.real_valued_column(column_name='education_num')
    capital_gain = layers.real_valued_column(column_name='capital_gain')
    capital_loss = layers.real_valued_column(column_name='capital_loss')
    hours_per_week = layers.real_valued_column(column_name='hours_per_week')

    # Bucketize the continuous age into categorical ranges, which can
    # improve accuracy for the linear part of the model.
    age_bucket = layers.bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])

    # The wide (linear) model uses only categorical features and their
    # crosses; the deep model embeds the categorical features.
    wide_columns = [
        gender, native_country, education, relationship, workclass,
        occupation, age_bucket,
        layers.crossed_column(columns=[education, occupation],
                              hash_bucket_size=int(1e4)),
        layers.crossed_column(columns=[age_bucket, education, occupation],
                              hash_bucket_size=int(1e6)),
        layers.crossed_column(columns=[native_country, occupation],
                              hash_bucket_size=int(1e4)),
    ]
    deep_columns = [
        layers.embedding_column(workclass, dimension=8),
        layers.embedding_column(education, dimension=8),
        layers.embedding_column(gender, dimension=8),
        layers.embedding_column(relationship, dimension=8),
        layers.embedding_column(native_country, dimension=8),
        layers.embedding_column(occupation, dimension=8),
        age, education_num, capital_gain, capital_loss, hours_per_week,
    ]

    if model_type == "wide":
        m = learn.LinearClassifier(feature_columns=wide_columns,
                                   model_dir=model_dir)
    elif model_type == "deep":
        m = learn.DNNClassifier(feature_columns=deep_columns,
                                model_dir=model_dir,
                                hidden_units=[100, 50])
    else:
        m = learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[256, 128, 64],
            dnn_activation_fn=tf.nn.relu)
    return m
def main(unused_argv):
    """Contrast early stopping on train vs. validation data (iris)."""
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, early_stopping_rounds=200)

    # Classifier with early stopping on training data.
    classifier1 = learn.DNNClassifier(hidden_units=[10, 20, 10],
                                      n_classes=3,
                                      model_dir='/tmp/iris_model/')
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    score1 = metrics.accuracy_score(y_test, classifier1.predict(x_test))

    # Classifier with early stopping on validation data; save frequently
    # so the monitor can pick up new checkpoints.
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model_val/',
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    score2 = metrics.accuracy_score(y_test, classifier2.predict(x_test))

    # In many applications, the score is improved by using early stopping.
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
def contrib_learn_classifier_test():
    """Test tf.contrib.learn.DNN_classifier."""
    language_column = layers.sparse_column_with_hash_bucket(
        "language", hash_bucket_size=20)
    feature_columns = [
        layers.embedding_column(language_column, dimension=3),
        layers.real_valued_column("age", dtype=tf.int64),
    ]
    classifier = learn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[100, 100],
        config=learn.RunConfig(tf_random_seed=1,
                               model_dir="../model_saver/estimators/"
                                         "DNN_classifier_01"),
        # optimizer=optimizer_exp_decay
    )
    classifier.fit(input_fn=_input_fn, steps=10000)
    print("variables_names:\n", str(classifier.get_variable_names()))

    # Evaluate with a custom set of streaming metrics keyed on "classes".
    custom_metrics = {
        'my_accuracy': MetricSpec(metric_fn=metrics.streaming_accuracy,
                                  prediction_key="classes"),
        'my_precision': MetricSpec(metric_fn=metrics.streaming_precision,
                                   prediction_key="classes"),
        'my_recall': MetricSpec(metric_fn=metrics.streaming_recall,
                                prediction_key="classes"),
        'my_metric': MetricSpec(metric_fn=my_metric_op,
                                prediction_key="classes"),
    }
    scores = classifier.evaluate(input_fn=_input_fn, steps=100,
                                 metrics=custom_metrics)
    print("scores:\n", str(scores))

    predictions = classifier.predict(input_fn=_input_fn,
                                     outputs=["classes", "probabilities"])
    print("predictions")
    for prediction in predictions:
        print(prediction)
def main(unused_argv):
    """Train a 10-20-10 DNN on iris for 300 steps and print accuracy."""
    # Load dataset.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # Build 3 layer DNN with 10, 20, 10 units respectively.
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=300)
    score = metrics.accuracy_score(
        y_test, list(classifier.predict(x_test, as_iterable=True)))
    print('Accuracy: {0:f}'.format(score))
def main(argv):
    """Train a DNN on iris and print accuracy via predict_classes."""
    iris = learn.datasets.load_iris()
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        iris.data, iris.target, test_size=.2, random_state=42)
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=1)]
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)
    classifier.fit(x_train, y_train, steps=200)
    # predict_classes may return a generator; materialize before scoring.
    x_predit = list(classifier.predict_classes(x_test))
    score = metrics.accuracy_score(y_test, x_predit)
    print('Accuracy: {0:f}'.format(score))
def main():
    """Train a 10-20-10 DNN on iris for 2000 steps and print accuracy."""
    logging.getLogger().setLevel(logging.INFO)
    iris = learn.datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)]
    # Build a neural network with 3 hidden layers: 10, 20, 10 units each.
    classifier = learn.DNNClassifier(hidden_units=[10, 20, 10],
                                     n_classes=3,
                                     feature_columns=feature_columns)
    classifier.fit(X_train, y_train, steps=2000)
    score = metrics.accuracy_score(y_test, classifier.predict(X_test))
    print("Accuracy: {0:f}".format(score))
def dnn_model(output_dir):
    """Build a DNNClassifier over the real features plus embedded sparse ones."""
    real, sparse = get_features()
    # Renamed from `all` to avoid shadowing the builtin.
    feature_cols = {}
    feature_cols.update(real)
    # Create embeddings of the sparse columns.
    feature_cols.update(
        {colname: create_embed(col) for colname, col in sparse.items()})
    estimator = tflearn.DNNClassifier(model_dir=output_dir,
                                      feature_columns=feature_cols.values(),
                                      hidden_units=[64, 16, 4])
    estimator.params["head"]._thresholds = [0.7]  # FIXME: hack
    return estimator
def testDNN(X, model="DNN", classifier=None, Y=None):
    """Evaluate or predict with a saved DNN.

    If Y is given, returns the evaluation loss on (X, Y); otherwise prints
    and returns the per-class probabilities for X.
    """
    modeldir = os.getcwd() + os.sep + model + os.sep
    # BUG FIX: compare against None with `is` / `is not`. In particular
    # `Y != None` on a numpy array broadcasts element-wise and then raises
    # "truth value of an array is ambiguous" when used in the `if`.
    if classifier is None:
        classifier = skflow.DNNClassifier(
            feature_columns=tf.contrib.learn.infer_real_valued_columns_from_input(X),
            hidden_units=[128, 128],
            n_classes=3,
            model_dir=modeldir)
        print("classifier created")
    if Y is not None:
        ev = classifier.evaluate(input_fn=lambda: input_fn(X, Y), steps=1)
        loss_score = ev["loss"]
        print("Loss: {0:f}".format(loss_score))
        return loss_score
    probs = classifier.predict_proba(X)
    print("PROBS:", probs)
    for i in probs:
        print(i)
    return probs
def main(unused_argv):
    """Scale iris features and train a DNN inside a sklearn Pipeline."""
    iris = load_iris()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    # It's useful to scale to ensure Stochastic Gradient Descent
    # will do the right thing.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('DNNclassifier', learn.DNNClassifier(hidden_units=[10, 20, 10],
                                              n_classes=3)),
    ])
    pipeline.fit(x_train, y_train, DNNclassifier__steps=200)
    score = accuracy_score(y_test, pipeline.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
def clasificacion():
    """Flatten the image dataset, train a 10-20-10 DNN, and print accuracy."""
    data = creacionDataset()
    print("soy una gueva")
    # use scikit.learn.datasets in the future
    print(len(data[0]), "gonorrea", len(data[1]))

    image_train = np.array(data[0])
    label_train = np.array(data[1])
    # Flatten each 2-D image into a single feature row.
    image_train = image_train.reshape(
        image_train.shape[0], image_train.shape[1] * image_train.shape[2])
    label_train = label_train.reshape(label_train.shape[0], )
    image_train, label_train = shuffle(image_train, label_train,
                                       random_state=42)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        image_train, label_train, test_size=.3, random_state=42)

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = [tf.contrib.layers.real_valued_column("", dimension=1)]
    classifier = learn.DNNClassifier(feature_columns=feature_columns,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3)
    # Fit and predict.
    classifier.fit(x_train, y_train, steps=200)
    x_predict = list(classifier.predict_classes(x_test))
    score = metrics.accuracy_score(y_test, x_predict)
    print('Accuracy: {0:f}'.format(score))
def get_model(filename=CLASSIFIER_FILE):
    """Get CNN classifier object from file or create one if none exists on file.

    Args:
        filename: serialized classifier path; pass None to force training
            a fresh model from the 'raw' featureset.

    Returns:
        A fitted (or deserialized) skflow DNNClassifier.
    """
    # BUG FIX: compare to None with `is`, not `==`.
    if filename is None:
        # Load dataset.
        print(Helper.unserialize("../Datasets/raw_new_80.data"))
        train_data, train_targets, test_data, expected = get_featureset('raw')

        # Reshape each flat feature row back into a 20x20 image
        # (enumerate replaces the original manual index counters).
        raw_train_data = np.zeros((train_data.shape[0], 20, 20))
        for i, item in enumerate(train_data):
            raw_train_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_train_data[i])

        raw_test_data = np.zeros((test_data.shape[0], 20, 20))
        for i, item in enumerate(test_data):
            raw_test_data[i] = item.reshape((20, 20))
            # Display.show_image(raw_test_data[i])

        # Build classifier.
        classifier = skflow.DNNClassifier(feature_engineering_fn=conv_model,
                                          n_classes=2)
        classifier.fit(raw_train_data, train_targets)

        # Assess built classifier.
        predictions = classifier.predict(raw_test_data)
        accuracy = metrics.accuracy_score(expected, predictions)
        confusion_matrix = metrics.confusion_matrix(expected, predictions)
        print("Confusion matrix:\n%s" % confusion_matrix)
        print('Accuracy: %f' % accuracy)
        return classifier
    else:
        return Helper.unserialize(filename)