def main(unused_argv):
    """Compare early stopping driven by training data vs. a held-out validation set.

    Trains two identical 3-layer DNNs on iris; the second uses a
    ValidationMonitor for early stopping, and both are scored on the same
    test split so the final printout can compare them.
    """
    iris = datasets.load_iris()

    # Hold out 20% for testing, then 20% of the remainder for validation.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42)

    # Monitor that halts training once validation metrics stop improving.
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, early_stopping_rounds=200)

    # Classifier 1: early stopping judged on training data only.
    classifier1 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model/')
    classifier1.fit(x=x_train, y=y_train, steps=2000)
    predictions1 = classifier1.predict(x_test)
    score1 = metrics.accuracy_score(y_test, predictions1)

    # Classifier 2: early stopping on validation data; checkpoint every
    # second so the monitor always sees a fresh checkpoint to evaluate.
    classifier2 = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model_val/',
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x=x_train, y=y_train, steps=2000, monitors=[val_monitor])
    predictions2 = classifier2.predict(x_test)
    score2 = metrics.accuracy_score(y_test, predictions2)

    # In many applications, the score is improved by using early stopping.
    print('score1: ', score1)
    print('score2: ', score2)
    print('score2 > score1: ', score2 > score1)
def main(unused_argv):
    """Fit a two-layer DNN regressor on the Boston housing data and print MSE."""
    # Load dataset.
    boston = learn.datasets.load_dataset('boston')

    # Split into train / test (fixed seed for reproducibility).
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        boston.data, boston.target, test_size=0.2, random_state=42)

    # Standardize features to zero mean / unit variance using statistics
    # computed from the training set only.
    scaler = preprocessing.StandardScaler()
    x_train = scaler.fit_transform(x_train)

    # Fully connected DNN with two hidden layers of 10 units each.
    regressor = learn.DNNRegressor(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 10])
    regressor.fit(x_train, y_train, steps=5000, batch_size=1)

    # Apply the same scaling to the test set before predicting, then score.
    predictions = regressor.predict(scaler.transform(x_test))
    score = metrics.mean_squared_error(predictions, y_test)
    print('MSE: {0:f}'.format(score))
def main(unused_argv):
    """Iris DNN demo that round-trips the train/test splits through HDF5.

    Saving and re-loading the splits via h5py is purely a demonstration of
    HDF5 I/O; the classifier itself is the standard 3-layer iris DNN.
    """
    # Load dataset and split it.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Write the four splits out as named HDF5 datasets.
    h5f = h5py.File('/tmp/test_hdf5.h5', 'w')
    for name, split in (('X_train', x_train), ('X_test', x_test),
                        ('y_train', y_train), ('y_test', y_test)):
        h5f.create_dataset(name, data=split)
    h5f.close()

    # Read them back into numpy arrays.
    h5f = h5py.File('/tmp/test_hdf5.h5', 'r')
    x_train = np.array(h5f['X_train'])
    x_test = np.array(h5f['X_test'])
    y_train = np.array(h5f['y_train'])
    y_test = np.array(h5f['y_test'])

    # 3-layer DNN with 10, 20 and 10 units respectively.
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3)

    # Fit, predict and report accuracy.
    classifier.fit(x_train, y_train, steps=200)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))
    print('Accuracy: {0:f}'.format(score))
def main(unused_argv):
    """Train a 3-layer DNN on the iris dataset and print test accuracy."""
    # Load dataset and hold out 20% for testing.
    iris = learn.datasets.load_dataset('iris')
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Hidden layers of 10, 20 and 10 units respectively.
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3)

    # Fit, then score predictions on the held-out split.
    classifier.fit(x_train, y_train, steps=200)
    predictions = classifier.predict(x_test)
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))
def dnnclassifier():
    """Train a DNN classifier on one cross-validation fold and dump predictions.

    Reads fold-specific train/test CSVs, fits a DNN with hidden layers
    [20, 40, 20] on all columns from index 2 onward, prints accuracy and a
    few sanity values, and writes (label, prediction) pairs to a text file
    for the next step: C-index calculation in R.

    NOTE(review): relies on globals `n` (fold index) and `k` (repeat index)
    being defined in an enclosing scope — confirm before reuse.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Load this fold's training and test sets; 'txoutcome' is the label column.
    traindata = pd.read_csv("./classifier.trainset.5fold" + str(n) + ".csv")
    y_train = traindata['txoutcome']
    X_train = traindata[list(range(2, len(traindata.columns)))]
    testdata = pd.read_csv("./classifier.testset.5fold" + str(n) + ".csv")
    y_test = testdata['txoutcome']
    # NOTE(review): column count intentionally(?) taken from traindata; this is
    # only correct if both CSVs share the same schema — confirm.
    X_test = testdata[list(range(2, len(traindata.columns)))]

    # DNN with three hidden layers over real-valued feature columns.
    feature_columns = learn.infer_real_valued_columns_from_input(X_train)
    dnn_classifier = learn.DNNClassifier(
        hidden_units=[20, 40, 20], n_classes=5, feature_columns=feature_columns)
    dnn_classifier.fit(X_train, y_train, steps=100000)
    dnn_prediction = dnn_classifier.predict(X_test)

    print('DNN Prediction Score: {0}'.format(
        accuracy_score(dnn_prediction, y_test)))
    # Sanity checks: lengths should match, and index 4 gives a spot check.
    print(len(dnn_prediction))
    print(len(y_test))
    print(dnn_prediction[4])
    print(y_test[4])

    # Save the predicted values for the next step of C-index calculation by R.
    # Fix: use a context manager so the file is flushed and closed even if a
    # write raises (the original opened `fout` and never closed it).
    with open("./dnn_classifier.txoutcome.5fold" + str(5 * k + n) + ".txt",
              "w") as fout:
        for j in range(len(dnn_prediction)):
            fout.write(str(y_test[j]) + '\t' + str(dnn_prediction[j]) + '\n')
def main(unused_argv):
    """Chain a StandardScaler and a DNNClassifier inside an sklearn Pipeline."""
    iris = load_iris()
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)

    # Scaling the inputs keeps stochastic gradient descent well-behaved.
    scaler = StandardScaler()

    # 3-layer DNN classifier over inferred real-valued columns.
    classifier = learn.DNNClassifier(
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        hidden_units=[10, 20, 10],
        n_classes=3)

    # Pipeline scales first, then trains the DNN; the double-underscore
    # syntax routes `steps` to the 'DNNclassifier' stage.
    pipeline = Pipeline([('scaler', scaler), ('DNNclassifier', classifier)])
    pipeline.fit(x_train, y_train, DNNclassifier__steps=200)

    predictions = list(pipeline.predict(x_test))
    score = accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))
def main(unused_args):
    """Score a linear classifier and a convolutional network on MNIST."""
    # Download and load the MNIST dataset.
    mnist = learn.datasets.load_dataset('mnist')

    # --- Linear classifier ---
    feature_columns = learn.infer_real_valued_columns_from_input(
        mnist.train.images)
    classifier = learn.LinearClassifier(
        feature_columns=feature_columns, n_classes=10)
    classifier.fit(mnist.train.images,
                   mnist.train.labels.astype(np.int32),
                   batch_size=100,
                   steps=1000)
    score = metrics.accuracy_score(
        mnist.test.labels, list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))

    # --- Convolutional network (model function defined elsewhere) ---
    classifier = learn.Estimator(model_fn=conv_model)
    classifier.fit(mnist.train.images, mnist.train.labels,
                   batch_size=100, steps=20000)
    score = metrics.accuracy_score(
        mnist.test.labels, list(classifier.predict(mnist.test.images)))
    print('Accuracy: {0:f}'.format(score))
# Train a linear classifier on iris, then persist the fitted model to disk.
from __future__ import division
from __future__ import print_function

import shutil

from sklearn import cross_validation
from sklearn import datasets
from sklearn import metrics
from tensorflow.contrib import learn

# Load iris and hold out 20% for testing (fixed seed for reproducibility).
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# NOTE(review): TensorFlowLinearClassifier is a deprecated contrib.learn
# alias — confirm the installed TF version still exposes it.
classifier = learn.TensorFlowLinearClassifier(
    feature_columns=learn.infer_real_valued_columns_from_input(x_train),
    n_classes=3)
classifier.fit(x_train, y_train)
score = metrics.accuracy_score(y_test, classifier.predict(x_test))
print('Accuracy: {0:f}'.format(score))

# Clean checkpoint folder if exists (a missing directory is fine).
try:
    shutil.rmtree('/tmp/skflow_examples/iris_custom_model')
except OSError:
    pass

# Save model, parameters and learned variables, then drop the in-memory copy.
classifier.save('/tmp/skflow_examples/iris_custom_model')
classifier = None
# MNIST examples: a linear classifier via contrib.learn, plus a pooling
# building block for a convolutional network.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sklearn import metrics
import tensorflow as tf
from tensorflow.contrib import learn

### Download and load MNIST data.
mnist = learn.datasets.load_dataset('mnist')

### Linear classifier.
feature_columns = learn.infer_real_valued_columns_from_input(mnist.train.images)
classifier = learn.LinearClassifier(
    feature_columns=feature_columns, n_classes=10)
classifier.fit(mnist.train.images, mnist.train.labels,
               batch_size=100, steps=1000)
score = metrics.accuracy_score(
    mnist.test.labels, classifier.predict(mnist.test.images))
print('Accuracy: {0:f}'.format(score))


### Convolutional network
def max_pool_2x2(tensor_in):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `tensor_in`."""
    return tf.nn.max_pool(tensor_in, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')
def trainMLPviaAPI(mnistTrain, mnistTest):
    """Train an MLP on MNIST dataframes via the Estimator API and print accuracy.

    Expects `mnistTrain` / `mnistTest` to be pandas DataFrames with 'index'
    and 'label' columns plus pixel columns (inferred from the .iloc/.drop
    usage below). Features are standardized with statistics fitted on the
    training set only, then a DNN with hidden layers [300, 100] is trained
    for 40000 steps. Returns None; all results are printed.
    """
    print("\n####################")
    print("trainMLPviaAPI():\n")
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    # Echo the incoming training frame for debugging.
    # NOTE(review): the header string says rows 1:5 but rows 0:5 are printed.
    print("type(mnistTrain): " + str(type(mnistTrain)))
    print("mnistTrain.columns: " + str(mnistTrain.columns))
    print("mnistTrain.iloc[1:5,:]")
    print(mnistTrain.iloc[0:5, :])
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myScaler = StandardScaler()
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    # Training features: everything except the bookkeeping columns; the
    # scaler is FIT here so test data reuses training statistics below.
    X_train = mnistTrain.drop(labels=['index', 'label'], axis=1)
    X_train = X_train.astype('float32')
    X_train_scaled = myScaler.fit_transform(X_train)
    X_train_scaled = X_train_scaled.astype('float32')
    y_train = mnistTrain['label'].astype('int')
    print("type(X_train): " + str(type(X_train)))
    #print( "X_train.iloc[0:5,:]" )
    #print( X_train.iloc[0:5,:] )
    print("type(X_train_scaled): " + str(type(X_train_scaled)))
    print("X_train_scaled.shape: " + str(X_train_scaled.shape))
    #print( "X_train_scaled[0:5,:]" )
    #print( X_train_scaled[0:5,:] )
    print("type(y_train): " + str(type(y_train)))
    print("y_train.shape: " + str(y_train.shape))
    #print( "y_train[0:5]" )
    #print( y_train[0:5] )
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    # Test features: same preprocessing, but transform() only (no refit).
    X_test = mnistTest.drop(labels=['index', 'label'], axis=1)
    X_test = X_test.astype('float32')
    X_test_scaled = myScaler.transform(X_test)
    X_test_scaled = X_test_scaled.astype('float32')
    y_test = mnistTest['label'].astype('int')
    print("type(X_test): " + str(type(X_test)))
    #print( "X_test.iloc[0:5,:]" )
    #print( X_test.iloc[0:5,:] )
    print("type(X_test_scaled): " + str(type(X_test_scaled)))
    print("X_test_scaled.shape: " + str(X_test_scaled.shape))
    #print( "X_test_scaled[0:5,:]" )
    #print( X_test_scaled[0:5,:] )
    print("type(y_test): " + str(type(y_test)))
    print("y_test.shape: " + str(y_test.shape))
    #print( "y_test[0:5]" )
    #print( y_test[0:5] )
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    print("\ntraining begins ...")
    # SKCompat wraps the Estimator so fit/predict accept numpy arrays.
    featureColumns = infer_real_valued_columns_from_input(X_train_scaled)
    clfDNN = SKCompat(DNNClassifier(
        hidden_units=[300, 100],
        n_classes=10,
        feature_columns=featureColumns))
    clfDNN.fit(x=X_train_scaled, y=y_train, batch_size=50, steps=40000)
    print("\ntraining complete.\n")
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    # SKCompat.predict returns a dict; 'classes' holds the predicted labels.
    y_predicted = clfDNN.predict(X_test_scaled)
    y_predicted = y_predicted['classes']
    print("type(y_predicted): " + str(type(y_predicted)))
    print("y_predicted:")
    print(y_predicted)
    #print( "y_predicted.shape: " + str(y_predicted.shape) )
    print("accuracy_score(y_test,y_predicted): " +
          str(accuracy_score(y_test, y_predicted)))
    #print("clfDNN.evaluate(X_test_scaled,y_test)")
    #print( clfDNN.evaluate(X_test_scaled,y_test) )
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    print("\nexiting: trainMLPviaAPI()")
    print("####################")
    return( None )
def train_model(self):
    """Fit a DNNRegressor on self.x / self.y and store it on self.model."""
    # One real-valued feature column per input feature, inferred from the data.
    columns = learn.infer_real_valued_columns_from_input(self.x)
    # Two hidden layers of 10 units each; 500 training steps, batch size 1.
    dnn = learn.DNNRegressor(feature_columns=columns, hidden_units=[10, 10])
    dnn.fit(self.x, self.y, steps=500, batch_size=1)
    self.model = dnn
# %% [markdown] # ### using contrib.learn ’s estimator for linear regression # The whole process of defining, fitting, and evaluating the model comes down to # just a few lines: # 1. The linear regression model is instantiated using learn.LinearRegressor() and # fed with knowledge about the data representation and the type of optimizer: # 2. The regressor object is trained using .fit() . We pass the covariates and the tar‐ # get variable, and set the number of steps and batch size: # 3. The MSE loss is returned by .evaluate(): # %% num_steps = 200 minibatch_size = 506 feature_columns = learn.infer_real_valued_columns_from_input(x_data) reg = learn.LinearRegressor(feature_columns=feature_columns, optimizer=tf.train.GradientDescentOptimizer( learning_rate=0.1)) reg.fit(x_data, boston.target, steps=num_steps, batch_size=minibatch_size) MSE = reg.evaluate(x_data, boston.target, steps=1) print(MSE) # %% [markdown] # ## DNN Classifier # The DNNClassifier estimator allows us to perform a similar task with a considerably reduced amount of code. Also, it lets us add hidden layers (the “deep” part of the DNN).