# 6. Output _O = [tf.matmul(x, _W['out']) + _b['out'] for x in _LSTM_O] _O = tf.pack(_O) # Return! return { 'X': _X, 'H': _H, 'Hsplit': _Hsplit, 'LSTM_O': _LSTM_O, 'LSTM_S': _LSTM_S, 'O': _O } # Load MNIST, our beloved friend mnist = load_data.read_data_sets("/home/a/workspace/ssd/DataSets/mnist_2/",\ "/home/a/workspace/ssd/DataSets/mnist_2_test/",one_hot=False) trainimgs, trainlabels, testimgs, testlabels = mnist.train.images,\ mnist.train.labels,\ mnist.test.images,\ mnist.test.labels ntrain, ntest, dim, nclasses \ = trainimgs.shape[0], testimgs.shape[0], trainimgs.shape[1], trainlabels.shape[1] print "ntrain: ", ntrain print "dim: ", dim print "nclasses: ", nclasses nclasses = 11 print("MNIST loaded") # Training params
outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False) outputs = tf.unstack(tf.transpose(outputs, [1,0,2])) results=[] for i in range(len(outputs)) : result = tf.matmul(outputs[i], weights['out']) + biases['out'] # shape = (128, 10) results.append(result) return results # Load MNIST, our beloved friend mnist = load_data.read_data_sets("1",\ "2",one_hot=False) trainimgs, trainlabels, testimgs, testlabels = mnist.train.images,\ mnist.train.labels,\ mnist.test.images,\ mnist.test.labels nclasses = 36 print ("MNIST loaded") # Training params training_epochs = 4000 batch_size = 1 display_step = 1 #learning_rate = 0.001 learning_rate = 0.001
import load_data
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fix the numpy RNG seed before any data is drawn.
np.random.seed(100)

# read_data_sets returns a DataSet container exposing .train and .test
data = load_data.read_data_sets(one_hot=True)

# Pull one batch of 84420 training samples together with their labels.
train_x, train_label = data.train.next_batch(84420)

# Held-out samples and their labels.
test_x = data.test.data
test_labels = data.test.labels

# Number of training samples reported by the data set.
n_samples = data.train.num_examples

# 3-nearest-neighbour classifier; fit() returns the fitted estimator itself.
knn = KNeighborsClassifier(n_neighbors=3).fit(train_x, train_label)

# Predict the held-out samples and report the accuracy as a percentage.
y_pred = knn.predict(test_x)
print("Accuracy", accuracy_score(y_pred, test_labels) * 100)
def parse_args1():
    """Parse the command line and return the resulting argparse namespace.

    Declares a single required positional argument, ``pr``; the script below
    appends its value to the data directory prefix.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("pr")
    #parser.add_argument("-name","--names_print")
    #parser.add_argument("-inc","--increments")
    #print parser.pr
    return parser.parse_args()


args1 = parse_args1()

# Load the data set. Train and test are read from the same directory, which
# is the fixed prefix below joined with the ``pr`` command-line argument.
mnist = load_data.read_data_sets(
    "/mnt/d/workspace/ubuntu/workspace/ocr/" + args1.pr,
    "/mnt/d/workspace/ubuntu/workspace/ocr/" + args1.pr,
    one_hot=True,
    validation_size=1)
trainimgs, trainlabels, testimgs, testlabels \
    = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
# nclasses is taken from the second dimension of the label array.
ntrain, ntest, dim, nclasses \
    = trainimgs.shape[0], testimgs.shape[0], trainimgs.shape[1], trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network
diminput = 28
dimhidden = 128
dimoutput = nclasses
nsteps = 28
# NOTE(review): diminput * nsteps presumably has to equal dim -- confirm
# against the data produced by load_data.
weights = {
    'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])),
def sparse_tuple_from(sequences, dtype=np.int32): """Create a sparse representention of x. Args: sequences: a list of lists of type dtype where each element is a sequence Returns: A tuple with (indices, values, shape) """ indices = [] values = [] for n, seq in enumerate(sequences): #print "Seq is : ",seq indices.extend(zip([n]*(seq[0].shape[0]), xrange((seq[0].shape[0]))) #print "length is : ",seq[0].shape values.extend([seq[0][0]]) values.extend([seq[0][0]]) indices = np.asarray(indices, dtype=np.int64) values = np.asarray(values, dtype=dtype) shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1]+1], dtype=np.int64) return indices, values, shape def _RNN(_X, _istate, _W, _b, _nsteps, _name): # 1. Permute input from [batchsize, nsteps, diminput] => [nsteps, batchsize, diminput] _X = tf.transpose(_X, [1, 0, 2]) # 2. Reshape input to [nsteps*batchsize, diminput] _X = tf.reshape(_X, [-1, diminput]) # 3. Input layer => Hidden layer _H = tf.matmul(_X, _W['hidden']) + _b['hidden'] # 4. Splite data to 'nsteps' chunks. An i-th chunck indicates i-th batch data _Hsplit = tf.split(0, _nsteps, _H) # 5. Get LSTM's final output (_O) and state (_S) # Both _O and _S consist of 'batchsize' elements with tf.variable_scope(_name): lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(dimhidden, forget_bias=1.0) _LSTM_O, _LSTM_S = tf.nn.rnn(lstm_cell, _Hsplit, initial_state=_istate) # 6. Output _O = [tf.matmul(x, _W['out']) + _b['out'] for x in _LSTM_O] _O = tf.pack(_O) # Return! 
return { 'X': _X, 'H': _H, 'Hsplit': _Hsplit, 'LSTM_O': _LSTM_O, 'LSTM_S': _LSTM_S, 'O': _O } # Load MNIST, our beloved friend #mnist = input_data.read_data_sets("../../DataSets",one_hot=False) mnist = load_data.read_data_sets("/home/a/workspace/ssd/DataSets/mnist_2/",\ "/home/a/workspace/ssd/DataSets/mnist_2/",one_hot=False) trainimgs, trainlabels, testimgs, testlabels = mnist.train.images,\ mnist.train.labels,\ mnist.test.images,\ mnist.test.labels ntrain, ntest, dim = trainimgs.shape[0],\ testimgs.shape[0],\ trainimgs.shape[1],\ #trainlabels.shape[1] nclasses = 10 print "ntrain: ",ntrain print "ntest : ",ntest print "dim: ",dim print "classes: ",trainlabels.shape print ("MNIST loaded") # Training params training_epochs = 1500 batch_size = 10 display_step = 1 learning_rate = 0.01 # Recurrent neural network params diminput = 28 dimhidden = 128 # here we add the blank label dimoutput = nclasses + 1 nsteps = 28 graph = tf.Graph() with graph.as_default(): weights = { 'hidden': tf.Variable(tf.random_normal([diminput, dimhidden])), 'out': tf.Variable(tf.random_normal([dimhidden, dimoutput])) } biases = { 'hidden': tf.Variable(tf.random_normal([dimhidden])), 'out': tf.Variable(tf.random_normal([dimoutput])) } #************************************************** # will be used in CTC_LOSS x = tf.placeholder(tf.float32, [None, nsteps, diminput]) shape_x = tf.shape(x) istate = tf.placeholder(tf.float32, [None, 2*dimhidden]) #state & cell => 2x n_hidden # [batch_size , 1] y = tf.sparse_placeholder(tf.int32) # 1d array of size [batch_size] # Seq len indicates the quantity of true data in the input, since when working with batches we have to pad with zeros to fit the input in a matrix seq_len = tf.placeholder(tf.int32, [None]) myrnn = _RNN(x, istate, weights, biases, nsteps, 'basic') pred = myrnn['O'] # [28 25 11] [nsteps, batch_size,num_nclass] shape_Pred = tf.shape(pred) #************************************************** # we add ctc module loss = ctc.ctc_loss(pred, y, 
seq_len) shape_y = tf.shape(y) cost = tf.reduce_mean(loss) # Adam Optimizer optm = tf.train.AdamOptimizer(learning_rate).minimize(cost) #Decode the best path decoded, log_prob = ctc.ctc_greedy_decoder(pred, seq_len) accr = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), y)) init = tf.initialize_all_variables() print ("Network Ready!") with tf.Session(graph=graph) as sess: sess.run(init) summary_writer = tf.train.SummaryWriter('./logs/', graph=sess.graph) print ("Start optimization") for epoch in range(training_epochs): avg_cost = 0. total_batch = int(mnist.train.num_examples/batch_size) # Loop over all batches for i in range(total_batch): batch_xs, batch_ys = mnist.train.next_batch(batch_size) batch_xs = batch_xs.reshape((batch_size, nsteps, diminput)) #print "shape of batch_xs is : ",batch_xs.shape # Fit training using batch data feed_dict={x: batch_xs, y: sparse_tuple_from([[value] for value in batch_ys]), \ istate: np.zeros((batch_size, 2*dimhidden)), \ seq_len: [nsteps for _ in xrange(batch_size)]} _, batch_cost = sess.run([optm, cost], feed_dict=feed_dict) # Compute average loss avg_cost += batch_cost*batch_size #print "COST_pred shape is :",pred.shape avg_cost /= len(trainimgs) # Display logs per epoch step if epoch % display_step == 0: print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost)) train_acc = sess.run(accr, feed_dict=feed_dict) #prediction = sess.run(decoded[0],feed_dict=feed_dict) print " " print "**************************XXXXXXXXXXXXXXXXX: " print "Shape of x is : ",sess.run(shape_x,feed_dict=feed_dict) print "Shape of y is : ",sess.run(shape_y,feed_dict=feed_dict) print "Shape of pred : ",sess.run(shape_Pred,feed_dict=feed_dict) #print "prediction is : ",prediction print (" Training label error rate: %.3f" % (train_acc)) testimgs = testimgs.reshape((ntest, nsteps, diminput)) feed_dict={x: testimgs, y: sparse_tuple_from([[value] for value in testlabels]), istate: np.zeros((ntest, 2*dimhidden)), seq_len: [nsteps 
for _ in xrange(len(testimgs))]} test_acc = sess.run(accr, feed_dict=feed_dict) print (" Test label error rate: %.3f" % (test_acc)) p_xs, p_ys = mnist.train.next_batch(batch_size) p_xs = p_xs.reshape((batch_size,nsteps,diminput)) ''' feed_dict ={x:p_xs,y:sparse_tuple_from([[value] for value in p_ys]), istate: np.zeros((ntest, 2*dimhidden)), seq_len: [nsteps for _ in xrange(batch_size)]} prediction = sess.run(decoded[0],feed_dict=feed_dict) print "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX: ",prediction ''' print ("Optimization Finished.")
def _encode_sentiment_labels(one_hot_labels):
    """Translate one-hot label rows into the integer codes 1 / 0 / -1.

    Column 0 (positive) maps to 1, column 1 (neutral) to 0 and column 2
    (negative) to -1. The three columns are tested independently, so a row
    contributes one code for every column that equals 1.
    """
    encoded = []
    for i in range(len(one_hot_labels)):
        if one_hot_labels[i, 0] == 1:
            # Positive emotion
            encoded.append(1)
        if one_hot_labels[i, 1] == 1:
            # Neutral emotion
            encoded.append(0)
        if one_hot_labels[i, 2] == 1:
            # Negative emotion
            encoded.append(-1)
    return encoded


def main():
    """Load the data, run pre-processing and hand the result to the CNN model.

    The train batch and the test split are re-encoded from one-hot labels to
    1 / 0 / -1, merged into a single data set for pre-processing, and the
    extracted features are then passed to ``convolution_model`` together with
    the time at which pre-processing started.
    """
    # return dataset from load_data function in load_data.py script
    data = load_data.read_data_sets(one_hot=True)

    # get train data and labels by batch size
    # training = 84420 and testing = 58128
    BATCH_SIZE = 84420
    train_x, train_labels = data.train.next_batch(BATCH_SIZE)

    # get test data and test labels
    test_x = data.test.data
    test_labels = data.test.labels

    # Label formation: one-hot rows -> 1 (positive) / 0 (neutral) / -1 (negative)
    train_label = _encode_sentiment_labels(train_labels)
    test_label = _encode_sentiment_labels(test_labels)

    # Merge train and test dataset
    dataset = np.concatenate((np.array(train_x), np.array(test_x)))

    # Merge train and test labels
    dataset_label = np.concatenate((np.array(train_label), np.array(test_label)))

    # Starting timer
    start_time_lda = time.time()

    # Implementing pre processing steps such as data augmentation and feature extraction
    train_fe, test_fe = preprocessing_methods(dataset, dataset_label)

    # Implementing CNN model for classification
    # NOTE(review): the model receives the original one-hot train_labels and
    # test_labels, not the re-encoded ones -- confirm this is intended.
    convolution_model(train_fe, train_labels, test_fe, test_labels, start_time_lda)
#!/usr/bin/env python
# coding=utf-8
# Import packages
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import load_data
#import matplotlib.pyplot as plt
#matplotlib inline
print("Packages imported")

# Load the data set; train and test are read from the same directory.
mnist = load_data.read_data_sets(
    "/home/moon/work/shendu/mnist_data",
    "/home/moon/work/shendu/mnist_data",
    one_hot=False,
    validation_size=5000)

# Image and label arrays of the train and test splits.
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels

# Sizes derived from the array shapes.
ntrain = trainimgs.shape[0]
ntest = testimgs.shape[0]
dim = trainimgs.shape[1]
# NOTE(review): labels are loaded with one_hot=False, so shape[1] is presumably
# a label length rather than a class count -- confirm against load_data.
nclasses = trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network dimensions
diminput = 80
dimhidden = 128
dimoutput = nclasses
nsteps = 120

# Projection weights: input -> hidden and hidden -> output, created in that order.
weights = dict(
    hidden=tf.Variable(tf.random_normal([diminput, dimhidden])),
    out=tf.Variable(tf.random_normal([dimhidden, dimoutput])),
)
#!/usr/bin/env python
# coding=utf-8
# Import packages
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import numpy as np
import load_data
#import matplotlib.pyplot as plt
#matplotlib inline
print("Packages imported")

# Load the data set; train and test come from separate directories.
mnist = load_data.read_data_sets(
    "/home/a/workspace/ssd/DataSets/mnist",
    "/home/a/workspace/ssd/DataSets/mnist_test",
    one_hot=False,
    validation_size=5000)

# Image and label arrays of the train and test splits.
trainimgs = mnist.train.images
trainlabels = mnist.train.labels
testimgs = mnist.test.images
testlabels = mnist.test.labels

# Sizes derived from the array shapes.
ntrain = trainimgs.shape[0]
ntest = testimgs.shape[0]
dim = trainimgs.shape[1]
# NOTE(review): labels are loaded with one_hot=False, so shape[1] is presumably
# a label length rather than a class count -- confirm against load_data.
nclasses = trainlabels.shape[1]
print("MNIST loaded")

# Recurrent neural network dimensions
diminput = 80
dimhidden = 128
dimoutput = nclasses
nsteps = 120

# Projection weights: input -> hidden and hidden -> output, created in that order.
weights = dict(
    hidden=tf.Variable(tf.random_normal([diminput, dimhidden])),
    out=tf.Variable(tf.random_normal([dimhidden, dimoutput])),
)
def run_training():
    """Train the model, checkpointing and evaluating it every 5 steps.

    Reads all settings from ``FLAGS``. When ``FLAGS.skip_eval == 'NO'`` an
    evaluation summary file is written and the train, validation and test
    splits are evaluated after every checkpoint; when
    ``FLAGS.output_evaluation_cindex == 'YES'`` a c-index detail file is
    opened as well and passed to ``do_eval``. Both files and the session are
    closed on exit, including when a step raises.

    Raises:
        ValueError: if ``FLAGS.model`` is neither 'NN' nor 'linear'.
    """
    # Fail early instead of hitting an undefined inference op further down.
    if FLAGS.model not in ('NN', 'linear'):
        raise ValueError(
            "Unsupported FLAGS.model %r; expected 'NN' or 'linear'" % (FLAGS.model,))

    # get the data and format it as DataSet tuples
    data_sets = ld.read_data_sets(FLAGS.train_dir, FLAGS.train_file)
    feature_groups = data_sets.train.feature_groups
    feature_size = data_sets.train.feature_size
    dm.FEATURE_SIZE = feature_size  # set the feature size of the datasets

    do_evaluate = FLAGS.skip_eval == 'NO'
    write_cindex = FLAGS.output_evaluation_cindex == 'YES'

    eval_output = None
    eval_cindex_output = None
    sess = None
    try:
        if (do_evaluate or write_cindex) and not os.path.exists(FLAGS.evaluation_dir):
            os.makedirs(FLAGS.evaluation_dir)

        if do_evaluate:
            # output the evaluation results
            eval_output = open(FLAGS.evaluation_dir + "/" + FLAGS.evaluation_file, "w")
            para_info = get_parameters()
            eval_output.write("#" + para_info + "\n")
            eval_output.write("train_steps\teval_data_type\tloss\tcindex\n")

        if write_cindex:
            # output the evaluation cindex detail
            eval_cindex_output = open(
                FLAGS.evaluation_dir + "/" + FLAGS.evaluation_cindex_file, "w")
            eval_cindex_output.write(
                "eval_type\tdates\tdates_predicted\tcensorship\tcindex\n")

        # do_eval is given 0 in place of a file when no c-index detail is wanted.
        cindex_sink = eval_cindex_output if write_cindex else 0

        # Checkpoint path prefix; it does not change between steps.
        checkpoint_file = os.path.join(FLAGS.saver_file_dir,
                                       FLAGS.saver_file_prefix)

        # Tell TensorFlow that the model will be built into the default Graph.
        with tf.Graph().as_default():
            # Generate input placeholder
            feature_pl, at_risk_pl, date_pl, censor_pl = placeholder_inputs(
                FLAGS.batch_size)

            # boolean value to check if it is during training optimization process
            isTrain_pl = tf.placeholder(tf.bool, shape=())

            # Build the graph and get the prediction from the inference model
            if FLAGS.model == 'NN':
                hidden_nodes = [int(x) for x in FLAGS.hidden_nodes.split(",")]
                inf_output_pl = dm.inference(feature_pl, hidden_nodes,
                                             FLAGS.activation,
                                             FLAGS.dropout_keep_rate, isTrain_pl)
            else:
                # 'linear' model; alpha should equal to 1
                inf_output_pl = dm.inference_linear(feature_pl)

            # Add to the Graph the Ops for loss calculation.
            loss = dm.loss(inf_output_pl, censor_pl, feature_groups,
                           FLAGS.batch_size, FLAGS.alpha, FLAGS.scale,
                           FLAGS.delta, FLAGS.reg_type)

            # Add to the Graph the Ops that calculate and apply gradients.
            train_op = dm.training(loss, FLAGS.initial_learning_rate)

            # Add the variable initializer Op.
            init = tf.global_variables_initializer()

            # Create a saver for writing training checkpoints.
            saver = tf.train.Saver()

            # Create a session for running Ops on the Graph.
            sess = tf.Session()

            # Run the Op to initialize the variables.
            sess.run(init)

            # Begin the training loops.
            for step in range(FLAGS.max_steps):
                start_time = time.time()

                # fill the data
                feed_dict = fill_feed_dict(
                    data_sets.train.next_batch(FLAGS.batch_size), feature_pl,
                    at_risk_pl, date_pl, censor_pl)
                feed_dict[isTrain_pl] = True

                # run training, update parameters
                _, loss_value, output = sess.run(
                    [train_op, loss, inf_output_pl], feed_dict=feed_dict)

                duration = time.time() - start_time

                # Save a checkpoint and evaluate the model periodically.
                if (step + 1) % 5 == 0 or (step + 1) == FLAGS.max_steps:
                    print('Step: %d , duration: %.3f sec' % (step + 1, duration))

                    # Save the variables to disk
                    saver.save(sess, checkpoint_file, global_step=step)

                    if do_evaluate:
                        # Evaluate against the training data.
                        print("Training data evaluation:")
                        do_eval(sess, cindex_sink, eval_output,
                                "training_data", step + 1, loss, inf_output_pl,
                                feature_pl, at_risk_pl, date_pl, censor_pl,
                                data_sets.train, isTrain_pl, False)
                        print("Validation data evaluation:")
                        do_eval(sess, cindex_sink, eval_output,
                                "validation_data", step + 1, loss,
                                inf_output_pl, feature_pl, at_risk_pl, date_pl,
                                censor_pl, data_sets.validation, isTrain_pl,
                                False)
                        print("Testing data evaluation:")
                        do_eval(sess, cindex_sink, eval_output,
                                "testing_data", step + 1, loss, inf_output_pl,
                                feature_pl, at_risk_pl, date_pl, censor_pl,
                                data_sets.test, isTrain_pl, False)
    finally:
        # Release the session and whichever output files were opened, even
        # when training or evaluation raised.
        if sess is not None:
            sess.close()
        if eval_output is not None:
            eval_output.close()
        if eval_cindex_output is not None:
            eval_cindex_output.close()