import re

# NOTE(review): `plt` conventionally aliases matplotlib.pyplot; here it is
# bound to the top-level matplotlib package. Left unchanged on purpose.
import matplotlib as plt
import pylab
from sklearn import preprocessing
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, StratifiedKFold, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, export_graphviz
# import seaborn as sns

import parse

# Load the training and test CSV files through the project's parse helpers.
train_set = parse.TrainingDataset('train.csv')
test_set = parse.TestDataset('test.csv')

# Column indices handed to read_data(): X_index selects the input columns,
# Y_index is passed as the second argument (presumably the target column).
# A previous "all twelve columns" assignment was a dead store (immediately
# overwritten) and has been removed.
X_index = [2, 4, 6, 7, 9]
# X_index = [2,4,5,6,7,9]
Y_index = 1

train_set.read_data(X_index, Y_index)
# presumably standardizes the selected columns -- confirm in parse.std_data
train_set.std_data(X_index)

# Pull every training row in a single batch, then rewind the batch counter.
batch_x, batch_y = train_set.next_batch(train_set.SIZE)
train_set.reset_counter()

# NOTE(review): next_batch(100) is called twice, so test_f/test_l end up
# holding the result of the SECOND call. Kept as in the original; confirm
# whether skipping the first batch is intentional.
test_f, test_l = train_set.next_batch(100)
test_f, test_l = train_set.next_batch(100)
def _argmax(scores):
    """Return the index of the first maximum value in *scores*."""
    values = list(scores)
    return values.index(max(values))


def _write_predictions(out_file, sess, prediction, x, features, first_id):
    """Run *prediction* on *features* and append one ``id,label`` row each.

    Rows are numbered consecutively starting at *first_id*; the label is the
    argmax of the corresponding output row. Returns the next unused id.
    """
    next_id = first_id
    for row in sess.run(prediction, feed_dict={x: features}):
        out_file.write(str(next_id) + ',' + str(_argmax(row)) + '\n')
        next_id += 1
    return next_id


def train_neural_network(x):
    """Train the network on train.csv and write predictions to output.csv.

    Builds the softmax cross-entropy cost on top of
    ``neural_network_model(x)``, minimises it with Adam for a fixed number of
    epochs, prints a target/prediction argmax comparison for the last
    training batch, and finally writes an ``ImageId,Label`` CSV with the
    predicted class of every row of test.csv.

    Args:
        x: the input placeholder fed with feature batches. The label
            placeholder ``y`` and ``batch_size`` are read from module scope.

    Side effects:
        Reads ``train.csv`` and ``test.csv`` through the ``parse`` module,
        prints progress to stdout, and (over)writes ``output.csv``.
    """
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    # optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(cost)

    # Feature columns 1..784; the remaining read_data arguments are passed
    # through unchanged (presumably label column 0, one-hot encoding with 10
    # classes -- confirm against parse.Training_Dataset.read_data).
    train_columns = list(range(1, 785))
    train_set = parse.Training_Dataset('train.csv')
    train_set.read_data(train_columns, 0, True, 10)

    hm_epochs = 1000
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            train_set.reset_counter()
            for _ in range(int(train_set.SIZE / batch_size)):
                epoch_x, epoch_y, _ix = train_set.next_batch(batch_size)
                _, c = sess.run([optimizer, cost],
                                feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', hm_epochs,
                  'loss:', epoch_loss)

        # Debug output: compare target and predicted class on the last
        # training batch that was seen.
        outputs = sess.run(prediction, feed_dict={x: epoch_x})
        if len(epoch_y) > 9:
            # The original spot-checked sample 9; guard so that batches
            # shorter than 10 no longer raise IndexError.
            print(_argmax(epoch_y[9]), _argmax(outputs[9]))
        for target_row, output_row in zip(epoch_y, outputs):
            print(_argmax(target_row), _argmax(output_row))

        test_columns = list(range(784))
        test_set = parse.TestDataset('test.csv')
        test_set.read_data(test_columns)

        # NOTE(review): the test set exposes its size as `SizE` while the
        # training set uses `SIZE`; kept as written -- confirm in parse.
        # Floor division keeps the batch count an int on Python 3 as well.
        n_full_batches = test_set.SizE // batch_size
        remainder = test_set.SizE % batch_size
        print(test_set.SizE, batch_size, n_full_batches)

        # The context manager closes output.csv even if inference fails.
        with open("output.csv", 'w') as out_file:
            out_file.write("ImageId,Label\n")
            image_id = 1
            for _ in range(n_full_batches):
                x_test_batch, _unused = test_set.next_batch(batch_size)
                image_id = _write_predictions(
                    out_file, sess, prediction, x, x_test_batch, image_id)
            # Rows left over after the full batches; skipped when the test
            # set divides evenly so no empty batch is requested.
            if remainder:
                x_test_batch, _unused = test_set.next_batch(remainder)
                image_id = _write_predictions(
                    out_file, sess, prediction, x, x_test_batch, image_id)