Exemple #1
0
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import tree
from sklearn.model_selection import cross_val_score, StratifiedKFold, GridSearchCV
from sklearn import preprocessing
# import seaborn as sns
import matplotlib as plt
import pylab
import re



import parse

# Load the training and test CSV files through the project-local ``parse``
# module.
train_set = parse.TrainingDataset('train.csv')
test_set = parse.TestDataset('test.csv')

# Column indices passed to read_data()/std_data() as the feature columns,
# and the index passed as the label column.  (An earlier assignment of all
# twelve indices was immediately overwritten, so it has been dropped.)
X_index = [2, 4, 6, 7, 9]
# X_index = [2,4,5,6,7,9]
Y_index = 1

train_set.read_data(X_index, Y_index)
train_set.std_data(X_index)
# Ask for SIZE samples in one batch.
batch_x, batch_y = train_set.next_batch(train_set.SIZE)

train_set.reset_counter()
# NOTE(review): the original drew two consecutive batches of 100 and kept
# only the second one.  The first call is preserved because it advances the
# dataset's internal counter -- confirm whether skipping it is intentional.
train_set.next_batch(100)
test_f, test_l = train_set.next_batch(100)

def _argmax(scores):
    """Return the index of the first largest value in *scores*."""
    scores = list(scores)
    return scores.index(max(scores))


def _write_predictions(out_file, rows, first_id):
    """Write one ``id,label`` line per row and return the next unused id."""
    next_id = first_id
    for row in rows:
        out_file.write(str(next_id) + ',' + str(_argmax(row)) + '\n')
        next_id += 1
    return next_id


def train_neural_network(x):
    """Train the model on ``train.csv`` and write predictions to ``output.csv``.

    The function builds the loss and an Adam optimizer on top of
    ``neural_network_model(x)``, runs ``hm_epochs`` passes over the training
    set in batches of ``batch_size``, prints the accumulated loss after each
    epoch, and finally predicts a label for every row of ``test.csv``.  The
    predictions are written as ``ImageId,Label`` lines, with ids starting at 1.

    Relies on module-level names that are not defined in this block:
    ``tf``, ``parse``, ``neural_network_model``, ``y`` and ``batch_size``.

    Args:
        x: Object passed to ``neural_network_model`` and used as the
            ``feed_dict`` key for the input batches (presumably a TensorFlow
            placeholder -- confirm against the caller).
    """
    prediction = neural_network_model(x)
    # Keyword arguments are used on purpose: the positional call
    # softmax_cross_entropy_with_logits(prediction, y) is the old form.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Feature columns 1..784; column 0 is passed as the label column.
    X_labels = list(range(1, 785))
    # NOTE(review): the class is spelled ``TrainingDataset`` elsewhere in this
    # file; kept as written because ``parse`` is not visible from here.
    train_set = parse.Training_Dataset('train.csv')
    # NOTE(review): the trailing ``True, 10`` presumably requests one-hot
    # labels over 10 classes -- confirm against parse.read_data.
    train_set.read_data(X_labels, 0, True, 10)
    hm_epochs = 1000
    with tf.Session() as sess:
        # global_variables_initializer() replaces the older
        # initialize_all_variables().
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            train_set.reset_counter()
            for _ in range(int(train_set.SIZE / batch_size)):
                epoch_x, epoch_y, _ix = train_set.next_batch(batch_size)

                _, c = sess.run([optimizer, cost],
                                feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c

            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:',
                  epoch_loss)
            # Re-run the model on the last training batch of this epoch.
            outputt = sess.run([prediction], feed_dict={x: epoch_x})

            # Spot check: expected vs. predicted class for sample 9 of that
            # batch (requires batch_size >= 10).
            print(_argmax(epoch_y[9]), _argmax(outputt[0][9]))

        # After training, show expected vs. predicted class for every sample
        # of the final training batch.
        for label_row, output_row in zip(epoch_y, outputt[0]):
            print(_argmax(label_row), _argmax(output_row))

        x_labels = list(range(784))
        test_set = parse.TestDataset('test.csv')
        test_set.read_data(x_labels)

        # NOTE(review): ``SizE`` (not ``SIZE``) is the attribute name used by
        # the original code for the test set; kept as-is.
        # Floor division keeps the batch count an int on Python 3 as well.
        full_batches = test_set.SizE // batch_size
        remainder = test_set.SizE % batch_size
        print(test_set.SizE, batch_size, full_batches)

        # The context manager closes the file even if a sess.run call raises.
        with open("output.csv", 'w') as f:
            f.write("ImageId,Label\n")
            next_id = 1
            for _ in range(full_batches):
                x_test_batch, _ = test_set.next_batch(batch_size)
                y_label = sess.run([prediction], feed_dict={x: x_test_batch})
                next_id = _write_predictions(f, y_label[0], next_id)

            # Only the leftover rows that do not fill a whole batch; skipped
            # when the test set size is an exact multiple of batch_size so no
            # zero-size batch is requested.
            if remainder:
                x_test_batch, _ = test_set.next_batch(remainder)
                y_label = sess.run([prediction], feed_dict={x: x_test_batch})
                next_id = _write_predictions(f, y_label[0], next_id)