def train(self, tgtDir):
    """Train the network for ``self.batches`` mini-batches.

    Every 10 batches a recall check is run on the current batch; every
    50 batches the weights are written to ``tgtDir``.  A final full save
    is done when training finishes.

    Args:
        tgtDir: directory path passed to ``saveWeights`` / ``saveAll``.
    """
    self.buildNet()
    init = tf.global_variables_initializer()
    # Use context managers so the file handles are closed (the original
    # leaked them via bare open(...).readlines()).
    with open(self.trainList) as f:
        trainSet = f.readlines()
    with open(self.labelList) as f:
        initlabel = f.readlines()
    label = [int(x) for x in initlabel]
    with tf.Session() as sess:
        sess.run(init)
        for batchIndex in range(self.batches):
            startTime = time.time()
            imageSet = utils.randomChoose(trainSet, self.batchsize)
            x = utils.loadImagesFromFile(imageSet, self.width, self.height)
            imageLabel = utils.getLabels(trainSet, imageSet, label)
            # One-hot encode the batch labels once; the validation loop
            # below reuses rows of y instead of rebuilding them per image.
            y = [[1 if j == imageLabel[i] else 0
                  for j in range(self.classes)]
                 for i in range(self.batchsize)]
            sess.run(self.optimizer, feed_dict={self.x: x, self.y: y})
            endTime = time.time()
            # Loss is evaluated in a separate run (after the update step),
            # matching the original's reported values.
            curLoss = sess.run(self.loss, feed_dict={self.x: x, self.y: y})
            print("Batch #%d processing time %.2fs, loss = %.5f"
                  % (batchIndex + 1, endTime - startTime, curLoss))
            if batchIndex % 10 == 0:
                correctImage = 0.0
                for i in range(self.batchsize):
                    predict = sess.run(
                        self.prediction,
                        feed_dict={self.x: [x[i]], self.y: [y[i]]})
                    if predict == imageLabel[i]:
                        # Fixed: was a Python-2 print statement
                        # (`print predict`), a syntax error in Python 3.
                        print(predict)
                        correctImage += 1
                print("Valid in step %d: recall: %.5f"
                      % (batchIndex + 1, correctImage / self.batchsize))
            # save weights to file
            if batchIndex % 50 == 0:
                self.saveWeights(tgtDir)
        # NOTE(review): source formatting was ambiguous — final save assumed
        # to happen once after the training loop; confirm against original.
        self.saveAll(tgtDir)
# This script converts downloaded datasheets into CSV files. Columns and header # are normalized, so they are ready to be imported into the database. import os import re import csv import pandas as pd import multiprocessing as mp import utils DIR_SOURCES = "src/" DIR_TARGET = "norm/" names = utils.getNormalizedNames() labels = utils.getLabels() fields = utils.getFieldNames() def composeColumn(ds, label): size = labels[label]['size'] kind = labels[label]['kind'] if not label in ds.columns: if kind == 'str': ds[label] = "" return else: ds[label] = float('nan') if kind == 'float' else int(0) return if kind == 'str': if size == '0':
# Train a small MLP classifier on the MNIST training split and load the
# held-out test split for evaluation.
from sklearn import datasets
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib
import utils

train_size = 60000  # full MNIST training set
test_size = 10000   # full MNIST test (t10k) set

x_train = utils.getImages('./data/train-images-idx3-ubyte.gz', train_size)
y_train = utils.getLabels("./data/train-labels-idx1-ubyte.gz", train_size)

#### Build the model
mlp = MLPClassifier(hidden_layer_sizes=(15, ),
                    activation='logistic',
                    alpha=1e-4,
                    solver='sgd',
                    tol=1e-4,
                    random_state=1,
                    learning_rate_init=.1,
                    verbose=True)
mlp.fit(x_train, y_train)

x_test = utils.getImages('./data/t10k-images-idx3-ubyte.gz', test_size)
# BUG FIX: the label count was `train_size` (60000), but the t10k file
# only contains `test_size` (10000) labels — this mismatched x_test.
y_test = utils.getLabels('./data/t10k-labels-idx1-ubyte.gz', test_size)
"""Quick sanity check: classify a single MNIST test image with the saved
MLP model and compare the prediction to the ground-truth label."""
import predict
import utils

SAMPLE_COUNT = 100  # how many test images/labels to load
SAMPLE_INDEX = 98   # which sample to classify

x_test = utils.getImages('./data/t10k-images-idx3-ubyte.gz', SAMPLE_COUNT)
y_test = utils.getLabels('./data/t10k-labels-idx1-ubyte.gz', SAMPLE_COUNT)

result = predict.predictMLPClassifier(x_test[SAMPLE_INDEX])
print("Pred: " + str(result) + ", Actual: " + str(y_test[SAMPLE_INDEX]))