# X_train[i].dtype: float32
# X_train[i].min(): 0.0
# X_train[i].max(): 1.0
# X_train[i].shape = (HEIGHT * WIDTH,): (reshape=True), (1024,)
#
# type(Y_train): <class 'numpy.ndarray'>
# Y_train.dtype: float32
# Y_train.shape: (one_hot=False), (60000,)
#
# type(Y_train[i]): <class 'numpy.float32'>
# Y_train[i].dtype: float32
# Y_train[i]: (one_hot=False), 0...9
print('Reading train dataset (Train 60000.cdb)...')
X_train, Y_train = read_hoda_dataset(dataset_path='./DigitDB/Train 60000.cdb',
                                     images_height=32,
                                     images_width=32,
                                     one_hot=False,
                                     reshape=True)

# type(X_test): <class 'numpy.ndarray'>
# X_test.dtype: float32
# X_test.shape: (reshape=False), (20000, 32, 32, 1)
#
# type(X_test[i]): <class 'numpy.ndarray'>
# X_test[i].dtype: float32
# X_test[i].min(): 0.0
# X_test[i].max(): 1.0
# X_test[i].shape = (HEIGHT, WIDTH, CHANNEL): (reshape=False), (32, 32, 1)
#
# type(Y_test): <class 'numpy.ndarray'>
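# A quick sanity check of the shapes and ranges documented above. This is an
# illustrative sketch, not part of the original script; it assumes X_train and
# Y_train were loaded with the exact arguments shown (one_hot=False, reshape=True).
import numpy as np

assert X_train.shape == (60000, 32 * 32)   # flattened (HEIGHT * WIDTH,) per sample
assert X_train.dtype == np.float32
assert float(X_train.min()) == 0.0 and float(X_train.max()) == 1.0  # already in [0, 1]
assert Y_train.shape == (60000,)           # integer-coded labels, 0...9
assert set(np.unique(Y_train)) <= set(range(10))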
import keras
import keras_metrics
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from HodaDatasetReader import read_hoda_dataset

batch_size = 128
num_classes = 10
epochs = 20

# the data, split between train and test sets
print('Reading train dataset (Train 60000.cdb)...')
x_train, y_train = read_hoda_dataset(dataset_path='./DigitDB/Train 60000.cdb',
                                     images_height=32,
                                     images_width=32,
                                     one_hot=False,
                                     reshape=True)
print('Reading test dataset (Test 20000.cdb)...')
x_test, y_test = read_hoda_dataset(dataset_path='./DigitDB/Test 20000.cdb',
                                   images_height=32,
                                   images_width=32,
                                   one_hot=True,
                                   reshape=True)

x_train = x_train.reshape(60000, 1024)
x_test = x_test.reshape(20000, 1024)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
# Unlike raw MNIST bytes, read_hoda_dataset already returns pixels scaled to
# [0.0, 1.0] (see the shape/range notes above), so no division by 255 is needed.
# Note the label encodings differ: y_train is integer-coded (one_hot=False) and
# still needs one-hot encoding before categorical_crossentropy, while y_test is
# already one-hot (one_hot=True).
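# The imports and hyper-parameters above follow the standard Keras mnist_mlp
# recipe. The model below is a hedged sketch of how they are typically wired
# together, not the original file's exact architecture; the layer width (512),
# dropout rate, and keras_metrics usage are assumptions.
y_train_cat = keras.utils.to_categorical(y_train, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(1024,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy',
                       keras_metrics.precision(),  # from the keras_metrics package
                       keras_metrics.recall()])

history = model.fit(x_train, y_train_cat,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))  # y_test is already one-hot
score = model.evaluate(x_test, y_test, verbose=0)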
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras.utils import np_utils
from HodaDatasetReader import read_hoda_dataset
import numpy as np

np.random.seed(1671)

print('################################################################################')
print()
print('Reading train 60000.cdb ...')
X_train, Y_train = read_hoda_dataset(dataset_path='./DigitDB/Train 60000.cdb',
                                     images_height=28,
                                     images_width=28,
                                     one_hot=False,
                                     reshape=True)
print('Reading test 20000.cdb ...')
X_test, Y_test = read_hoda_dataset(dataset_path='./DigitDB/Test 20000.cdb',
                                   images_height=28,
                                   images_width=28,
                                   one_hot=False,
                                   reshape=True)
print('################################################################################')
print()
print('Begin Deep Learning Process (Simple Deep Learning)')
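# A hedged sketch of the "Simple Deep Learning" model the imports above point
# to (Dense/Activation/Dropout trained with Adam). The hidden size N_HIDDEN,
# dropout rate, and validation split are illustrative assumptions, not the
# original file's settings.
from keras.models import Sequential

NB_CLASSES = 10
N_HIDDEN = 128

# Labels were read with one_hot=False, so one-hot encode them here.
Y_train_cat = np_utils.to_categorical(Y_train, NB_CLASSES)
Y_test_cat = np_utils.to_categorical(Y_test, NB_CLASSES)

model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(28 * 28,)))  # reshape=True gives (N, 784)
model.add(Activation('relu'))
model.add(Dropout(0.3))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(0.3))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer=Adam(),
              metrics=['accuracy'])
model.fit(X_train, Y_train_cat, batch_size=128, epochs=20,
          validation_split=0.2, verbose=1)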
# import the necessary packages
from HodaDatasetReader import read_hoda_cdb, read_hoda_dataset
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras import backend as K
from keras.optimizers import Adam
from keras.utils import np_utils
import numpy as np
import cv2

print("Reading DataSets...")
trainData, trainLabels = read_hoda_dataset('./DigitDB/Train 60000.cdb')
validationData, validationLabels = read_hoda_dataset(
    './DigitDB/RemainingSamples.cdb')
testData, testLabels = read_hoda_dataset('./DigitDB/Test 20000.cdb')

# handle matrix for when Keras is using "channels first" ordering (Theano).
# see this: https://stackoverflow.com/questions/39815518/keras-maxpooling2d-layer-gives-valueerror
if K.image_data_format() == "channels_first":
    trainData = trainData.reshape((trainData.shape[0], 1, 32, 32))
    validationData = validationData.reshape(
        (validationData.shape[0], 1, 32, 32))
    testData = testData.reshape((testData.shape[0], 1, 32, 32))
# handle matrix for when Keras is using "channels last" ordering (TensorFlow).
else:
    trainData = trainData.reshape((trainData.shape[0], 32, 32, 1))
    validationData = validationData.reshape(
        (validationData.shape[0], 32, 32, 1))
    testData = testData.reshape((testData.shape[0], 32, 32, 1))
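# The Conv2D / AveragePooling2D / Flatten / Dense imports above suggest a small
# LeNet-style network. The layout below is a hedged sketch (filter counts and
# kernel sizes are illustrative assumptions), wired to respect either channel
# ordering; the integer labels would still need np_utils.to_categorical before
# fitting with categorical_crossentropy.
inputShape = (1, 32, 32) if K.image_data_format() == "channels_first" \
    else (32, 32, 1)

model = Sequential()
model.add(Conv2D(20, (5, 5), padding="same", activation="relu",
                 input_shape=inputShape))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(50, (5, 5), padding="same", activation="relu"))
model.add(AveragePooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(500, activation="relu"))
model.add(Dense(10, activation="softmax"))

model.compile(loss="categorical_crossentropy", optimizer=Adam(),
              metrics=["accuracy"])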
log = open("log.txt", "w")
log.write('#######################START########################\n')

print('Reading train dataset (Train 60000.cdb)...')
log.write('Reading train dataset (Train 60000.cdb)...\n')
train_images, train_labels = read_hoda_dataset(
    dataset_path='./DigitDB/Train 60000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True)

print('Reading test dataset (Test 20000.cdb)...')
log.write('Reading test dataset (Test 20000.cdb)...\n')
test_images, test_labels = read_hoda_dataset(
    dataset_path='./DigitDB/Test 20000.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True)

print('Reading remaining samples dataset (RemainingSamples.cdb)...')
log.write('Reading remaining samples dataset (RemainingSamples.cdb)...\n')
remaining_images, remaining_labels = read_hoda_dataset(
    dataset_path='./DigitDB/RemainingSamples.cdb',
    images_height=32,
    images_width=32,
    one_hot=False,
    reshape=True)

# After evaluation, report the final accuracy to both the console and the log:
print(
    "\nAccuracy: (true positives + true negatives) / (test size) * 100 = {}%"
    .format(accuracy))
log.write(
    "\nAccuracy: (true positives + true negatives) / (test size) * 100 = {}%\n"
    .format(accuracy))
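# A minimal sketch of how `accuracy` above could be computed, assuming a
# trained Keras `model` (not shown in this excerpt) and the integer-coded
# test_labels loaded above. For a single-label classifier, summing true
# positives and true negatives over the test set reduces to counting correct
# predictions, so the reported percentage is:
import numpy as np

predictions = np.argmax(model.predict(test_images), axis=1)
accuracy = float(np.sum(predictions == test_labels)) / len(test_labels) * 100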
from HodaDatasetReader import read_hoda_dataset
from sklearn.utils import shuffle
import numpy as np
import pickle

train_images, train_labels = read_hoda_dataset('./DigitDB/Train 60000.cdb',
                                               reshape=False)
test_images, test_labels = read_hoda_dataset('./DigitDB/Test 20000.cdb',
                                             reshape=False)
remaining_images, remaining_labels = read_hoda_dataset(
    './DigitDB/RemainingSamples.cdb', reshape=False)

# The dataset is stored in a fixed order, so shuffle it to improve accuracy
# and keep the network from memorising that ordering.
train_images, train_labels = shuffle(np.array(train_images),
                                     np.array(train_labels))
test_images, test_labels = shuffle(np.array(test_images),
                                   np.array(test_labels))
remaining_images, remaining_labels = shuffle(np.array(remaining_images),
                                             np.array(remaining_labels))

# In order to save the dataset to pickle files, dump each array itself (not
# just its name string).
arrays = {
    'train_images': train_images,
    'train_labels': train_labels,
    'test_images': test_images,
    'test_labels': test_labels,
    'remaining_images': remaining_images,
    'remaining_labels': remaining_labels,
}
for name, data in arrays.items():
    pickle_out = open("DigitDB/{}.pickle".format(name), 'wb')
    pickle.dump(data, pickle_out)
    pickle_out.close()

# Load the dataset after saving it
pickle_in = open("DigitDB/train_images.pickle", "rb")
train_images = pickle.load(pickle_in)
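# For symmetry with the save loop, all six arrays can be restored the same
# way. This is an illustrative sketch (not in the original script), assuming
# the pickle files were written as above.
loaded = {}
for name in ['train_images', 'train_labels', 'test_images', 'test_labels',
             'remaining_images', 'remaining_labels']:
    with open("DigitDB/{}.pickle".format(name), 'rb') as pickle_in:
        loaded[name] = pickle.load(pickle_in)

train_images, train_labels = loaded['train_images'], loaded['train_labels']
test_images, test_labels = loaded['test_images'], loaded['test_labels']
remaining_images = loaded['remaining_images']
remaining_labels = loaded['remaining_labels']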