def get_train_test(num_classes):
    (train_X2, train_y2), (test_X2, test_y2) = mnist.load_data()
    train_X, train_y = extract_training_samples('letters')
    test_X, test_y = extract_test_samples('letters')
    print(train_X.shape)
    print(train_y.shape)
    print(test_X.shape)
    print(test_y.shape)
    print(train_X2.shape)
    print(train_y2.shape)
    print(test_X2.shape)
    print(test_y2.shape)
    # Merge letters (labels shifted to 0..25) and digits (labels shifted to 26..35)
    train_X = np.append(train_X, train_X2, axis=0)
    train_y = np.append(train_y - 1, train_y2 + 26, axis=0)
    test_X = np.append(test_X, test_X2, axis=0)
    test_y = np.append(test_y - 1, test_y2 + 26, axis=0)
    print(train_X.shape)
    print(train_y.shape)
    print(test_X.shape)
    print(test_y.shape)
    x_train = train_X.reshape(train_X.shape[0], 784)
    x_test = test_X.reshape(test_X.shape[0], 784)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0
    y_train = keras.utils.to_categorical(train_y, num_classes)
    y_test = keras.utils.to_categorical(test_y, num_classes)
    print('data returned')
    return x_train, y_train, x_test, y_test
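# A minimal usage sketch (assumption: `mnist` here is keras.datasets.mnist and
# the extract_* helpers come from the `emnist` package, matching the names the
# function body uses). The merged label space is 26 letters (0..25) followed by
# 10 digits (26..35), so num_classes should be 36.
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from emnist import extract_training_samples, extract_test_samples

x_train, y_train, x_test, y_test = get_train_test(num_classes=36)
assert x_train.shape[1] == 784 and y_train.shape[1] == 36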
def createDataset(length=100, colSize=8, rowSize=8, initialShuffle=True):
    mnistSize = 28
    size = rowSize * colSize
    images, labels = extract_training_samples('letters')
    if initialShuffle:
        images, labels = shuffle(images, labels)
    srcIndex = 0
    dataset = np.zeros(shape=(length, rowSize * mnistSize, colSize * mnistSize))
    dataLabels = np.zeros(shape=(length, rowSize, colSize))
    for i in range(length):
        # Reshuffle if the end of the dataset is reached
        if (srcIndex + 1) * size >= len(images):
            srcIndex = 0
            images, labels = shuffle(images, labels)
        sourceImages = images[srcIndex * size:(srcIndex + 1) * size]
        sourceLabels = labels[srcIndex * size:(srcIndex + 1) * size]
        srcIndex += 1
        # Create the image and label matrix
        matrixData, matrixLabels = createLetterMatrix(sourceImages, sourceLabels,
                                                      rowSize=rowSize, colSize=colSize,
                                                      mnistSize=mnistSize)
        dataset[i] = matrixData
        dataLabels[i] = matrixLabels
    return (dataset, dataLabels)
def load_dataset():
    # load dataset
    trainX, trainY = em.extract_training_samples('letters')
    trainX, trainY = shuffle(trainX, trainY)
    testX, testY = em.extract_test_samples('letters')
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode the test targets; train targets stay integer until the
    # few-shot subset below has been selected
    testY = to_categorical(testY)
    tX = []
    tY = []
    # keep up to shot-1 training samples of each class (letters labels run
    # 1..26, so slot 0 of ctr is unused)
    shot = 300
    ctr = [shot] * 27
    for i in range(len(trainY)):
        label = trainY[i]
        ctr[label] = ctr[label] - 1
        if ctr[label] > 0:
            tX.append(trainX[i])
            tY.append(trainY[i])
    print("tX.shape", len(tX))
    tY = to_categorical(tY)
    return tX, tY, testX, testY
def load_emnist_data(self, n_xin, n_xout):
    """ Load x_in and x_out from the EMNIST digits training split
    :param n_xin: Size of the X_in dataset
    :param n_xout: Size of the X_out dataset
    :return: (x_in, y_in), (x_out, y_out)
    """
    def normalize(data):
        return np.reshape((data.astype(np.float32) - 127.5) / 127.5, (-1, 28, 28, 1))

    # Load and normalize the training data
    (x_train, y_train) = extract_training_samples('digits')
    x_train = normalize(x_train)

    # Shuffle for some randomness
    x_train, y_train = shuffle(x_train, y_train)

    assert n_xin + n_xout < len(x_train)  # No overflow, sizes have to be assured

    # Split into x_in and x_out
    x_in, y_in = x_train[:n_xin], y_train[:n_xin]
    x_out, y_out = x_train[n_xin:n_xin + n_xout], y_train[n_xin:n_xin + n_xout]

    return (x_in, y_in), (x_out, y_out)
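# Minimal usage sketch (assumptions: this method sits on some loader class,
# called `Loader` here purely for illustration, and `shuffle` is
# sklearn.utils.shuffle; both names are inferred from the function body, not
# from a known API). Pixels come out in [-1, 1], which suits tanh-output GANs.
import numpy as np
from sklearn.utils import shuffle
from emnist import extract_training_samples

(x_in, y_in), (x_out, y_out) = Loader().load_emnist_data(n_xin=10000, n_xout=10000)
assert x_in.shape == (10000, 28, 28, 1)
assert x_in.min() >= -1.0 and x_in.max() <= 1.0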
def train(mode, dataset):
    from tensorflow import keras
    from emnist import list_datasets, extract_training_samples, extract_test_samples
    import numpy as np
    from numpy.random import seed
    from tensorflow import set_random_seed

    name = mode[0]
    mode = mode[1]
    seed(4)
    set_random_seed(4)

    (train_images, train_labels) = extract_training_samples(dataset)
    (test_images, test_labels) = extract_test_samples(dataset)
    train_labels = keras.utils.to_categorical(train_labels)
    test_labels = keras.utils.to_categorical(test_labels)

    if mode["reshape"]:
        # Reshape the arrays to 4 dims so they work with the Keras API.
        # The last number is 1, which signifies that the images are greyscale.
        train_images = np.reshape(train_images, (train_images.shape[0], 28, 28, 1))
        test_images = np.reshape(test_images, (test_images.shape[0], 28, 28, 1))

    train_images = keras.utils.normalize(train_images, axis=1)
    test_images = keras.utils.normalize(test_images, axis=1)

    model = keras.Sequential()
    for l in mode["architecture"]:
        model.add(l)

    es = keras.callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=2)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.summary()
    model.fit(x=train_images, y=train_labels, epochs=100, validation_split=0.1, callbacks=[es])

    model_name = dataset + "_" + name
    model.save("./" + model_name + ".h5")
    print("saved model to " + model_name + ".h5")

    print("evaluating...")
    val_loss, val_acc = model.evaluate(x=test_images, y=test_labels)

    del train_images
    del train_labels
    del test_images
    del test_labels
    import gc
    gc.collect()
def load_data() -> tuple:
    images_train, labels_train = extract_training_samples("letters")
    images_test, labels_test = extract_test_samples("letters")
    images = np.concatenate((images_train, images_test))
    labels = np.concatenate((labels_train, labels_test))
    images = np.expand_dims(images, axis=-1)
    labels = labels - 1  # letters labels are 1..26; shift to 0..25
    return images, labels
def load_emnist_balanced():
    """ Load EMNIST Balanced
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples('balanced')
    test_images, test_labels = emnist.extract_test_samples('balanced')
    return (training_images, training_labels, test_images, test_labels,
            len(set(training_labels)))
def load_emnist_letters():
    """ Load EMNIST Letters
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples('letters')
    test_images, test_labels = emnist.extract_test_samples('letters')
    return (training_images, training_labels, test_images, test_labels,
            len(set(training_labels)))
def __init__(self, max_data=40000, noise_std=0.001, mia_attacks=None):
    self.img_rows = 28
    self.img_cols = 28
    self.channels = 1
    self.img_shape = (self.img_rows, self.img_cols, self.channels)
    self.latent_dim = 100
    self.mia_attacks = mia_attacks

    def normalize(data):
        return np.reshape((data.astype(np.float32) - 127.5) / 127.5, (-1, *self.img_shape))

    # Load, normalize and split the dataset
    (self.x_train, _), (_, _) = mnist.load_data()
    self.x_train = normalize(self.x_train)

    self.x_out, y_out = extract_training_samples('digits')
    self.x_out = normalize(self.x_out)

    self.x_train = self.x_train[:max_data]
    print("Loading with {} data samples!".format(len(self.x_train)))

    # Following parameter and optimizer set as recommended in paper
    self.n_critic = 5
    self.clip_value = 5.0

    NoisyAdam = add_gradient_noise(Adam)
    discriminator_optimizer = NoisyAdam(lr=0.0002, beta_1=0.5,
                                        clipnorm=self.clip_value,
                                        standard_deviation=noise_std)
    optimizer = RMSprop(lr=0.00005)

    # Build and compile the critic
    self.critic, self.advreg_model = self.build_critic(discriminator_optimizer, optimizer)

    # Build the generator
    self.generator = self.build_generator()

    # The generator takes noise as input and generates imgs
    z = Input(shape=(self.latent_dim,))
    img = self.generator(z)

    # For the combined model we will only train the generator
    self.critic.trainable = False

    # The critic takes generated images as input and determines validity
    valid = self.critic(img)

    # The combined model (stacked generator and critic)
    self.combined = Model(z, valid)
    self.combined.compile(loss='binary_crossentropy', optimizer=optimizer,
                          metrics=['accuracy'])
def get_emnist_image(pred):
    from emnist import extract_training_samples
    import pandas as pd
    x, y = extract_training_samples('balanced')
    # find the inverse of: chr(33 + np.argmax(p[i]))
    csv = pd.io.parsers.read_csv('emnist-balanced-mapping.csv')
    print(pred)
    # linear scan for the first training image whose mapped character matches
    for i in range(len(x)):
        if chr(csv['Out'][y[i]]) == pred:
            return x[i]
    return x[0]
def save_emnist_uppercase_reduced_letters64_dataset():
    x_train, y_train = emnist.extract_training_samples('byclass')
    x_test, y_test = emnist.extract_test_samples('byclass')

    train_mask = emnist_uppercase_label_filter(y_train)
    test_mask = emnist_uppercase_label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    x_train_reduced = [cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
                       for i in x_train_reduced]
    y_train_reduced = y_train[train_mask]
    # shift to 0-based labels (byclass uppercase letters start at label 10)
    y_train_reduced -= 10
    y_train_reduced = replace_x_letter_label(y_train_reduced)

    x_test_reduced = x_test[test_mask]
    x_test_reduced = [cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
                      for i in x_test_reduced]
    y_test_reduced = y_test[test_mask]
    y_test_reduced -= 10
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)

    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")

    # x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0], x_train_reduced.shape[1], x_train_reduced.shape[2], 1)
    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }
    with open("eng_uppercase_letters64_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
def load_dataset():
    # load dataset
    trainX, trainY = extract_training_samples('letters')
    testX, testY = extract_test_samples('letters')
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
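# A caveat on the encoding above (assumption: to_categorical is
# keras.utils.to_categorical): EMNIST 'letters' labels run 1..26, so
# to_categorical(trainY) yields 27 columns with column 0 always zero.
# Subtracting 1 first gives a compact 26-class encoding instead:
from emnist import extract_training_samples
from tensorflow.keras.utils import to_categorical

_, demoY = extract_training_samples('letters')
padded = to_categorical(demoY)        # shape (N, 27), column 0 unused
compact = to_categorical(demoY - 1)   # shape (N, 26)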
def createTrainingSet(test_size=0.25):
    data, labels = extract_training_samples('letters')
    data = data.astype('float32') / 255.0
    labels = to_categorical(labels - 1)  # Labels are stored in 1 .. 26

    # Split the set
    xtrain, xtest, ytrain, ytest = train_test_split(data, labels, test_size=test_size)
    xtrain = np.expand_dims(xtrain, -1)
    xtest = np.expand_dims(xtest, -1)
    return xtrain, xtest, ytrain, ytest
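# Example call (a sketch; assumes the surrounding module imports numpy as np,
# sklearn's train_test_split, keras' to_categorical and emnist's
# extract_training_samples, as the function body requires):
xtrain, xtest, ytrain, ytest = createTrainingSet(test_size=0.2)
# xtrain has shape (N, 28, 28, 1) with floats in [0, 1];
# ytrain has shape (N, 26) with one-hot rows over the 26 letter classes.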
def load_EMNIST(size=None):
    import numpy as np
    import cv2
    from emnist import extract_training_samples

    images, labels = extract_training_samples('letters')
    x_train = (images[:100000] / 255).astype(np.float32)
    x_valid = (images[100000:] / 255).astype(np.float32)
    if size:
        x_train = np.array([cv2.resize(x, (size, size)) for x in x_train])
        x_valid = np.array([cv2.resize(x, (size, size)) for x in x_valid])
    print(f"Loaded EMNIST dataset: x_train{x_train.shape}, x_valid{x_valid.shape}")
    return x_train, x_valid
def preprocess():
    train_images, train_labels = emnist.extract_training_samples('mnist')
    train_images = train_images.reshape((train_images.shape[0], 1, 28, 28)).astype(np.float32)
    train_images /= 255
    train_labels = one_hot(train_labels.reshape(train_labels.shape[0], 1), 10)

    test_images, test_labels = emnist.extract_test_samples('mnist')
    test_images = test_images.reshape((test_images.shape[0], 1, 28, 28)).astype(np.float32)
    test_images /= 255

    return (train_images, train_labels), (test_images, test_labels)
def saveDataSet(dataSetType):
    if dataSetType == 'digits':
        # Extract dataset
        print('Extracting dataset')
        X_train, y_train = extract_training_samples('digits')
        X_test, y_test = extract_test_samples('digits')
        # Reshape dataset
        print('Reshaping dataset')
        images_train, labels_train = manageDataSet(len(y_train), X_train, y_train)
        images_test, labels_test = manageDataSet(len(y_test), X_test, y_test)
        # Save the dataset
        print('Saving dataset')
        save("images_numbers_train.npy", images_train)
        save("labels_numbers_train.npy", labels_train)
        save("images_numbers_test.npy", images_test)
        save("labels_numbers_test.npy", labels_test)

    if dataSetType == 'letters':
        # Extract dataset
        print('Extracting dataset')
        X_train, y_train = extract_training_samples('letters')
        X_test, y_test = extract_test_samples('letters')
        # Reshape dataset
        print('Reshaping dataset')
        imgs_train, labels_train = manageDataSet(len(y_train), X_train, y_train)
        imgs_test, labels_test = manageDataSet(len(y_test), X_test, y_test)
        # Save the reshaped dataset
        print('Saving dataset')
        save("images_letters_train.npy", imgs_train)
        save("labels_letters_train.npy", labels_train)
        save("images_letters_test.npy", imgs_test)
        save("labels_letters_test.npy", labels_test)
def loadEmnist(self):
    """
    Load the EMNIST dataset and do some data pre-processing:
    split the training set 90/10% into training and validation sets and
    convert the y labels to one-hot arrays.
    """
    x_train, y_train = extract_training_samples('balanced')
    x_test, y_test = extract_test_samples('balanced')

    # Keep only the upper case letters (balanced labels 10..35)
    train_alphabet_list = (np.array(y_train) < 36) & (np.array(y_train) > 9)
    test_alphabet_list = (np.array(y_test) < 36) & (np.array(y_test) > 9)
    y_train = y_train[train_alphabet_list] - 10
    x_train = x_train[train_alphabet_list]
    y_test = y_test[test_alphabet_list] - 10
    x_test = x_test[test_alphabet_list]

    self.nclass = 26
    self.width = x_train.shape[1]
    self.height = x_train.shape[2]
    self.total_train_size = len(x_train)
    self.ntrain = int(0.9 * self.total_train_size)
    self.nval = int(0.1 * self.total_train_size)
    self.ntest = len(x_test)
    self.train_counter = 0
    self.train_index = np.arange(self.ntrain)

    x_train = x_train.reshape(x_train.shape[0], self.width, self.height, 1)
    x_test = x_test.reshape(x_test.shape[0], self.width, self.height, 1)
    input_shape = (self.width, self.height, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    self.x_test = x_test / 255

    self.x_val = x_train[self.ntrain:self.total_train_size]
    self.x_train = x_train[0:self.ntrain]
    y_val = y_train[self.ntrain:self.total_train_size]
    y_train = y_train[0:self.ntrain]

    # convert class vectors to binary class matrices
    self.y_train = keras.utils.to_categorical(y_train, 26)
    self.y_val = keras.utils.to_categorical(y_val, 26)
    self.y_test = keras.utils.to_categorical(y_test, 26)
    print(self.x_train.shape)
    print(self.x_val.shape)
    print(self.x_test.shape)
def __init__(self, number_of_authors, number_of_pixels=4, poisoned_ratio=0.2,
             backdoor_value=1, initial_shuffle=True, seed=None):
    X_train, y_train = emnist.extract_training_samples('digits')
    X_test, y_test = emnist.extract_test_samples('digits')
    X = np.concatenate((X_train, X_test))
    y = np.concatenate((y_train, y_test))

    # IMPORTANT:
    # create imbalanced datasets, i.e., the number of elements in each digit
    # class of the same author may vary, but the number of samples per author
    # is balanced, i.e., each author has the same number of samples.
    samples_per_author = len(X) // number_of_authors
    author = np.repeat(np.arange(number_of_authors), samples_per_author)

    # throw leftover data samples away so that each author keeps the same
    # number of samples
    skip_at_end = len(X) - len(author)
    assert skip_at_end < samples_per_author, "Why do you throw so many samples away?"
    if skip_at_end > 0:
        print(f"Warning: throwing {skip_at_end} samples away to have a balanced "
              f"number of samples per author")
    X = X[:len(author)]
    y = y[:len(author)]

    # flatten each 28x28 image to a 784-vector
    print(X.shape)
    X = X.reshape((len(X), 784))
    print(X.shape)

    # scale to [0, 1]
    # (binarization alternative:)
    # X[X < 128] = 0
    # X[X > 127] = 255
    X = X / 255

    super(PoisonedDataset_EMNIST_DIGITS, self).__init__(
        X, y, author,
        number_of_classes=10,
        number_of_pixels=number_of_pixels,
        poisoned_ratio=poisoned_ratio,
        backdoor_value=backdoor_value,
        initial_shuffle=initial_shuffle,
        seed=seed)
def load_data():
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')
    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)

    # 10 digits + 26 lowercase letters = 36 remaining classes
    chars = '0123456789' + string.ascii_lowercase
    num_chars = len(chars)

    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)
    return X_train, X_test, train_labels, test_labels
def load_data():
    # Get digit and letter data from EMNIST
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')
    # Remove capital letters
    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)
    # Merge train and test datasets
    X = np.vstack((X_train, X_test))
    labels = np.hstack((train_labels, test_labels))
    return X, labels
def train(self, epochs, batch_size=128, sample_interval=50):
    # Load the dataset
    X_train, Y_train = extract_training_samples('balanced')

    # Rescale -1 to 1
    X_train = X_train / 127.5 - 1.
    X_train = np.expand_dims(X_train, axis=3)

    # Adversarial ground truths
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Select a random batch of images
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        imgs = X_train[idx]

        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

        # Generate a batch of new images
        gen_imgs = self.generator.predict(noise)

        # Train the discriminator
        d_loss_real = self.discriminator.train_on_batch(imgs, valid)
        d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

        # Train the generator (to have the discriminator label samples as valid)
        g_loss = self.combined.train_on_batch(noise, valid)

        # Plot the progress
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[1], g_loss))

        # If at save interval => save generated image samples
        if epoch % sample_interval == 0:
            self.sample_images(epoch)
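# Minimal usage sketch (assumption: this method belongs to a Keras GAN class,
# named `GAN` here only for illustration, whose constructor builds
# self.generator, self.discriminator and the stacked self.combined model, as
# the method body requires). One call trains on EMNIST 'balanced' and saves
# sample images every sample_interval epochs.
gan = GAN()
gan.train(epochs=4000, batch_size=32, sample_interval=200)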
def load_emnist_images():
    """ Loads MNIST plus EMNIST digit images, normalized to the range 0-1 """
    (mnist_x, mnist_y), (mnist_x_test, mnist_y_test) = mnist.load_data()
    emnist_x, emnist_y = extract_training_samples('digits')

    mnist_x = np.reshape(mnist_x, (-1, 28, 28, 1))
    mnist_x_test = np.reshape(mnist_x_test, (-1, 28, 28, 1))
    emnist_x = np.reshape(emnist_x, (-1, 28, 28, 1))

    mnist_x = np.vstack((mnist_x, emnist_x))
    mnist_y = np.hstack((mnist_y, emnist_y))

    # Scale everything to 0-1
    mnist_x, mnist_x_test = normalize_0_1([mnist_x, mnist_x_test])

    return ((mnist_x, transform_to_one_hot(mnist_y, depth=10)),
            (mnist_x_test, transform_to_one_hot(mnist_y_test, depth=10)))
def save_emnist_reduced_letters_dataset():
    x_train, y_train = emnist.extract_training_samples('letters')
    x_test, y_test = emnist.extract_test_samples('letters')

    # Shift labels to the range [0..25]
    y_train = np.subtract(y_train, 1)
    y_test = np.subtract(y_test, 1)

    train_mask = label_filter(y_train)
    test_mask = label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    y_train_reduced = y_train[train_mask]
    y_train_reduced = replace_x_letter_label(y_train_reduced)
    x_test_reduced = x_test[test_mask]
    y_test_reduced = y_test[test_mask]
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)

    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")

    # x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0], x_train_reduced.shape[1], x_train_reduced.shape[2], 1)
    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }
    with open("eng_letters_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
def get_data(self, s0):
    self.x_train, self.y_train = extract_training_samples('byclass')
    self.x_test, self.y_test = extract_test_samples('byclass')
    self.y_test = oneHotEncodeY(self.y_test, 62)
    self.y_train = oneHotEncodeY(self.y_train, 62)

    self.x_train = self.x_train.astype('float32')
    self.y_train = self.y_train.astype('float32')
    self.x_test = self.x_test.astype('float32')
    self.y_test = self.y_test.astype('float32')

    # scale pixels to [0, 1]
    self.x_train = self.x_train / 255.
    self.x_test = self.x_test / 255.

    self.x_train = np.reshape(self.x_train, (self.x_train.shape[0], 28, 28, 1))
    self.x_test = np.reshape(self.x_test, (self.x_test.shape[0], 28, 28, 1))
    self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
    self.nb_classes = 62

    # reserve the first s0 test samples as the substitute subset
    self.x_sub = self.x_test[:s0]
    self.y_sub = np.argmax(self.y_test[:s0], axis=1)
    self.x_test = self.x_test[s0:]
    self.y_test = self.y_test[s0:]
def get_data(experiment, occlusion=None, bars_type=None, one_hot=False):
    # Load EMNIST 'balanced' data via the emnist package.
    (train_images, train_labels), (test_images, test_labels) = \
        emnist.extract_training_samples('balanced'), emnist.extract_test_samples('balanced')

    # Alternative: the 'letters' split (labels would then need a shift to 0-base):
    # (train_images, train_labels), (test_images, test_labels) = \
    #     emnist.extract_training_samples('letters'), emnist.extract_test_samples('letters')
    # train_labels = train_labels.reshape(-1, )
    # test_labels = test_labels.reshape(-1, )

    all_data = np.concatenate((train_images, test_images), axis=0)
    all_labels = np.concatenate((train_labels, test_labels), axis=0)
    # all_labels = all_labels - 1  # Change to 0-based index for letters

    # For Table 1 and Experiment 2:
    # for i, l in enumerate(all_labels):
    #     all_labels[i] = {
    #         36: 10, 37: 11, 38: 13, 39: 14, 40: 15, 41: 16,
    #         42: 17, 43: 23, 44: 26, 45: 27, 46: 29
    #     }.get(l, l)

    all_data = add_noise(all_data, experiment, occlusion, bars_type)
    all_data = all_data.reshape((131600, img_columns, img_rows, constants.colors))
    all_data = all_data.astype('float32') / 255

    if one_hot:
        # Change labels to binary rows: each label corresponds to a column, and
        # only the column for the corresponding label is set to one.
        all_labels = to_categorical(all_labels)

    return (all_data, all_labels)
def load_data(self):
    """ Load data from the emnist package

    # Returns:
        all_data: train data, train labels, test data and test labels
    """
    self._train_data, self._train_labels = emnist.extract_training_samples('digits')
    self._train_labels = np.eye(10)[self._train_labels]
    self._test_data, self._test_labels = emnist.extract_test_samples('digits')
    self._test_labels = np.eye(10)[self._test_labels]
    self.shuffle()
    return self.data
def load_data(plot=True):
    # extract data from EMNIST [letters]
    images_train, labels_train = extract_training_samples('letters')
    images_test, labels_test = extract_test_samples('letters')

    if plot:
        # randomly plot 25 letters
        f, axarr = plt.subplots(5, 5)
        indices, ctr = random.sample(range(labels_train.shape[0]), 25), 0
        for i in range(5):
            for j in range(5):
                idx = indices[ctr]
                axarr[i, j].imshow(images_train[idx], cmap="gray")
                axarr[i, j].set_title(f"{letters[labels_train[idx] - 1]}")
                ctr += 1
        plt.show()

    # flatten the last two dimensions to (N, 784)
    return (images_train.reshape((images_train.shape[0],
                                  images_train.shape[1] * images_train.shape[2])),
            images_test.reshape((images_test.shape[0],
                                 images_test.shape[1] * images_test.shape[2])),
            labels_train, labels_test)
def _load_data(self):
    print('loading data')
    X, y = extract_training_samples('letters')
    self.train_images, self.test_images = X[:60000], X[60000:70000]
    self.train_labels, self.test_labels = y[:60000], y[60000:70000]

    self.train_images = self.train_images.astype('float32')
    self.test_images = self.test_images.astype('float32')
    self.train_images /= 255
    self.test_images /= 255
    self.org_images = self.test_images

    self.train_images = self.train_images.reshape(60000, 28, 28, 1)
    self.test_images = self.test_images.reshape(10000, 28, 28, 1)

    # One hot encode
    self.train_labels = to_categorical(self.train_labels, self.num_classes)
    self.test_labels = to_categorical(self.test_labels, self.num_classes)
def load_mnist_data(type='channel_last'):
    from emnist import extract_training_samples, extract_test_samples
    from keras.utils import np_utils

    # input image dimensions
    nb_classes = 26
    img_rows, img_cols = 28, 28

    X_train, Y_1 = extract_training_samples('letters')
    X_test, Y_2 = extract_test_samples('letters')

    # shift letters labels from 1..26 to 0..25
    y_train = Y_1 - 1
    y_test = Y_2 - 1

    if type == 'channel_first':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    return X_train, Y_train, X_test, Y_test
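# Quick shape check (note: despite its MNIST-flavored name, the function loads
# EMNIST letters; assumes keras and the emnist package are installed):
X_train, Y_train, X_test, Y_test = load_mnist_data(type='channel_last')
assert X_train.shape[1:] == (28, 28, 1)
assert Y_train.shape[1] == 26  # one-hot over the 26 letter classes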
def main():
    np.set_printoptions(suppress=True)

    # prepare training and testing datasets
    training_images, training_labels = extract_training_samples('digits')
    test_images, test_labels = extract_test_samples('digits')
    training_images = training_images[0:10000]
    training_labels = training_labels[0:10000]

    # flatten each image to a 784-vector and scale pixels to [0, 1]
    tr_i = [training_images[i].flatten().reshape(784).tolist()
            for i in range(len(training_images))]
    for i in range(len(tr_i)):
        for j in range(len(tr_i[i])):
            tr_i[i][j] /= 255.0

    # soft one-hot targets: 0.99 for the true class, 0.01 elsewhere
    tr_o = [[0.01] * 10 for _ in range(len(training_labels))]
    for i in range(len(tr_o)):
        tr_o[i][training_labels[i]] = 0.99

    # initialize and train the network
    nn = NeuralNetwork(784, [16, 16], 10)
    nn.train(tr_i, tr_o, 1000)

    # gauge performance
    correct = 0
    for test_image, test_label in zip(test_images[0:500], test_labels[0:500]):
        # scale the test pixels the same way as the training inputs
        result = nn.feed_forward((test_image.flatten().reshape(784) / 255.0).tolist())
        print("network result:\n", result)
        best = 0
        guess = -1
        for i, res in enumerate(result):
            if res > best:
                best = res
                guess = i
        print('network thinks this is a: ', guess)
        print("real answer:", test_label)
        if guess == int(test_label):
            correct += 1
    print('network was correct on ', correct, '/', 500, 'images')
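# A vectorized equivalent of the soft one-hot targets built above (a sketch
# using numpy only): start from an all-0.01 matrix and write 0.99 into the
# column given by each label.
import numpy as np

def soft_one_hot(labels, num_classes=10, off=0.01, on=0.99):
    targets = np.full((len(labels), num_classes), off)
    targets[np.arange(len(labels)), labels] = on
    return targets

# targets = soft_one_hot(training_labels)  # same values as tr_o, as an ndarray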