Code example #1
def get_train_test(num_classes):
    (train_X2, train_y2), (test_X2, test_y2) = mnist.load_data()
    train_X, train_y = extract_training_samples('letters')
    test_X, test_y = extract_test_samples('letters')
    print(train_X.shape)
    print(train_y.shape)
    print(test_X.shape)
    print(test_y.shape)
    print(train_X2.shape)
    print(train_y2.shape)
    print(test_X2.shape)
    print(test_y2.shape)

    train_X = np.append(train_X, train_X2, axis=0)
    train_y = np.append(train_y - 1, train_y2 + 26, axis=0)
    test_X = np.append(test_X, test_X2, axis=0)
    test_y = np.append(test_y - 1, test_y2 + 26, axis=0)
    print(train_X.shape)
    print(train_y.shape)
    print(test_X.shape)
    print(test_y.shape)
    
    x_train = train_X.reshape(train_X.shape[0], 784)
    x_test = test_X.reshape(test_X.shape[0], 784)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255.0
    x_test /= 255.0
    y_train = keras.utils.to_categorical(train_y, num_classes)
    y_test = keras.utils.to_categorical(test_y, num_classes)
    print('data returned')
    return x_train, y_train, x_test, y_test
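A usage sketch (a minimal example, assuming the module-level imports the function relies on: mnist from keras.datasets, the emnist extract helpers, numpy as np, and keras). The letters occupy labels 0..25 and the appended MNIST digits are shifted to 26..35, so num_classes must be 36:

x_train, y_train, x_test, y_test = get_train_test(num_classes=36)
print(x_train.shape, y_train.shape)  # e.g. (184800, 784) (184800, 36)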
Code example #2
def createDataset(length=100, colSize=8, rowSize=8, initialShuffle=True):
    mnistSize = 28
    size = rowSize * colSize
    images, labels = extract_training_samples('letters')

    if initialShuffle:
        images, labels = shuffle(images, labels)

    srcIndex = 0
    dataset = np.zeros(shape=(length, rowSize * mnistSize,
                              colSize * mnistSize))
    dataLabels = np.zeros(shape=(length, rowSize, colSize))
    for i in range(length):
        # Reshuffles if the end of the dataset is reached
        if (srcIndex + 1) * size >= len(images):
            srcIndex = 0
            images, labels = shuffle(images, labels)
        sourceImages = images[srcIndex * size:(srcIndex + 1) * size]
        sourceLabels = labels[srcIndex * size:(srcIndex + 1) * size]
        srcIndex += 1

        # Creates the image and label matrix
        matrixData, matrixLabels = createLetterMatrix(sourceImages,
                                                      sourceLabels,
                                                      rowSize=rowSize,
                                                      colSize=colSize,
                                                      mnistSize=mnistSize)
        dataset[i] = matrixData
        dataLabels[i] = matrixLabels

    return (dataset, dataLabels)
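createLetterMatrix is not shown in this snippet; below is a minimal sketch consistent with the call site, tiling the size = rowSize * colSize letter images into one canvas and their labels into a grid. The body is an assumption, not the project's actual code:

import numpy as np

def createLetterMatrix(images, labels, rowSize=8, colSize=8, mnistSize=28):
    # Tile rowSize*colSize images into one (rowSize*28) x (colSize*28) canvas.
    matrixData = np.zeros((rowSize * mnistSize, colSize * mnistSize))
    matrixLabels = np.zeros((rowSize, colSize))
    for r in range(rowSize):
        for c in range(colSize):
            k = r * colSize + c
            matrixData[r * mnistSize:(r + 1) * mnistSize,
                       c * mnistSize:(c + 1) * mnistSize] = images[k]
            matrixLabels[r, c] = labels[k]
    return matrixData, matrixLabels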
Code example #3
def load_dataset():
    # load dataset
    (trainX, trainY) = em.extract_training_samples('letters')
    trainX, trainY = shuffle(trainX, trainY)
    (testX, testY) = em.extract_test_samples('letters')
    # reshape dataset to have a single channel
    #(trainX,trainY),(testX,testY)=load_data()
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values
    #trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    tX = []
    tY = []
    # print("tX.shape",tX.shape)
    # print("tY.shape",tY.shape)
    # #print(trainy[0])
    shot = 300  # number of samples to keep per class
    ctr = [shot] * 27  # letters labels run 1..26, so index up to 26
    for i in range(len(trainY)):
        label = trainY[i]
        if ctr[label] > 0:  # keep at most `shot` samples per class
            ctr[label] -= 1
            tX.append(trainX[i])
            tY.append(trainY[i])
    print("tX.shape", len(tX))
    tY = to_categorical(tY)

    # print("tY.shape",tY.shape)

    return tX, tY, testX, testY
Code example #4
File: dcgan.py Project: forkbabu/Keras-GAN
    def load_emnist_data(self, n_xin, n_xout):
        """
        Load x_in, x_out and the test set
        @:param n_xin: Size of the X_in dataset
        @:param n_xout: Size of the X_out dataset
        @:return xin, xout, test
        """
        def normalize(data):
            return np.reshape((data.astype(np.float32) - 127.5) / 127.5,
                              (-1, 28, 28, 1))

        # Load and normalize the training data
        (x_train, y_train) = extract_training_samples('digits')
        x_train = normalize(x_train)

        # Shuffle for some randomness
        x_train, y_train = shuffle(x_train, y_train)

        assert n_xin + n_xout < len(x_train)  # requested sizes must fit within the dataset

        # Split into x_in and x_out
        x_in, y_in = x_train[:n_xin], y_train[:n_xin]
        x_out, y_out = x_train[n_xin:n_xin + n_xout], y_train[n_xin:n_xin +
                                                              n_xout]

        return (x_in, y_in), (x_out, y_out)
Code example #5
File: train.py Project: yacoubb/digit-classifier
def train(mode, dataset):
    from tensorflow import keras
    from emnist import list_datasets, extract_training_samples, extract_test_samples
    import numpy as np
    from numpy.random import seed
    from tensorflow import set_random_seed

    name = mode[0]
    mode = mode[1]
    seed(4)
    set_random_seed(4)

    (train_images, train_labels) = extract_training_samples(dataset)
    (test_images, test_labels) = extract_test_samples(dataset)
    train_labels = keras.utils.to_categorical(train_labels)
    test_labels = keras.utils.to_categorical(test_labels)

    if mode["reshape"]:
        # Reshaping the array to 4-dims so that it can work with the Keras API
        # The last number is 1, which signifies that the images are greyscale.
        train_images = np.reshape(train_images,
                                  (train_images.shape[0], 28, 28, 1))
        test_images = np.reshape(test_images,
                                 (test_images.shape[0], 28, 28, 1))

    train_images = keras.utils.normalize(train_images, axis=1)
    test_images = keras.utils.normalize(test_images, axis=1)

    model = keras.Sequential()
    for l in mode["architecture"]:
        model.add(l)

    es = keras.callbacks.EarlyStopping(monitor="val_loss",
                                       mode="min",
                                       patience=2)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    print(model.summary())
    model.fit(x=train_images,
              y=train_labels,
              epochs=100,
              validation_split=0.1,
              callbacks=[es])
    model_name = dataset + "_" + name
    model.save("./" + model_name + ".h5")
    print("saved model to " + model_name + ".h5")

    print("evaluating...")
    val_loss, val_acc = model.evaluate(x=test_images, y=test_labels)

    del train_images
    del train_labels
    del test_images
    del test_labels

    import gc

    gc.collect()
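A hedged example of how train might be invoked: mode is a (name, config) pair whose config needs "reshape" and "architecture" keys; the layer list below is an illustrative assumption, not the project's actual architecture:

from tensorflow import keras

mode = ("dense_baseline", {
    "reshape": False,
    "architecture": [
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ],
})
train(mode, "digits")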
Code example #6
def load_data() -> tuple:

    images_train, labels_train = extract_training_samples("letters")
    images_test, labels_test = extract_test_samples("letters")
    images = np.concatenate((images_train, images_test))
    labels = np.concatenate((labels_train, labels_test))
    images = np.expand_dims(images, axis=-1)
    labels = labels - 1
    return images, labels
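A quick hedged check of what this returns (the emnist letters splits hold 124800 training and 20800 test samples, so the concatenation yields 145600):

images, labels = load_data()
print(images.shape, labels.min(), labels.max())  # expected: (145600, 28, 28, 1) 0 25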
Code example #7
def load_emnist_balanced():
    """
    Load EMNIST Balanced
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples(
        'balanced')
    test_images, test_labels = emnist.extract_test_samples('balanced')
    return training_images, training_labels, test_images, test_labels, len(
        set(training_labels))
Code example #8
def load_emnist_letters():
    """
    Load EMNIST Letters
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples(
        'letters')
    test_images, test_labels = emnist.extract_test_samples('letters')
    return training_images, training_labels, test_images, test_labels, len(
        set(training_labels))
Code example #9
File: dpgan.py Project: forkbabu/Keras-GAN
    def __init__(self, max_data=40000, noise_std=0.001, mia_attacks=None):
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 100
        self.mia_attacks = mia_attacks

        def normalize(data):
            return np.reshape((data.astype(np.float32) - 127.5) / 127.5,
                              (-1, *self.img_shape))

        # Load, normalize and split the dataset
        (self.x_train, _), (_, _) = mnist.load_data()
        self.x_train = normalize(self.x_train)

        self.x_out, y_out = extract_training_samples('digits')
        self.x_out = normalize(self.x_out)

        self.x_train = self.x_train[:max_data]

        print("Loading with {} data samples!".format(len(self.x_train)))

        # The following parameters and optimizer are set as recommended in the paper
        self.n_critic = 5
        self.clip_value = 5.0
        NoisyAdam = add_gradient_noise(Adam)
        discriminator_optimizer = NoisyAdam(lr=0.0002,
                                            beta_1=0.5,
                                            clipnorm=self.clip_value,
                                            standard_deviation=noise_std)
        optimizer = RMSprop(lr=0.00005)

        # Build and compile the critic
        self.critic, self.advreg_model = self.build_critic(
            discriminator_optimizer, optimizer)

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates images
        z = Input(shape=(self.latent_dim, ))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.critic.trainable = False

        # The critic takes generated images as input and determines validity
        valid = self.critic(img)

        # The combined model (stacked generator and critic)
        self.combined = Model(z, valid)
        self.combined.compile(loss='binary_crossentropy',
                              optimizer=optimizer,
                              metrics=['accuracy'])
Code example #10
def get_emnist_image(pred):
    from emnist import extract_training_samples
    import pandas as pd
    x, y = extract_training_samples('balanced')
    #find inverse of: chr(33+np.argmax(p[i]))
    csv = pd.io.parsers.read_csv('emnist-balanced-mapping.csv')
    print(pred)
    for i in range(len(x)):
        if chr(csv['Out'][y[i]]) == pred:
            return x[i]
    return x[0]
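Scanning the full training set on every call is linear in the dataset size. A hedged alternative builds a character-to-image lookup once; it reuses the snippet's emnist-balanced-mapping.csv file and its 'Out' column, both taken on trust from the original:

import pandas as pd
from emnist import extract_training_samples

def build_emnist_lookup(mapping_csv='emnist-balanced-mapping.csv'):
    x, y = extract_training_samples('balanced')
    mapping = pd.read_csv(mapping_csv)
    lookup = {}
    for img, label in zip(x, y):
        ch = chr(mapping['Out'][label])
        lookup.setdefault(ch, img)  # keep the first image seen per character
    return lookup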
Code example #11
def save_emnist_uppercase_reduced_letters64_dataset():
    x_train, y_train = emnist.extract_training_samples('byclass')
    x_test, y_test = emnist.extract_test_samples('byclass')

    train_mask = emnsit_uppercase_label_filter(y_train)
    test_mask = emnsit_uppercase_label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    x_train_reduced = [
        cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
        for i in x_train_reduced
    ]
    y_train_reduced = y_train[train_mask]
    # shift labels so the uppercase range starts at 0
    y_train_reduced -= 10
    y_train_reduced = replace_x_letter_label(y_train_reduced)

    x_test_reduced = x_test[test_mask]
    x_test_reduced = [
        cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
        for i in x_test_reduced
    ]
    y_test_reduced = y_test[test_mask]
    y_test_reduced -= 10
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)
    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")
    #
    x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0],
                                              x_train_reduced.shape[1],
                                              x_train_reduced.shape[2], 1)

    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }

    with open("eng_uppercase_letters64_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
Code example #12
def load_dataset():
    # load dataset
    trainX, trainY = extract_training_samples('letters')
    testX, testY = extract_test_samples('letters')
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values (letters labels run 1..26, so shift to 0..25)
    trainY = to_categorical(trainY - 1)
    testY = to_categorical(testY - 1)
    return trainX, trainY, testX, testY
Code example #13
def createTrainingSet(test_size=0.25):
    data, labels = extract_training_samples('letters')
    data = data.astype('float32') / 255.0
    labels = to_categorical(labels - 1)  # Labels are stored in 1 .. 26

    # Splits the set
    xtrain, xtest, ytrain, ytest = train_test_split(data,
                                                    labels,
                                                    test_size=test_size)
    xtrain = np.expand_dims(xtrain, -1)
    xtest = np.expand_dims(xtest, -1)
    return xtrain, xtest, ytrain, ytest
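A hedged usage sketch (assuming numpy, sklearn's train_test_split, and keras' to_categorical are imported, as the function implies):

xtrain, xtest, ytrain, ytest = createTrainingSet(test_size=0.25)
print(xtrain.shape, ytrain.shape)  # e.g. (93600, 28, 28, 1) (93600, 26)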
Code example #14
def load_EMNIST(size=None):
    import numpy as np
    import cv2
    from emnist import extract_training_samples
    images, labels = extract_training_samples('letters')
    x_train = (images[:100000] / 255).astype(np.float32)
    x_valid = (images[100000:] / 255).astype(np.float32)
    if size:
        x_train = np.array([cv2.resize(x, (size, size)) for x in x_train])
        x_valid = np.array([cv2.resize(x, (size, size)) for x in x_valid])
    print(
        f"Loaded EMNIST dataset: x_train{x_train.shape}, x_valid{x_valid.shape}"
    )
    return x_train, x_valid
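A hedged call: EMNIST letters has 124800 training images, so the fixed 100000 split above leaves 24800 for validation.

x_train, x_valid = load_EMNIST(size=32)
print(x_train.shape, x_valid.shape)  # e.g. (100000, 32, 32) (24800, 32, 32)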
Code example #15
File: alpha_model.py Project: jhanreg11/char-recog
def preprocess():
    train_images, train_labels = emnist.extract_training_samples('mnist')
    train_images = train_images.reshape(
        (train_images.shape[0], 1, 28, 28)).astype(np.float32)
    train_images /= 255
    train_labels = one_hot(train_labels.reshape(train_labels.shape[0], 1), 10)

    test_images, test_labels = emnist.extract_test_samples('mnist')
    test_images = test_images.reshape(
        (test_images.shape[0], 1, 28, 28)).astype(np.float32)
    test_images /= 255

    return (train_images, train_labels), (test_images, test_labels)
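one_hot is not defined in this snippet; a minimal sketch matching the call site (integer labels of shape (N, 1) and a depth of 10) might look like:

import numpy as np

def one_hot(labels, depth):
    # labels: integer array of shape (N, 1); returns (N, depth) one-hot rows
    out = np.zeros((labels.shape[0], depth), dtype=np.float32)
    out[np.arange(labels.shape[0]), labels.ravel()] = 1.0
    return out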
Code example #16
def saveDataSet(dataSetType):

    if dataSetType == 'digits':
        # Extract Dataset
        print('Extracting Dataset')
        X_train, y_train = extract_training_samples('digits')
        X_test, y_test = extract_test_samples('digits')

        # Reshape Dataset
        print('Reshaping Dataset')
        images_train, labels_train = manageDataSet(len(y_train), X_train,
                                                   y_train)
        images_test, labels_test = manageDataSet(len(y_test), X_test, y_test)

        # Save the Dataset
        print('Saving Dataset')
        save("images_numbers_train.npy", images_train)
        save("labels_numbers_train.npy", labels_train)
        save("images_numbers_test.npy", images_test)
        save("labels_numbers_test.npy", labels_test)

    if dataSetType == 'letters':
        # Extract Dataset
        print('Extracting Dataset')
        X_train, y_train = extract_training_samples('letters')
        X_test, y_test = extract_test_samples('letters')

        # Reshape Dataset
        print('Reshaping Dataset')
        imgs_train, labels_train = manageDataSet(len(y_train), X_train,
                                                 y_train)
        imgs_test, labels_test = manageDataSet(len(y_test), X_test, y_test)

        # Save the reshaped Dataset
        print('Saving Dataset')
        save("images_letters_train.npy", imgs_train)
        save("labels_letters_train.npy", labels_train)
        save("images_letters_test.npy", imgs_test)
        save("labels_letters_test.npy", labels_test)
Code example #17
    def loadEmnist(self):
        """
        Load the EMNIST balanced dataset and do some data pre-processing:
        split the training set 90/10% into training and validation sets,
        and convert the y labels to one-hot arrays.
        """

        x_train, y_train = extract_training_samples('balanced')
        x_test, y_test   = extract_test_samples('balanced')

        # Get only the upper case letters
        train_alphabet_list = (np.array(y_train) < 36) & (np.array(y_train) > 9)
        test_alphabet_list  = (np.array(y_test) < 36) & (np.array(y_test) > 9)

        y_train = y_train[train_alphabet_list] - 10
        x_train = x_train[train_alphabet_list]
        y_test = y_test[test_alphabet_list] - 10
        x_test = x_test[test_alphabet_list]

        self.nclass = 26
        self.width  = x_train.shape[1]
        self.height = x_train.shape[2]
        self.total_train_size = len(x_train)
        self.ntrain = int(0.9 * self.total_train_size)
        self.nval = int(0.1 * self.total_train_size)
        self.ntest  = len(x_test)
        self.train_counter = 0
        self.train_index = np.arange(self.ntrain)

        x_train = x_train.reshape(x_train.shape[0], self.width, self.height, 1)
        x_test = x_test.reshape(x_test.shape[0], self.width, self.height, 1)
        input_shape = (self.width, self.height, 1)

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        self.x_test = x_test/255

        self.x_val = x_train[self.ntrain:self.total_train_size]
        self.x_train = x_train[0:self.ntrain]
        y_val = y_train[self.ntrain:self.total_train_size]
        y_train = y_train[0:self.ntrain]

        # convert class vectors to binary class matrices
        self.y_train = keras.utils.to_categorical(y_train, 26)
        self.y_val = keras.utils.to_categorical(y_val, 26)
        self.y_test = keras.utils.to_categorical(y_test, 26)

        print(self.x_train.shape)
        print(self.x_val.shape)
        print(self.x_test.shape)
Code example #18
    def __init__(self,
                 number_of_authors,
                 number_of_pixels=4,
                 poisoned_ratio=0.2,
                 backdoor_value=1,
                 initial_shuffle=True,
                 seed=None):

        X_train, y_train = emnist.extract_training_samples('digits')
        X_test, y_test = emnist.extract_test_samples('digits')
        X = np.concatenate((X_train, X_test))
        y = np.concatenate((y_train, y_test))

        # IMPORTANT:
        # create imbalanced datasets, i.e., the number of elements in each digit class of the same author may vary.
        # But the number of samples per author is balanced, i.e., each author has the same number of samples.

        samples_per_author = len(X) // number_of_authors

        author = np.repeat(np.arange(number_of_authors), samples_per_author)

        # throw leftover data samples away so that each author has the same number of samples
        skip_at_end = len(X) - len(author)
        assert skip_at_end < samples_per_author, "Why do you throw so many samples away?"
        if skip_at_end > 0:
            print(
                f"Warning: throwing {skip_at_end} samples away to have balanced number of samples per author"
            )

        X = X[:len(author)]
        y = y[:len(author)]

        # flatten each 28x28 image into a 784-vector
        print(X.shape)
        X = X.reshape((len(X), 784))
        print(X.shape)
        # binarize data
        # X[X<128] = 0
        # X[X>127] = 255
        X = X / 255

        super(PoisonedDataset_EMNIST_DIGITS,
              self).__init__(X,
                             y,
                             author,
                             number_of_classes=10,
                             number_of_pixels=number_of_pixels,
                             poisoned_ratio=poisoned_ratio,
                             backdoor_value=backdoor_value,
                             initial_shuffle=initial_shuffle,
                             seed=seed)
Code example #19
def load_data():
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')

    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)

    chars = '0123456789' + string.ascii_lowercase
    num_chars = len(chars)

    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)

    return X_train, X_test, train_labels, test_labels
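remove_upper is not shown here (it is also used in the next example). A hedged sketch, assuming the standard EMNIST byclass label layout (0-9 digits, 10-35 uppercase, 36-61 lowercase): drop the uppercase samples and remap lowercase down to 10..35 so the labels index into chars = '0123456789' + ascii_lowercase:

import numpy as np

def remove_upper(X, labels):
    mask = (labels < 10) | (labels >= 36)  # keep digits and lowercase
    X, labels = X[mask], labels[mask]
    labels = np.where(labels >= 36, labels - 26, labels)  # lowercase -> 10..35
    return X, labels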
Code example #20
File: create_data.py Project: rzhao97/captcha
def load_data():
    # Get numbers and letters data from EMNIST
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')

    # Remove capital letters
    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)

    # Merge train and test datasets
    X = np.vstack((X_train, X_test))
    labels = np.hstack((train_labels, test_labels))

    return X, labels
Code example #21
File: new_gan.py Project: ryant74/handwriting-gen
    def train(self, epochs, batch_size=128, sample_interval=50):

        # Load the dataset
        X_train, Y_train = extract_training_samples('balanced')

        # Rescale -1 to 1
        X_train = X_train / 127.5 - 1.
        X_train = np.expand_dims(X_train, axis=3)

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new images
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)
Code example #22
def load_emnist_images():
    """ Loads MNIST and EMNIST digit images, normalized to the 0-1 range.
    """
    (mnist_x, mnist_y), (mnist_x_test, mnist_y_test) = mnist.load_data()
    emnist_x, emnist_y = extract_training_samples('digits')
    mnist_x = np.reshape(mnist_x, (-1, 28, 28, 1))
    mnist_x_test = np.reshape(mnist_x_test, (-1, 28, 28, 1))
    emnist_x = np.reshape(emnist_x, (-1, 28, 28, 1))
    mnist_x = np.vstack((mnist_x, emnist_x))
    mnist_y = np.hstack((mnist_y, emnist_y))

    # Scale everything to 0-1
    mnist_x, mnist_x_test = normalize_0_1([mnist_x, mnist_x_test])

    return ((mnist_x, transform_to_one_hot(mnist_y, depth=10)),
            (mnist_x_test, transform_to_one_hot(mnist_y_test, depth=10)))
Code example #23
def save_emnist_reduced_letters_dataset():
    x_train, y_train = emnist.extract_training_samples('letters')
    x_test, y_test = emnist.extract_test_samples('letters')
    # Shift labels to the range [0..25]
    y_train = np.subtract(y_train, 1)
    y_test = np.subtract(y_test, 1)

    train_mask = label_filter(y_train)
    test_mask = label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    y_train_reduced = y_train[train_mask]
    y_train_reduced = replace_x_letter_label(y_train_reduced)

    x_test_reduced = x_test[test_mask]
    y_test_reduced = y_test[test_mask]
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)

    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")
    #
    x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0],
                                              x_train_reduced.shape[1],
                                              x_train_reduced.shape[2], 1)
    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }

    with open("eng_letters_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
Code example #24
    def get_data(self, s0):
        self.x_train, self.y_train = extract_training_samples('byclass')
        self.x_test, self.y_test = extract_test_samples('byclass')
        self.y_test = oneHotEncodeY(self.y_test, 62)
        self.y_train = oneHotEncodeY(self.y_train, 62)
        self.x_train = self.x_train.astype('float32')
        self.y_train = self.y_train.astype('float32')
        self.x_test = self.x_test.astype('float32')
        self.y_test = self.y_test.astype('float32')
        self.x_train = self.x_train / 255.
        self.x_test = self.x_test / 255.

        self.x_train = np.reshape(self.x_train, (self.x_train.shape[0], 28, 28, 1))
        self.x_test = np.reshape(self.x_test, (self.x_test.shape[0], 28, 28, 1))
        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]

        
        self.nb_classes = 62
        self.x_sub = self.x_test[:s0]
        self.y_sub = np.argmax(self.y_test[:s0], axis=1)

        self.x_test = self.x_test[s0:]
        self.y_test = self.y_test[s0:]
Code example #25
def get_data(experiment, occlusion=None, bars_type=None, one_hot=False):

    # Load the EMNIST balanced data via the emnist package.
    train_images, train_labels = emnist.extract_training_samples('balanced')
    test_images, test_labels = emnist.extract_test_samples('balanced')

    # (train_images, train_labels), (test_images, test_labels) = emnist.extract_training_samples(
    #     'letters'), emnist.extract_test_samples('letters')
    #     # train_labels = train_labels.reshape(-1, )
    #     # test_labels = test_labels.reshape(-1, )

    all_data = np.concatenate((train_images, test_images), axis=0)
    all_labels = np.concatenate((train_labels, test_labels), axis=0)

    # all_labels = all_labels - 1  # Change to 0-base index for letters

    # For Table 1 and Experiment 2
    # for i, l in enumerate(all_labels):
    #     all_labels[i] = {
    #         36: 10,
    #         37: 11,
    #         38: 13,
    #         39: 14,
    #         40: 15,
    #         41: 16,
    #         42: 17,
    #         43: 23,
    #         44: 26,
    #         45: 27,
    #         46: 29
    #     }.get(l, l)

    all_data = add_noise(all_data, experiment, occlusion, bars_type)

    all_data = all_data.reshape(
        (-1, img_columns, img_rows, constants.colors))  # 131600 samples in EMNIST balanced
    all_data = all_data.astype('float32') / 255

    if one_hot:
        # Changes labels to binary rows. Each label correspond to a column, and only
        # the column for the corresponding label is set to one.
        all_labels = to_categorical(all_labels)

    return (all_data, all_labels)
Code example #26
    def load_data(self):
        """
        Load data from emnist package

        # Returns:
            all_data : train data, train labels, test data and test labels
        """
        self._train_data, self._train_labels = emnist.extract_training_samples(
            'digits')
        self._train_labels = np.eye(10)[self._train_labels]
        self._test_data, self._test_labels = emnist.extract_test_samples(
            'digits')
        self._test_labels = np.eye(10)[self._test_labels]

        self.shuffle()

        return self.data
Code example #27
def load_data(plot=True):
    # extract data from EMNIST [letters]
    images_train, labels_train = extract_training_samples('letters')
    images_test, labels_test = extract_test_samples('letters')

    if plot:
        # randomly plot 25 letters
        f, axarr = plt.subplots(5, 5)
        indices, ctr = random.sample(range(labels_train.shape[0]), 25), 0
        for i in range(5):
            for j in range(5):
                idx = indices[ctr]
                axarr[i, j].imshow(images_train[idx], cmap="gray")
                axarr[i, j].set_title(f"{letters[labels_train[idx] - 1]}")
                ctr += 1
        plt.show()

    # flatten the last two dimensions to (N, 784)
    n_train, h, w = images_train.shape
    n_test = images_test.shape[0]
    return (images_train.reshape((n_train, h * w)),
            images_test.reshape((n_test, h * w)),
            labels_train, labels_test)
Code example #28
    def _load_data(self):

        print('loading data')

        X, y = extract_training_samples('letters')

        self.train_images, self.test_images = X[:60000], X[60000:70000]
        self.train_labels, self.test_labels = y[:60000], y[60000:70000]

        self.train_images = self.train_images.astype('float32')
        self.test_images = self.test_images.astype('float32')
        self.train_images /= 255
        self.test_images /= 255

        self.org_images = self.test_images
        self.train_images = self.train_images.reshape(60000, 28, 28, 1)
        self.test_images = self.test_images.reshape(10000, 28, 28, 1)

        # One hot encode (letters labels run 1..26, so shift to 0..25 first)
        self.train_labels = to_categorical(self.train_labels - 1, self.num_classes)
        self.test_labels = to_categorical(self.test_labels - 1, self.num_classes)
Code example #29
def load_mnist_data(type='channel_last'):
    from emnist import extract_training_samples, extract_test_samples
    from keras.utils import np_utils
    
    # input image dimensions
    nb_classes = 26
    img_rows, img_cols = 28, 28
    
    X_train, Y_1 = extract_training_samples('letters')
    X_test, Y_2 = extract_test_samples('letters')
    
    # EMNIST letters labels run 1..26; shift them to 0..25
    y_train = Y_1 - 1
    y_test = Y_2 - 1
    
    if type == 'channel_first':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    return X_train, Y_train, X_test, Y_test
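A hedged call, using the EMNIST letters split sizes (124800 train / 20800 test):

X_train, Y_train, X_test, Y_test = load_mnist_data(type='channel_last')
print(X_train.shape, Y_train.shape)  # e.g. (124800, 28, 28, 1) (124800, 26)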
Code example #30
def main():
    np.set_printoptions(suppress=True)

    # prepare training and testing datasets
    training_images, training_labels = extract_training_samples('digits')
    test_images, test_labels = extract_test_samples('digits')
    training_images = training_images[0:10000]
    training_labels = training_labels[0:10000]
    
    # flatten to 784-vectors and normalize to [0, 1]
    tr_i = (training_images.reshape(len(training_images), 784) / 255.0).tolist()
            
    # one-hot-style targets: 0.99 for the true digit, 0.01 elsewhere
    tr_o = [[0.01] * 10 for _ in range(len(training_labels))]
    for i in range(len(tr_o)):
        tr_o[i][training_labels[i]] = 0.99
       
    # initialize and train the network
    nn = NeuralNetwork(784, [16,16], 10)
    nn.train(tr_i, tr_o, 1000)

    # gauge performance
    correct = 0
    for test_image, test_label in zip(test_images[0:500], test_labels[0:500]):
        result = nn.feed_forward((test_image.flatten() / 255.0).tolist())  # normalize like the training data
        print("network result:\n", result);
        best = 0
        guess = -1
        for i, res in enumerate(result):
            if res > best:
                best = res
                guess = i
        print('network thinks this is a: ', guess)
        print("real answer:", test_label)
        if guess == int(test_label):
            correct += 1
    print('network was correct on ', correct, '/', 500, 'images')