Example #1
    def load(self):
        """
        Downloads the dataset following the specifification of the instantiated instance
        """
        # donwload proper dataset
        if self.onlyDigits:
            if self.kerasDB:
                logging.info(
                    'Downloading the original MNIST dataset via keras.datasets'
                )
                (X_train, y_train), (X_test, y_test) = mnist.load_data()
                # concatenate train and test together:
                # we want to perform the training with the highest number of training instances
                X = np.concatenate((X_train, X_test))
                y = np.concatenate((y_train, y_test))
            else:
                logging.info('Downloading digits from the EMNIST repository')
                X, y = extract_test_samples('digits')
        else:
            logging.info(
                'Downloading letters and digits from EMNIST repository')
            X, y = extract_test_samples('balanced')
        # save the downloaded data in the dataset folder as numpy arrays
        np.save(os.path.join(self.dirData, 'data.npy'), X)
        np.save(os.path.join(self.dirData, 'label.npy'), y)

        if self.verbose:
            self.printInfo(y)

        return X, y
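
All of these examples rely on the third-party emnist package (pip install emnist). As a quick orientation, here is a minimal sketch of its API; the shapes and label ranges in the comments reflect the published EMNIST splits:

# Minimal orientation sketch for the emnist package (pip install emnist).
from emnist import list_datasets, extract_training_samples, extract_test_samples

print(list_datasets())  # ['balanced', 'byclass', 'bymerge', 'digits', 'letters', 'mnist']

images, labels = extract_test_samples('letters')
print(images.shape)                # (20800, 28, 28), uint8 greyscale
print(labels.min(), labels.max())  # 1 26 -- 'letters' labels are 1-based

images, labels = extract_training_samples('digits')
print(labels.min(), labels.max())  # 0 9 -- 'digits' labels are 0-based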
Example #2
def load_dataset():
    # load the EMNIST letters dataset and shuffle the training split
    (trainX, trainY) = em.extract_training_samples('letters')
    trainX, trainY = shuffle(trainX, trainY)
    (testX, testY) = em.extract_test_samples('letters')
    # reshape the images to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode the test target values
    testY = to_categorical(testY)
    # build a few-shot training subset: at most shot - 1 samples per class
    # (letter labels are 1..26, so the counter array needs 27 entries and
    # index 0 goes unused)
    tX = []
    tY = []
    shot = 300
    ctr = [shot] * 27
    for i in range(len(trainY)):
        label = trainY[i]
        ctr[label] = ctr[label] - 1
        if ctr[label] > 0:
            tX.append(trainX[i])
            tY.append(trainY[i])
    print("few-shot training samples:", len(tX))
    tX = np.array(tX)
    tY = to_categorical(tY)

    return tX, tY, testX, testY
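
Note that the counter loop above keeps at most shot - 1 = 299 samples per class, because the counter is decremented before the > 0 check. A vectorized sketch that takes exactly shot samples per class, under the same 1-based letter labels:

# Vectorized alternative: exactly `shot` samples per class.
import numpy as np

def subsample_per_class(X, y, shot=300):
    keep = np.concatenate([np.flatnonzero(y == c)[:shot] for c in np.unique(y)])
    return X[keep], y[keep]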
Example #3
    def loadTestData(self, category='letters'):
        """
        Load testing data for EMNIST uppercase/lowercase 26 characters and
        format appropriately
        """
        testImages, testLabels = emnist.extract_test_samples(category)
        return reformat(testImages, testLabels)
Example #4
def train(mode, dataset):
    from tensorflow import keras
    from emnist import list_datasets, extract_training_samples, extract_test_samples
    import numpy as np
    from numpy.random import seed
    from tensorflow import set_random_seed  # TF 1.x API; in TF 2.x use tf.random.set_seed

    name = mode[0]
    mode = mode[1]
    seed(4)
    set_random_seed(4)

    (train_images, train_labels) = extract_training_samples(dataset)
    (test_images, test_labels) = extract_test_samples(dataset)
    train_labels = keras.utils.to_categorical(train_labels)
    test_labels = keras.utils.to_categorical(test_labels)

    if mode["reshape"]:
        # Reshaping the array to 4-dims so that it can work with the Keras API
        # The last number is 1, which signifies that the images are greyscale.
        train_images = np.reshape(train_images,
                                  (train_images.shape[0], 28, 28, 1))
        test_images = np.reshape(test_images,
                                 (test_images.shape[0], 28, 28, 1))

    train_images = keras.utils.normalize(train_images, axis=1)
    test_images = keras.utils.normalize(test_images, axis=1)

    model = keras.Sequential()
    # assemble the model from the layer list supplied in the mode dict
    for layer in mode["architecture"]:
        model.add(layer)

    es = keras.callbacks.EarlyStopping(monitor="val_loss",
                                       mode="min",
                                       patience=2)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    print(model.summary())
    model.fit(x=train_images,
              y=train_labels,
              epochs=100,
              validation_split=0.1,
              callbacks=[es])
    model_name = dataset + "_" + name
    model.save("./" + model_name + ".h5")
    print("saved model to " + model_name + ".h5")

    print("evaluating...")
    val_loss, val_acc = model.evaluate(x=test_images, y=test_labels)

    del train_images
    del train_labels
    del test_images
    del test_labels

    import gc

    gc.collect()
Example #5
def load_data() -> tuple:
    images_train, labels_train = extract_training_samples("letters")
    images_test, labels_test = extract_test_samples("letters")
    images = np.concatenate((images_train, images_test))
    labels = np.concatenate((labels_train, labels_test))
    # add a channel axis and shift the 1-based letter labels to 0..25
    images = np.expand_dims(images, axis=-1)
    labels = labels - 1
    return images, labels
Example #6
def load_emnist_balanced():
    """
    Load EMNIST Balanced
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples(
        'balanced')
    test_images, test_labels = emnist.extract_test_samples('balanced')
    return training_images, training_labels, test_images, test_labels, len(
        set(training_labels))
Example #7
def load_emnist_letters():
    """
    Load EMNIST Letters
    :return: training inputs, training outputs, test inputs, test outputs, number of classes
    """
    training_images, training_labels = emnist.extract_training_samples(
        'letters')
    test_images, test_labels = emnist.extract_test_samples('letters')
    return training_images, training_labels, test_images, test_labels, len(
        set(training_labels))
Example #8
def save_emnist_uppercase_reduced_letters64_dataset():
    x_train, y_train = emnist.extract_training_samples('byclass')
    x_test, y_test = emnist.extract_test_samples('byclass')

    train_mask = emnsit_uppercase_label_filter(y_train)
    test_mask = emnsit_uppercase_label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    x_train_reduced = [
        cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
        for i in x_train_reduced
    ]
    y_train_reduced = y_train[train_mask]
    # shift the uppercase 'byclass' labels (10..35) down to 0..25
    y_train_reduced -= 10
    y_train_reduced = replace_x_letter_label(y_train_reduced)

    x_test_reduced = x_test[test_mask]
    x_test_reduced = [
        cv2.resize(i, (64, 64), interpolation=cv2.INTER_NEAREST)
        for i in x_test_reduced
    ]
    y_test_reduced = y_test[test_mask]
    y_test_reduced -= 10
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)
    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")
    x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0],
                                              x_train_reduced.shape[1],
                                              x_train_reduced.shape[2], 1)

    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }

    with open("eng_uppercase_letters64_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
Example #9
def load_dataset():
    # load dataset
    trainX, trainY = extract_training_samples('letters')
    testX, testY = extract_test_samples('letters')
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
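
Calling to_categorical on the raw 'letters' labels (1..26), as above, produces 27 columns with column 0 always empty. If an exact 26-way encoding is wanted, shift the labels first; a minimal sketch:

# Shift the 1-based letter labels to 0..25 before one-hot encoding,
# so that the encoded matrix has exactly 26 columns.
trainY = to_categorical(trainY - 1, num_classes=26)
testY = to_categorical(testY - 1, num_classes=26)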
Example #10
def preprocess():
    train_images, train_labels = emnist.extract_training_samples('mnist')
    train_images = train_images.reshape(
        (train_images.shape[0], 1, 28, 28)).astype(np.float32)
    train_images /= 255
    train_labels = one_hot(train_labels.reshape(train_labels.shape[0], 1), 10)

    test_images, test_labels = emnist.extract_test_samples('mnist')
    test_images = test_images.reshape(
        (test_images.shape[0], 1, 28, 28)).astype(np.float32)
    test_images /= 255
    # note: unlike the training labels, the test labels are returned as raw integers

    return (train_images, train_labels), (test_images, test_labels)
Example #11
def saveDataSet(dataSetType):

    if dataSetType == 'digits':
        # Extract Dataset
        print('Extracting Dataset')
        X_train, y_train = extract_training_samples('digits')
        X_test, y_test = extract_test_samples('digits')

        # Reshape Dataset
        print('Reshaping Dataset ')
        images_train, labels_train = manageDataSet(len(y_train), X_train,
                                                   y_train)
        images_test, labels_test = manageDataSet(len(y_test), X_test, y_test)

        # Save the Dataset
        print('Saving Dataset')
        save("images_numbers_train.npy", images_train)
        save("labels_numbers_train.npy", labels_train)
        save("images_numbers_test.npy", images_test)
        save("labels_numbers_test.npy", labels_test)

    if dataSetType == 'letters':
        # Extract Dataset
        print('Extracting Dataset')
        X_train, y_train = extract_training_samples('letters')
        X_test, y_test = extract_test_samples('letters')

        # Reshape Dataset
        print('Reshaping Dataset ')
        imgs_train, labels_train = manageDataSet(len(y_train), X_train,
                                                 y_train)
        imgs_test, labels_test = manageDataSet(len(y_test), X_test, y_test)

        # Save the reshaped Dataset
        print('Saving Dataset')
        save("images_letters_train.npy", imgs_train)
        save("labels_letters_train.npy", labels_train)
        save("images_letters_test.npy", imgs_test)
        save("labels_letters_test.npy", labels_test)
    def loadEmnist(self):
        """
        Load the EMNIST dataset and do some data pre-processing:
        split the training set 90/10 into training and validation sets,
        and convert the y labels to one-hot arrays.
        """

        x_train, y_train = extract_training_samples('balanced')
        x_test, y_test   = extract_test_samples('balanced')

        # Get only the upper case letters
        train_alphabet_list = (np.array(y_train) < 36) & (np.array(y_train) > 9)
        test_alphabet_list  = (np.array(y_test) < 36) & (np.array(y_test) > 9)

        y_train = y_train[train_alphabet_list] - 10
        x_train = x_train[train_alphabet_list]
        y_test = y_test[test_alphabet_list] - 10
        x_test = x_test[test_alphabet_list]

        self.nclass = 26
        self.width  = x_train.shape[1]
        self.height = x_train.shape[2]
        self.total_train_size = len(x_train)
        self.ntrain = int(0.9 * self.total_train_size)
        self.nval = int(0.1 * self.total_train_size)
        self.ntest  = len(x_test)
        self.train_counter = 0
        self.train_index = np.arange(self.ntrain)

        x_train = x_train.reshape(x_train.shape[0], self.width, self.height, 1)
        x_test = x_test.reshape(x_test.shape[0], self.width, self.height, 1)
        input_shape = (self.width, self.height, 1)

        x_train = x_train.astype('float32')
        x_test = x_test.astype('float32')
        x_train /= 255
        self.x_test = x_test/255

        self.x_val = x_train[self.ntrain:self.total_train_size]
        self.x_train = x_train[0:self.ntrain]
        y_val = y_train[self.ntrain:self.total_train_size]
        y_train = y_train[0:self.ntrain]

        # convert class vectors to binary class matrices
        self.y_train = keras.utils.to_categorical(y_train, 26)
        self.y_val = keras.utils.to_categorical(y_val, 26)
        self.y_test = keras.utils.to_categorical(y_test, 26)

        print(self.x_train.shape)
        print(self.x_val.shape)
        print(self.x_test.shape)
Example #13
    def __init__(self,
                 number_of_authors,
                 number_of_pixels=4,
                 poisoned_ratio=0.2,
                 backdoor_value=1,
                 initial_shuffle=True,
                 seed=None):

        X_train, y_train = emnist.extract_training_samples('digits')
        X_test, y_test = emnist.extract_test_samples('digits')
        X = np.concatenate((X_train, X_test))
        y = np.concatenate((y_train, y_test))

        # IMPORTANT:
        # create imbalanced datasets, i.e., the number of elements in each digit class of the same author may vary.
        # But the number of samples per author is balanced, i.e., each author has the same number of samples.

        samples_per_author = len(X) // number_of_authors

        author = np.repeat(np.arange(number_of_authors), samples_per_author)

        # throw leftover data samples away so that each author has the same number of samples
        skip_at_end = len(X) - len(author)
        assert skip_at_end < samples_per_author, "Why do you throw so many samples away?"
        if skip_at_end > 0:
            print(
                f"Warning: throwing {skip_at_end} samples away to have balanced number of samples per author"
            )

        X = X[:len(author)]
        y = y[:len(author)]

        # flatten each 28x28 image into a 784-dimensional vector
        print(X.shape)
        X = X.reshape((len(X), 784))
        print(X.shape)
        # binarize data
        # X[X<128] = 0
        # X[X>127] = 255
        X = X / 255

        super(PoisonedDataset_EMNIST_DIGITS,
              self).__init__(X,
                             y,
                             author,
                             number_of_classes=10,
                             number_of_pixels=number_of_pixels,
                             poisoned_ratio=poisoned_ratio,
                             backdoor_value=backdoor_value,
                             initial_shuffle=initial_shuffle,
                             seed=seed)
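
A hypothetical instantiation (the parameter values are illustrative, not from the source). EMNIST digits has 240,000 training plus 40,000 test samples, so 280 authors receive exactly 1,000 samples each with no leftovers:

# Hypothetical usage: 280,000 digit samples split as 1,000 per author.
# Exact poisoning semantics live in the parent class, which is not shown here.
dataset = PoisonedDataset_EMNIST_DIGITS(number_of_authors=280,
                                        number_of_pixels=4,
                                        poisoned_ratio=0.2,
                                        backdoor_value=1,
                                        initial_shuffle=True,
                                        seed=42)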
Example #14
def load_data():
    # Get numbers and letters data from EMNIST
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')

    # Remove capital letters
    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)

    # Merge train and test datasets
    X = np.vstack((X_train, X_test))
    labels = np.hstack((train_labels, test_labels))

    return X, labels
Example #15
def load_data():
    X_train, train_labels = extract_training_samples('byclass')
    X_test, test_labels = extract_test_samples('byclass')

    X_train, train_labels = remove_upper(X_train, train_labels)
    X_test, test_labels = remove_upper(X_test, test_labels)

    # character set after removing uppercase: digits plus lowercase
    # (unused below, but documents the 36-class label space)
    chars = '0123456789' + string.ascii_lowercase
    num_chars = len(chars)

    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)

    return X_train, X_test, train_labels, test_labels
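
The remove_upper helper used in the last two examples is not shown. A plausible sketch, assuming the standard 'byclass' label layout (0-9 digits, 10-35 uppercase, 36-61 lowercase):

# Hypothetical sketch of remove_upper: drop the uppercase classes (10..35)
# from a 'byclass' split and close the gap in the remaining labels.
import numpy as np

def remove_upper(X, y):
    mask = (y < 10) | (y > 35)       # keep digits and lowercase letters
    X, y = X[mask], y[mask]
    y = np.where(y > 35, y - 26, y)  # remap lowercase 36..61 -> 10..35
    return X, y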
Example #16
def save_emnist_reduced_letters_dataset():
    x_train, y_train = emnist.extract_training_samples('letters')
    x_test, y_test = emnist.extract_test_samples('letters')
    # Shift labels to the range [0..25]
    y_train = np.subtract(y_train, 1)
    y_test = np.subtract(y_test, 1)

    train_mask = label_filter(y_train)
    test_mask = label_filter(y_test)

    x_train_reduced = x_train[train_mask]
    y_train_reduced = y_train[train_mask]
    y_train_reduced = replace_x_letter_label(y_train_reduced)

    x_test_reduced = x_test[test_mask]
    y_test_reduced = y_test[test_mask]
    y_test_reduced = replace_x_letter_label(y_test_reduced)

    x_train_reduced, x_val_reduced, y_train_reduced, y_val_reduced = train_test_split(
        x_train_reduced, y_train_reduced, test_size=0.1)

    x_train_reduced = np.divide(x_train_reduced, 255).astype("float64")
    x_val_reduced = np.divide(x_val_reduced, 255).astype("float64")
    x_test_reduced = np.divide(x_test_reduced, 255).astype("float64")
    x_train_reduced = x_train_reduced.reshape(x_train_reduced.shape[0],
                                              x_train_reduced.shape[1],
                                              x_train_reduced.shape[2], 1)
    x_val_reduced = x_val_reduced.reshape(x_val_reduced.shape[0],
                                          x_val_reduced.shape[1],
                                          x_val_reduced.shape[2], 1)
    x_test_reduced = x_test_reduced.reshape(x_test_reduced.shape[0],
                                            x_test_reduced.shape[1],
                                            x_test_reduced.shape[2], 1)

    letters_dataset = {
        "x_train": x_train_reduced,
        "y_train": y_train_reduced,
        "x_val": x_val_reduced,
        "y_val": y_val_reduced,
        "x_test": x_test_reduced,
        "y_test": y_test_reduced
    }

    with open("eng_letters_dataset.bin", "wb") as file:
        pickle.dump(letters_dataset, file)
Example #17
    def get_data(self, s0):
        # load the full EMNIST 'byclass' split (62 classes) and one-hot encode
        self.x_train, self.y_train = extract_training_samples('byclass')
        self.x_test, self.y_test = extract_test_samples('byclass')
        self.y_test = oneHotEncodeY(self.y_test, 62)
        self.y_train = oneHotEncodeY(self.y_train, 62)
        self.x_train = self.x_train.astype('float32')
        self.y_train = self.y_train.astype('float32')
        self.x_test = self.x_test.astype('float32')
        self.y_test = self.y_test.astype('float32')

        # rescale pixels to [0, 1]
        self.x_train = self.x_train / 255.
        self.x_test = self.x_test / 255.

        # add a channel axis
        self.x_train = np.reshape(self.x_train, (self.x_train.shape[0], 28, 28, 1))
        self.x_test = np.reshape(self.x_test, (self.x_test.shape[0], 28, 28, 1))

        self.img_rows, self.img_cols, self.nchannels = self.x_train.shape[1:4]
        self.nb_classes = 62

        # hold out the first s0 test samples as a substitute set
        self.x_sub = self.x_test[:s0]
        self.y_sub = np.argmax(self.y_test[:s0], axis=1)

        self.x_test = self.x_test[s0:]
        self.y_test = self.y_test[s0:]
Example #18
    def load_data(self):
        """
        Load data from emnist package

        # Returns:
            all_data : train data, train labels, test data and test labels
        """
        self._train_data, self._train_labels = emnist.extract_training_samples(
            'digits')
        # one-hot encode labels via an identity-matrix lookup
        self._train_labels = np.eye(10)[self._train_labels]
        self._test_data, self._test_labels = emnist.extract_test_samples(
            'digits')
        self._test_labels = np.eye(10)[self._test_labels]

        self.shuffle()

        return self.data
Example #19
def get_data(experiment, occlusion=None, bars_type=None, one_hot=False):

    # Load the EMNIST 'balanced' data via the emnist package.
    (train_images,
     train_labels), (test_images,
                     test_labels) = emnist.extract_training_samples(
                         'balanced'), emnist.extract_test_samples('balanced')

    # (train_images, train_labels), (test_images, test_labels) = emnist.extract_training_samples(
    #     'letters'), emnist.extract_test_samples('letters')
    #     # train_labels = train_labels.reshape(-1, )
    #     # test_labels = test_labels.reshape(-1, )

    all_data = np.concatenate((train_images, test_images), axis=0)
    all_labels = np.concatenate((train_labels, test_labels), axis=0)

    # all_labels = all_labels - 1  # Change to 0-base index for letters

    # For Table 1 and Experiment 2
    # for i, l in enumerate(all_labels):
    #     all_labels[i] = {
    #         36: 10,
    #         37: 11,
    #         38: 13,
    #         39: 14,
    #         40: 15,
    #         41: 16,
    #         42: 17,
    #         43: 23,
    #         44: 26,
    #         45: 27,
    #         46: 29
    #     }.get(l, l)

    all_data = add_noise(all_data, experiment, occlusion, bars_type)

    # 131600 = 112800 train + 18800 test samples in EMNIST 'balanced'
    all_data = all_data.reshape(
        (all_data.shape[0], img_columns, img_rows, constants.colors))
    all_data = all_data.astype('float32') / 255

    if one_hot:
        # Change labels to binary rows: each label corresponds to a column, and
        # only the column for the corresponding label is set to one.
        all_labels = to_categorical(all_labels)

    return (all_data, all_labels)
Example #20
def test():

    # import the EMNIST letters testing data and labels
    testData, testLabels = emnist.extract_test_samples('letters')

    correct = 0
    for x in range(testData.shape[0]):
        inputs = norm(np.ndarray.flatten(testData[x]))
        guess = nn.guess(inputs)
        print("the letter was: " + str(testLabels[x]))
        # letter labels are 1-based, so shift the predicted index up by one
        guess = whatIndex(guess) + 1
        print("it guessed it was: " + str(guess))
        if guess == testLabels[x]:
            correct += 1

    # return the accuracy as a percentage
    return (correct / testData.shape[0] * 100)
Example #21
def load_data(plot=True):
    # extract data from EMNIST [letters]
    images_train, labels_train = extract_training_samples('letters')
    images_test, labels_test = extract_test_samples('letters')

    if plot:
        # randomly plot 25 letters
        f, axarr = plt.subplots(5, 5)
        indices, ctr = random.sample(range(labels_train.shape[0]), 25), 0
        for i in range(5):
            for j in range(5):
                idx = indices[ctr]
                axarr[i, j].imshow(images_train[idx], cmap="gray")
                axarr[i, j].set_title(f"{letters[labels_train[idx] - 1]}")
                ctr += 1
        plt.show()

    # flatten the last two dimensions to (N, 784)
    n_pixels = images_train.shape[1] * images_train.shape[2]
    return (images_train.reshape((images_train.shape[0], n_pixels)),
            images_test.reshape((images_test.shape[0], n_pixels)),
            labels_train, labels_test)
Example #22
def test(networkFilePath):
    n = NN.NeuralNetwork()
    n.loadFrom(networkFilePath)

    test_images, test_labels = emnist.extract_test_samples("letters")

    scorecard = []
    for i in range(len(test_images)):
        # letter labels are 1-based; shift to match the 0-based network output
        correctLabel = test_labels[i] - 1
        print("Correct label is", correctLabel)
        inputs = (np.asfarray(test_images[i].flatten()) / 255 * 0.99) + 0.01
        outputs = n.query(inputs)
        label = np.argmax(outputs)
        print("Network response is", label)
        print()
        scorecard.append(correctLabel == label)

    print("Report: ", scorecard)
    print("Total:", len(scorecard))
    print("Correct:", sum(scorecard))
    print("Accuracy:", sum(scorecard) / len(scorecard))
Example #23
def load_mnist_data(type='channel_last'):
    from emnist import extract_training_samples, extract_test_samples
    from keras.utils import np_utils
    
    # input image dimensions
    nb_classes = 26
    img_rows, img_cols = 28, 28
    
    X_train, Y_1 = extract_training_samples('letters')
    X_test, Y_2 = extract_test_samples('letters')
    
    # letter labels are 1..26; shift them to 0..25
    y_train = Y_1 - 1
    y_test = Y_2 - 1
    
    if type == 'channel_first':
        X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
        X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    else:
        X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
        X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    print('X_train shape:', X_train.shape)
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    return X_train, Y_train, X_test, Y_test
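
Note that this loader casts the images to float32 but never rescales them. A short usage sketch; a caller will usually want to normalize the pixels:

# Usage sketch: the loader does not rescale pixels, so divide by 255 here.
X_train, Y_train, X_test, Y_test = load_mnist_data(type='channel_last')
X_train /= 255.0
X_test /= 255.0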
Example #24
def main():
    np.set_printoptions(suppress=True)

    # prepare training and testing datasets
    training_images, training_labels = extract_training_samples('digits')
    test_images, test_labels = extract_test_samples('digits')
    training_images = training_images[0:10000]
    training_labels = training_labels[0:10000]
    
    tr_i = [training_images[i].flatten().reshape(784).tolist() for i in range(len(training_images))]
    for i in range(len(tr_i)):
        for j in range(len(tr_i[i])):
            tr_i[i][j] /= 255.0
            
    # encode each target as 0.01 everywhere except 0.99 at the true digit
    tr_o = [[0.01] * 10 for _ in range(len(training_labels))]
    for i in range(len(tr_o)):
        tr_o[i][training_labels[i]] = 0.99
       
    # initialize and train the network
    nn = NeuralNetwork(784, [16,16], 10)
    nn.train(tr_i, tr_o, 1000)

    # gauge performance
    correct = 0
    for test_image, test_label in zip(test_images[0:500], test_labels[0:500]):
        result = nn.feed_forward(test_image.flatten().reshape(784).tolist())
        print("network result:\n", result)
        # pick the index of the highest activation as the network's guess
        best = 0
        guess = -1
        for i, res in enumerate(result):
            if res > best:
                best = res
                guess = i
        print('network thinks this is a: ', guess)
        print("real answer:", test_label)
        if guess == int(test_label):
            correct += 1
    print('network was correct on ', correct, '/', 500, 'images')
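
The 0.01/0.99 target encoding above can also be built in one step with numpy; a minimal equivalent sketch:

# Vectorized equivalent of the 0.01/0.99 target encoding used above.
tr_o = np.full((len(training_labels), 10), 0.01)
tr_o[np.arange(len(training_labels)), training_labels] = 0.99
tr_o = tr_o.tolist()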
Example #25
def build_dataset_manual(dataset_name, opts):
    """ Build train, valid, test datasets based on model options """
    opts = MnistDataOptions(**opts)
    logging.info('Building dataset with options: %s', opts)

    # Load train and test data
    # MNIST: 60k train instances, 10k test instances
    if (dataset_name == 'MNIST'):
        train, test = tf.keras.datasets.mnist.load_data()
    elif (dataset_name == 'EMNIST'):
        train = extract_training_samples('bymerge')
        test = extract_test_samples('bymerge')
    else:
        raise ValueError('Dataset is not supported!')

    # All we need is the test dataset, so the train/valid split does not matter here
    if (opts.split == 'train'):
        images, labels = train[0][0:50000], train[1][0:50000]
    elif (opts.split == 'valid'):
        images, labels = train[0][50000:60000], train[1][50000:60000]
    elif (opts.split == 'test'):
        images, labels = test
    else:
        raise ValueError('opts.split is not valid!')

    # Add a channel axis
    if dataset_name in ('MNIST', 'EMNIST'):
        images = np.expand_dims(images, -1)
    images = images / 255

    if opts.rotate_degs:
        images = scipy.ndimage.rotate(images, opts.rotate_degs, axes=[-2, -3])
        images = _crop_center(images, 28)
    if opts.roll_pixels:
        images = np.roll(images, opts.roll_pixels, axis=-2)

    return images, labels
Example #26
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""
        from six.moves import urllib
        import gzip

        print("download: trying to download")
        if self._check_exists():
            print("download: already exists so exiting")
            return

        # download files
        try:
            os.makedirs(os.path.join(self.root, self.raw_folder))
            os.makedirs(os.path.join(self.root, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        for url in self.urls:
            print('Downloading ' + url)
            data = urllib.request.urlopen(url)
            filename = url.rpartition('/')[2]
            file_path = os.path.join(self.root, self.raw_folder, filename)
            with open(file_path, 'wb') as f:
                f.write(data.read())
            with open(file_path.replace('.gz', ''), 'wb') as out_f, \
                    gzip.GzipFile(file_path) as zip_f:
                out_f.write(zip_f.read())
            os.unlink(file_path)

        # process and save as torch files
        print('Processing...')
        train_label, train_non_few_shot_ids, train_few_shot_ids = read_label_file(
            os.path.join(self.root, self.raw_folder,
                         'train-labels-idx1-ubyte'), self.few_shot_class)
        train_img = read_image_file(os.path.join(self.root, self.raw_folder,
                                                 'train-images-idx3-ubyte'),
                                    non_few_shot_ids=train_non_few_shot_ids)

        training_set = (train_img, train_label)

        test_label, test_non_few_shot_ids, test_few_shot_ids = read_label_file(
            os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'),
            self.few_shot_class)
        test_img = read_image_file(os.path.join(self.root, self.raw_folder,
                                                't10k-images-idx3-ubyte'),
                                   few_shot_ids=test_few_shot_ids)

        if self.test_emnist:
            print("Download: Entering Emnist test")
            from emnist import extract_test_samples
            images, labels = extract_test_samples('letters')
            print(images.shape)
            print(labels.shape)
            # note: the random letter index below is never used; the code
            # actually keeps every sample whose label is below 10
            import random
            rand_letter_idx = random.randint(0, 25)
            # indices for the selected classes
            test_sample_ids = np.where(labels < 10)[0]
            np.random.seed(10)
            np.random.shuffle(test_sample_ids)

            print('test_sample_ids_len', len(test_sample_ids))
            #grab labels and images from that class
            labels = labels[test_sample_ids]
            images = images[test_sample_ids]
            print("After selecting one class")
            print(images.shape)
            print(labels.shape)
            #assert(self.few_shot_class not in labels)
            # use tuples (not set literals) so test_set keeps the
            # (images, labels) order, matching training_set above
            if self.max_test_sample:
                test_set = (
                    torch.ByteTensor(list(images[:self.max_test_sample])).view(
                        -1, 28, 28),
                    torch.LongTensor(list(labels[:self.max_test_sample]))
                )
            else:
                test_set = (
                    torch.ByteTensor(list(images)).view(-1, 28, 28),
                    torch.LongTensor(list(labels))
                )
        else:
            # test_label, test_non_few_shot_ids, test_few_shot_ids=  read_label_file(os.path.join(self.root, self.raw_folder, 't10k-labels-idx1-ubyte'), self.few_shot_class)
            # test_img = read_image_file(os.path.join(self.root, self.raw_folder, 't10k-images-idx3-ubyte'), few_shot_ids=test_few_shot_ids)
            if (self.max_test_sample):
                print('testing max test sample')
                test_set = (test_img[:self.max_test_sample],
                            test_label[:self.max_test_sample])

            else:
                test_set = (test_img, test_label)
        print('confirming test size')
        #print(len(test_set[0]), len(test_set[1]))
        with open(
                os.path.join(self.root, self.processed_folder,
                             self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(
                os.path.join(self.root, self.processed_folder, self.test_file),
                'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Example #27
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    emnist_train_images, emnist_train_labels = extract_training_samples(
        'letters')
    emnist_test_images, emnist_test_labels = extract_test_samples('letters')

    train_images = emnist_train_images
    train_labels = makes_one_hot_vectors(emnist_train_labels)
    test_images = emnist_test_images
    test_labels = makes_one_hot_vectors(emnist_test_labels)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #28
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D

from emnist import extract_training_samples, extract_test_samples
images_train, labels_train = extract_training_samples('balanced')
images_test, labels_test = extract_test_samples('balanced')

dims = images_train.shape[1] * images_train.shape[2]

## DENSE NN
X_train = images_train.reshape(images_train.shape[0], dims)
X_test = images_test.reshape(images_test.shape[0], dims)

## CONV NN
# X_train = images_train.reshape(images_train.shape[0], 28,28,1)
# X_test = images_test.reshape(images_test.shape[0], 28,28,1)

print("Training Shape:", X_train.shape)
print("Testing Shape:", X_test.shape)

X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
Example #29
# ignore warning messages
import warnings
warnings.filterwarnings('ignore')

import seaborn as sns  # assumed import: sns is used below but never imported in this snippet
sns.set()

# pip install emnist
# Import Dataset(s)
from emnist import list_datasets
print(list_datasets())  # show the available EMNIST splits

from emnist import extract_training_samples, extract_test_samples
images_train, labels_train = extract_training_samples('letters')
images_test, labels_test = extract_test_samples('letters')

# Flatten Data
dims = images_train.shape[1] * images_train.shape[2]
X_train = images_train.reshape(images_train.shape[0], dims)
X_test = images_test.reshape(images_test.shape[0], dims)

# Rescale to 0 -> 1 by dividing by max pixel value (255)
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# One-Hot Encoding

from keras.utils import np_utils  # used to convert an array of labels to one-hot vectors
# letter labels run 1..26, so one-hot encoding the raw labels yields 27
# columns (index 0 is never used); this affects accuracy, since the model
# is given a class for which there will never be any examples
from google.colab import drive
drive.mount('/content/gdrive/')

# Commented out IPython magic to ensure Python compatibility.
!pip install emnist
import emnist
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
import matplotlib.pyplot as plt
import os
# %matplotlib inline

# Load Dataset
train_data, train_labels = emnist.extract_training_samples('bymerge')
test_data, test_labels = emnist.extract_test_samples('bymerge')
test = test_labels  # keep a copy of the raw integer test labels
plt.imshow(test_data[0])

# Reshape the data to 28x28 images with a single channel axis
train_data = train_data.reshape(train_data.shape[0], 28, 28, 1).astype('float32')
test_data = test_data.reshape(test_data.shape[0], 28, 28, 1).astype('float32')

# Normalize pixel values to [0, 1]
train_data = train_data / 255
test_data = test_data / 255

# One-Hot Encoding
train_labels = np_utils.to_categorical(train_labels)
test_labels = np_utils.to_categorical(test_labels)