Example #1
def GetMnistData(data_path):
    """
    Function:
        Read the MNIST dataset and convert it into the desired format.
        Input shape:
            if not CNN: (60000, 784)
            elif CNN:   (60000, 28, 28, 1)
        Output: one-hot labels, e.g.
            [0, 1, 0, 0, ..., 0]
    """
    # read dataset
    X_train, Y_train_original = loadlocal_mnist(
        images_path=data_path + "train-images-idx3-ubyte",
        labels_path=data_path + "train-labels-idx1-ubyte")
    X_test, Y_test_original = loadlocal_mnist(
        images_path=data_path + "t10k-images-idx3-ubyte",
        labels_path=data_path + "t10k-labels-idx1-ubyte")
    # convert to float32 and scale to [0, 1]
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    X_train /= 255.
    X_test /= 255.

    # find how many classes
    all_classes = np.unique(Y_train_original)
    num_class = len(all_classes)
    num_input = X_train.shape[1]

    # convert labels to one-hot format
    Y_train = TranslateLables(Y_train_original, num_class)
    Y_test = TranslateLables(Y_test_original, num_class)
    return X_train, Y_train, X_test, Y_test, num_input, num_class
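Note: TranslateLables is not defined in this example. A minimal sketch of the one-hot conversion it presumably performs (per the docstring; this is an assumption, not the original helper) could be:

import numpy as np

def TranslateLables(labels, num_class):
    # assumed helper: convert integer labels into one-hot rows
    one_hot = np.zeros((len(labels), num_class), dtype=np.float32)
    one_hot[np.arange(len(labels)), labels] = 1.0
    return one_hot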
Example #2
def load_mnist(path=MNIST_PATH):
    X_train, y_train = loadlocal_mnist(
        images_path=MNIST_PATH + 'train-images.idx3-ubyte',
        labels_path=MNIST_PATH + 'train-labels.idx1-ubyte')
    X_test, y_test = loadlocal_mnist(
        images_path=MNIST_PATH + 't10k-images.idx3-ubyte',
        labels_path=MNIST_PATH + 't10k-labels.idx1-ubyte')
    return X_train, X_test, y_train, y_test
Example #3
 def data_processing(self):
     train_x, train_y = loadlocal_mnist("/content/drive/MyDrive/ass3_data/train-images.idx3-ubyte", "/content/drive/MyDrive/ass3_data/train-labels.idx1-ubyte")
     test_x, test_y = loadlocal_mnist("/content/drive/MyDrive/ass3_data/t10k-images.idx3-ubyte", "/content/drive/MyDrive/ass3_data/t10k-labels.idx1-ubyte")
     self.train_x = preprocessing.normalize(train_x)
     self.test_x = preprocessing.normalize(test_x)
     enc = OneHotEncoder(sparse=False, categories='auto')
     self.train_y = enc.fit_transform(train_y.reshape(len(train_y), -1))
     self.test_y = enc.transform(test_y.reshape(len(test_y), -1))
Example #4
def load_idx(path):
    X_train, y_train = loadlocal_mnist(
        images_path=path + 'train-images-idx3-ubyte',
        labels_path=path + 'train-labels-idx1-ubyte')

    X_test, y_test = loadlocal_mnist(
        images_path=path + 't10k-images-idx3-ubyte',
        labels_path=path + 't10k-labels-idx1-ubyte')
    return (X_train, y_train, X_test, y_test)
Example #5
def load_data_fashion_mnist(train_path, test_path):
    from mlxtend.data import loadlocal_mnist
    train_image, train_label = loadlocal_mnist(
        images_path=train_path + "/train_image/train_image",
        labels_path=train_path + "/train_label/train_label")
    test_image, test_label = loadlocal_mnist(
        images_path=test_path + "/test_image/test_image",
        labels_path=test_path + "/test_label/test_label")
    return train_image, train_label, test_image, test_label
Example #6
def load_data():
  files = {
    "X": "train-images-idx3-ubyte",
    "y": "train-labels-idx1-ubyte",
    "X_test": "t10k-images-idx3-ubyte",
    "y_test": "t10k-labels-idx1-ubyte"
  }
  X, y = loadlocal_mnist(images_path = files['X'], labels_path = files['y'])
  X_test, y_test = loadlocal_mnist(images_path = files['X_test'], labels_path = files['y_test'])
  return X, X_test, y, y_test 
Example #7
def load_MNIST(data_address):
     train_images, train_labels = loadlocal_mnist(
               images_path=data_address+'/train-images.idx3-ubyte',
               labels_path=data_address+'/MNIST dataset/train-labels.idx1-ubyte')

     test_images, test_labels = loadlocal_mnist(
               images_path=data_address+'/t10k-images.idx3-ubyte',
               labels_path=data_address+'/t10k-labels.idx1-ubyte')

     return  train_images, train_labels,  test_images, test_labels
Example #8
def prepare_mnist_data():
    X_train, y_train = loadlocal_mnist(
        images_path='../mnist/train-images-idx3-ubyte',
        labels_path='../mnist/train-labels-idx1-ubyte')

    X_test, y_test = loadlocal_mnist(
        images_path='../mnist/t10k-images-idx3-ubyte',
        labels_path='../mnist/t10k-labels-idx1-ubyte')

    return X_train, y_train, X_test, y_test
Example #9
def loadMnist():
    train_images, train_labels = loadlocal_mnist(
        images_path='MNIST/train-images-idx3-ubyte',
        labels_path='MNIST/train-labels-idx1-ubyte')
    print(train_images.shape)
    print(train_labels.shape)
    test_images, test_labels = loadlocal_mnist(
        images_path='MNIST/t10k-images-idx3-ubyte',
        labels_path='MNIST/t10k-labels-idx1-ubyte')
    return train_images, train_labels, test_images, test_labels
Example #10
 def __init__(self):
     self.On = True
     self.X, self.y = loadlocal_mnist(
         images_path='./data/train-images.idx3-ubyte',
         labels_path='./data/train-labels.idx1-ubyte')
     self.X_test, self.y_test = loadlocal_mnist(
         images_path='./data/t10k-images.idx3-ubyte',
         labels_path='./data/t10k-labels.idx1-ubyte')
     self.sizeX = self.X.shape[1]
     self.sizey = 10
     self.network = False
Example #11
def loadMnist():
    data_train, label_train = loadlocal_mnist(
        images_path=os.getcwd() + '/train-images-idx3-ubyte',
        labels_path=os.getcwd() + '/train-labels-idx1-ubyte')
    data_test, label_test = loadlocal_mnist(
        images_path=os.getcwd() + '/t10k-images-idx3-ubyte',
        labels_path=os.getcwd() + '/t10k-labels-idx1-ubyte')
    #normalize the data
    data_train_norm = data_train.astype(np.float64) / 255.
    data_test_norm = data_test.astype(np.float64) / 255.
    return data_train_norm, data_test_norm, label_train, label_test
Example #12
    def __load_mnist(self):
        """
            Load the mnist files
        """
        self._x_train, self._y_train = loadlocal_mnist(
            images_path=join(self.mnist_path, 'train-images-idx3-ubyte'),
            labels_path=join(self.mnist_path, 'train-labels-idx1-ubyte'))

        self._x_test, self._y_test = loadlocal_mnist(
            images_path=join(self.mnist_path, 't10k-images-idx3-ubyte'),
            labels_path=join(self.mnist_path, 't10k-labels-idx1-ubyte'))
Example #13
    def __init__(self, folder_path, dataset_type='train'):

        self.dataset_type = dataset_type

        if dataset_type == 'train':
            self.x, self.y = loadlocal_mnist(
                images_path=folder_path + '/train-images.idx3-ubyte',
                labels_path=folder_path + '/train-labels.idx1-ubyte')
        else:
            self.x, self.y = loadlocal_mnist(
                images_path=folder_path + '/t10k-images.idx3-ubyte',
                labels_path=folder_path + '/t10k-labels.idx1-ubyte')
Example #14
def get_test_data():
    """ zwraca testowe znormalizowane dane, X to obrazy, y to odpowiadająca mu cyfry"""
    if not platform.system() == 'Windows':
        X, y = loadlocal_mnist(images_path='t10k-images-idx3-ubyte',
                               labels_path='t10k-labels-idx1-ubyte')
    else:
        X, y = loadlocal_mnist(images_path='t10k-images.idx3-ubyte',
                               labels_path='t10k-labels.idx1-ubyte')

    X = X.astype(float) / 255

    return X, y
Example #15
def load_data_from_file():
    # load the data
    x_train, y_train = loadlocal_mnist(images_path='train-images-idx3-ubyte',
                                       labels_path='train-labels-idx1-ubyte')

    x_test, y_test = loadlocal_mnist(images_path='test-images-idx3-ubyte',
                                     labels_path='test-labels-idx1-ubyte')
    # normalize our inputs to be in the range [-1, 1]
    x_train = (x_train.astype(np.float32) - 127.5) / 127.5
    # convert x_train with a shape of (7317, 100, 100) to (7317, 10000) so we have
    # 10000 columns per row
    x_train = x_train.reshape(7317, 10000)
    return (x_train, y_train, x_test, y_test)
Example #16
def train(model, lrate, n_epochs, optim_fun):
    X, y = loadlocal_mnist("mnist_data/train-images.txt",
                           "mnist_data/train-labels.txt")

    model.to(device)

    loss_fun = nn.NLLLoss()
    optimizer = optim_fun(model.parameters(), lr=lrate)

    for epoch in range(n_epochs):
        avg_loss = 0
        start_time = time.time()
        for (img, label) in zip(X, y):
            model.zero_grad()

            x = x2tensor(img)

            target = y2tensor(label)

            logits = model(x)

            loss = loss_fun(logits, target)

            loss.backward()
            optimizer.step()

            avg_loss += loss.item()

        print("loss[" + str(epoch) + "] = " + str(avg_loss / X.shape[0]))
        print("------------------------------------took " +
              comp_time(start_time))
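The helpers x2tensor, y2tensor and comp_time used above are not part of this snippet; a plausible sketch, assuming a flat 784-pixel image, per-sample normalization, and an nn.NLLLoss target with a batch dimension of 1, is shown below (these are guesses at the missing helpers, not the original code):

import time
import torch

def x2tensor(img):
    # assumed helper: flat uint8 image -> float32 tensor of shape (1, 784) on the device, scaled to [0, 1]
    return torch.tensor(img, dtype=torch.float32, device=device).view(1, -1) / 255.

def y2tensor(label):
    # assumed helper: scalar class label -> LongTensor target of shape (1,)
    return torch.tensor([label], dtype=torch.long, device=device)

def comp_time(start_time):
    # assumed helper: elapsed wall-clock time as a readable string
    return "%.2f s" % (time.time() - start_time)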
Example #17
def ExtractAndReshape(imagesPath, labelsPath):
    images, images_labels = loadlocal_mnist(images_path=imagesPath,
                                            labels_path=labelsPath)

    images = images.reshape(len(images), 28, 28)
    images = np.array(images)
    return images, images_labels
Example #18
def read_data_normalize_and_add_bias(classes, features):
	"""Read in the file with classes (labels) and the features for each class
	Normalizes the features and prepends a bias to each data instance
	
	Arguments:
		classes  -- the labels
		features  -- the features for each class
	
	Returns:
		The normalized data with a bias and the labels
	"""
	
	data, labels = loadlocal_mnist(
		images_path=features, 
		labels_path=classes)
	rows = data.shape[0]

	# normalize
	data = add_bias(data / 255, rows)
	
	# # make a bias for every row
	# bias = np.ones([rows, 1], dtype=float)

	# # prepend the bias to the data
	# # axis=1 concatenates columns (side by side); axis=0 stacks rows
	# data = np.concatenate((bias, data), axis=1)

	# return the biased data and the labels
	return data, labels
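add_bias itself is not shown in this example, but the commented-out block documents what it presumably does: prepend a column of ones to every row. A sketch under that assumption:

import numpy as np

def add_bias(data, rows):
    # prepend a bias column of ones to each data instance (column-wise concatenation)
    bias = np.ones([rows, 1], dtype=float)
    return np.concatenate((bias, data), axis=1)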
Example #19
 def set_train_dataset(self):
     # Data source
     image = 'data/train-images.idx3-ubyte'
     label = 'data/train-labels.idx1-ubyte'
     x, y = loadlocal_mnist(images_path=image, labels_path=label)
     self.train_data = x
     self.train_label = np.array([self.get_label(l) for l in y])
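get_label is defined elsewhere in this class; one plausible reading, assuming the model expects one-hot targets for the 10 digits, is the hypothetical sketch below:

def get_label(self, digit, num_class=10):
    # hypothetical helper: map an integer digit to a one-hot target vector
    label = np.zeros(num_class, dtype=np.float32)
    label[digit] = 1.0
    return label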
Example #20
File: utils.py Project: dliud/gan
def loadDataset(train_size=1000,
                batch_size=100,
                randSeed=17,
                image_path='./mnist/train-images-idx3-ubyte',
                label_path='./mnist/train-labels-idx1-ubyte'):
    """
    return: a list of DataLoaders, one per digit, each serving train_size images of that digit in batches of batch_size
    """
    random.seed(randSeed)
    train_images, train_labels = loadlocal_mnist(images_path=image_path,
                                                 labels_path=label_path)

    sortedImages = [[] for _ in range(10)]
    for i in range(len(train_labels)):
        sortedImages[train_labels[i]].append(train_images[i])

    for images in sortedImages:
        random.shuffle(images)

    allData = torch.zeros((0, 785))
    dataLoaders = []
    for i in range(10):
        data = (torch.tensor(sortedImages[i][:train_size]) - 128.) / 128
        labeled = torch.cat((data, i * torch.ones((data.shape[0], 1))), 1)
        allData = torch.cat((allData, labeled), 0)
        dataLoaders.append(
            torch.utils.data.DataLoader(data,
                                        batch_size=batch_size,
                                        shuffle=True))

    labeledDataLoader = torch.utils.data.DataLoader(allData,
                                                    batch_size=batch_size,
                                                    shuffle=True)
    return dataLoaders, labeledDataLoader
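A minimal usage sketch for the function above (hypothetical; it assumes the default ./mnist file paths exist) that pulls one batch from each per-digit loader:

dataLoaders, labeledDataLoader = loadDataset(train_size=1000, batch_size=100)
for digit, loader in enumerate(dataLoaders):
    batch = next(iter(loader))  # shape (100, 784), pixel values scaled to roughly [-1, 1]
    print(digit, batch.shape)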
Example #21
File: utils.py Project: dliud/gan
def get_dev_accuracy(classifier,
                     dev_size=1000,
                     randSeed=17,
                     image_path='./mnist/train-images-idx3-ubyte',
                     label_path='./mnist/train-labels-idx1-ubyte'):
    random.seed(randSeed)
    train_images, train_labels = loadlocal_mnist(images_path=image_path,
                                                 labels_path=label_path)

    sortedImages = [[] for _ in range(10)]
    for i in range(len(train_labels)):
        sortedImages[train_labels[i]].append(train_images[i])

    for images in sortedImages:
        random.shuffle(images)

    test_images = []
    test_labels = []
    for i in range(10):
        test_images += sortedImages[i][-dev_size:]
        test_labels += [i for j in range(dev_size)]

    test = (torch.tensor(test_images) - 128.) / 128
    test_labels = torch.tensor(test_labels)
    predictions = classifier.predict(test)
    predictions = predictions.type(torch.uint8)
    return torch.mean(torch.eq(predictions, test_labels).float()).item()
Example #22
    def _save_as_numpy(self,
                       image_path=None,
                       label_path=None,
                       image_save_path=None,
                       label_save_path=None):
        """
        save the files to *.npy format
        :param image_path: MNIST image data path (extract file path)
        :param label_path: MNIST image label path (extract file path)
        :param image_save_path: MNIST image save path as npy
        :param label_save_path: MNIST label save path as npy
        """
        images, labels = loadlocal_mnist(images_path=image_path,
                                         labels_path=label_path)

        self._image_data = images
        self._label_data = labels
        self._image_data_path = image_save_path
        self._label_data_path = label_save_path
        if not FileUtils.check_exist_with_message(
                file_path=image_save_path, message="Images Already Saved!"):
            np.save(image_save_path, images)
        if not FileUtils.check_exist_with_message(
                file_path=label_save_path, message="Labels Already Saved!"):
            np.save(label_save_path, labels)
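FileUtils.check_exist_with_message is an external helper not shown here; judging from its call sites, it returns True (and reports the message) when the target file already exists. A hypothetical sketch:

import os

class FileUtils:
    @staticmethod
    def check_exist_with_message(file_path=None, message=""):
        # hypothetical helper: True if the file already exists; print the message in that case
        if file_path is not None and os.path.exists(file_path):
            print(message)
            return True
        return False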
Example #23
def trainData():
    global epochsCount
    global optimizer
    global lossFunction
    global accuracyData

    #sys.stdout = open(os.devnull, 'w')

    myWin.consoleWrite("Loading training data ...")
    x_train, y_train = loadlocal_mnist(images_path = "train-images-idx3-ubyte", labels_path = "train-labels-idx1-ubyte")
    x_train = tf.keras.utils.normalize(x_train, axis = 1)

    myWin.consoleWrite("Adding neural network layers ...")
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(784, activation = tf.nn.relu))
    model.add(tf.keras.layers.Dense(196, activation = tf.nn.relu))
    model.add(tf.keras.layers.Dense(10, activation = tf.nn.softmax))

    myWin.consoleWrite("Optimizer = " + str(optimizer))
    myWin.consoleWrite("Loss Function = " + str(lossFunction))
    model.compile(optimizer = optimizer, loss = lossFunction, metrics = ["acc", "mse"])
    
    myWin.consoleWrite("Fitting model ...")
    history = model.fit(x_train, y_train, epochs = epochsCount)
    accuracyData = history.history["acc"]
    for metric in model.metrics_names:
        metricList = history.history[metric]
        myWin.consoleWrite(metric + " = " + str(round(metricList[len(metricList) - 1], 4)))

    model.save("smartwardrobe.model")
Example #24
def loadMNIST(path):
    # Load the MNIST dataset and divide it into train, validation and test sets
    X_train, Y_train = loadlocal_mnist(
        images_path=path + 'train-images-idx3-ubyte',
        labels_path=path + 'train-labels-idx1-ubyte')

    X_test, Y_test = loadlocal_mnist(
        images_path=path + 't10k-images-idx3-ubyte',
        labels_path=path + 't10k-labels-idx1-ubyte')

    X_validation = X_train[55000:, :]
    Y_validation = Y_train[55000:]

    X_train = X_train[0:55000, :]
    Y_train = Y_train[0:55000]

    return X_train, Y_train, X_validation, Y_validation, X_test, Y_test
Example #25
def load_mnist():
    X, y = loadlocal_mnist(
        images_path=
        r"C:\data\git\Dict\Dictionary_Learning\data\t10k-images.idx3-ubyte",
        labels_path=
        r"C:\data\git\Dict\Dictionary_Learning\data\t10k-labels.idx1-ubyte")
    X = np.reshape(np.uint8(X), (X.shape[0], 28, 28))
    return X, y
Example #26
def get_train_validation_data(ratio):
    """ zwraca treningowe znormalizowane dane, X to obrazy, y to odpowiadająca mu cyfry
    ratio - jaka część będzie treningowa (z 60000)"""
    if not platform.system() == 'Windows':
        X, y = loadlocal_mnist(images_path='train-images-idx3-ubyte',
                               labels_path='train-labels-idx1-ubyte')
    else:
        X, y = loadlocal_mnist(images_path='train-images.idx3-ubyte',
                               labels_path='train-labels.idx1-ubyte')

    X = X.astype(float) / 255
    X_train = X[0:int(ratio * len(X))]
    y_train = y[0:int(ratio * len(y))]
    X_validation = X[int(ratio * len(X)):len(X)]
    y_validation = y[int(ratio * len(y)):len(y)]

    return X_train, y_train, X_validation, y_validation
Example #27
def extract_mnist_dataset():
    PROJECT_DIR = os.getcwd()

    dataset_path = PROJECT_DIR + "/hw5/dataset/"
    X, Y = loadlocal_mnist(
        images_path=dataset_path + 'train-images.idx3-ubyte',
        labels_path=dataset_path + 'train-labels.idx1-ubyte')
    return X, Y
Example #28
def read_emnist_data():
    path = "../../../datasets/emnist"
    print("Path for EMNIST data is " + path)

    x_train, y_train = loadlocal_mnist(images_path=path + "/train-images-ubyte", labels_path=path + "/train-labels-ubyte")
    x_test, y_test = loadlocal_mnist(images_path=path + "/test-images-ubyte", labels_path=path + "/test-labels-ubyte")

    x_train = x_train.reshape((len(x_train), 28, 28), order='F') / 255.0
    x_test = x_test.reshape((len(x_test), 28, 28), order='F') / 255.0

    def reducer(x):
        return x - 1

    # reduce every label by 1, because the labels don't start at 0
    y_train = reducer(y_train)
    y_test = reducer(y_test)

    return x_train, y_train, x_test, y_test
Example #29
def read_test_mnist():
    X, y = loadlocal_mnist(
        images_path=
        '/Users/marek/marek_files/priv/mini/perceptron/mnist/t10k-images.idx3-ubyte',
        labels_path=
        '/Users/marek/marek_files/priv/mini/perceptron/mnist/t10k-labels.idx1-ubyte'
    )

    return X, y
Example #30
def save_mnist():
    X, y = loadlocal_mnist(images_path='train-images.idx3-ubyte',
                           labels_path='train-labels.idx1-ubyte')
    y = y.reshape(len(y), 1)

    X = np.where(X > 0, 1, 0)

    data = np.concatenate((X, y), axis=1)
    np.savetxt(fname='data.csv', X=data, delimiter=',', fmt='%d')
Example #31
def KNN_classifier(nearest_neighours = 3, metric = 'manhattan'):
    # print "Creating Dataset from MNIST Data"
    start_time = time.time()
    training_image_data, training_label_data = loadlocal_mnist(
        images_path=os.getcwd()+'/train-images.idx3-ubyte', 
        labels_path=os.getcwd()+'/train-labels.idx1-ubyte')
    testing_image_data, testing_label_data = loadlocal_mnist(
        images_path=os.getcwd()+'/t10k-images.idx3-ubyte', 
        labels_path=os.getcwd()+'/t10k-labels.idx1-ubyte')
    end_time = time.time() - start_time
    # print "It took "+ str(end_time) + " to make the dataset"

    # print '\nTraining data'
    start_time = time.time()
    knn_classifier = KNeighborsClassifier(n_neighbors=nearest_neighours, metric=metric)
    knn_classifier.fit(training_image_data, training_label_data) 
    end_time = time.time() - start_time
    # print "It took "+ str(end_time) + " to train the classifier"
    # print 'Training Completed'

    # print '\nTesting data '
    start_time = time.time()
    match_knn_classifier = 0
    unmatch_knn_classifier = 0
    predicted_labels = knn_classifier.predict(testing_image_data)
    for i in range(0,len(testing_image_data)):
        if( testing_label_data[i] == predicted_labels[i]):
            match_knn_classifier = match_knn_classifier + 1
        else:
            unmatch_knn_classifier = unmatch_knn_classifier + 1
    knn_classifier_accuracy = float(match_knn_classifier) / (match_knn_classifier + unmatch_knn_classifier)
    # knn_classifier_accuracy = knn_classifier.score(images_test, labels_test)
    end_time = time.time() - start_time
    # print "It took "+ str(end_time) + " to test the data "

    # print '\nPrinting Accuracy'
    print "\nTesting for n_neighbors = "+str(nearest_neighours)+" and metric = "+str(metric)
    print "-------------------------------------------------"
    print "KNeighborsClassifier accuracy : "+ str(knn_classifier_accuracy)

    return knn_classifier_accuracy
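A hypothetical driver for the function above, sweeping a few settings (assuming the MNIST idx files sit in the current working directory as the paths suggest):

for k in (1, 3, 5):
    for m in ('manhattan', 'euclidean'):
        # prints and returns the test accuracy for each configuration
        KNN_classifier(nearest_neighours=k, metric=m)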