예제 #1
0
def main():
    """Run the PCA demo on one clean and one noisy MNIST training image.

    Loads MNIST from ``./Data/``, L2-normalises the training images with
    ``preprocessing.normalize`` and feeds the image at index 1 through
    ``convert_pca``.  The result is shown with ``plot_demo`` and
    ``plot_scree``.  The same steps are then repeated on a noisy copy of
    the training set produced by ``create_noise``.
    """
    loader = MNIST('./Data/')
    raw_train, raw_train_labels = loader.load_training()
    raw_test, _raw_test_labels = loader.load_testing()

    train_pixels = np.asarray(raw_train)
    # The test images and the labels are converted but not used by this demo.
    _test_pixels = np.asarray(raw_test)
    clean = preprocessing.normalize(train_pixels)
    _train_labels = np.asarray(raw_train_labels)

    # PCA of a single clean image, shown next to the 28x28 original.
    clean_sample = clean[1]
    reconstruction, spectrum = convert_pca(clean_sample)
    plot_demo(np.array(clean_sample).reshape(28, 28), reconstruction)

    # Scree plot: how much of the variance each component accounts for.
    plot_scree(spectrum)

    # Same pipeline on the noisy data (noise is added before normalising).
    noisy = preprocessing.normalize(np.array(create_noise(train_pixels)))
    noisy_sample = noisy[1]
    reconstruction, spectrum = convert_pca(noisy_sample)
    plot_demo(np.array(noisy_sample).reshape(28, 28),
              reconstruction,
              fname='Noise_Comparison')
    plot_scree(spectrum, n=15, name='Scree_Plot_Noise')
예제 #2
0
def MnistData(filepath=mnistpath):
    '''Load MNIST and return it as pytorch tensors.

    Input
    -----

    filepath - 'string'
        Location of the MNIST dataset files

    Output
    ------

    Xtrain - 'torch.tensor'
        Training images, pixel values divided by 255

    trainLabels - 'torch.tensor'
        Labels of the training images

    Xtest - 'torch.tensor'
        Test images, pixel values divided by 255

    testLabels - 'torch.tensor'
        Labels of the test images
    '''
    loader = MNIST(filepath)

    train_pixels, train_targets = loader.load_training()
    Xtrain = torch.tensor(train_pixels)
    trainLabels = torch.tensor(train_targets)

    test_pixels, test_targets = loader.load_testing()
    Xtest = torch.tensor(test_pixels)
    testLabels = torch.tensor(test_targets)

    # Scale the raw 0..255 pixel values down to the unit interval.
    return Xtrain / 255.0, trainLabels, Xtest / 255.0, testLabels
예제 #3
0
파일: pca.py 프로젝트: salezaraus/CSE546-
def MnistData(filepath=mnistpath):
    '''Load the MNIST training file and split its images in two.

    Input
    -----

    filepath - 'string'
        Location of the MNIST dataset files

    Output
    ------
    Xtrain - 'np.array'
        First 50000 training images, pixel values divided by 255

    Labels - 'np.array'
        Labels of ALL loaded training images (this array is not split)

    Xtest - 'np.array'
        Remaining training images (index 50000 onwards), divided by 255
    '''
    split_at = 50000

    loader = MNIST(filepath)
    raw_images, raw_labels = loader.load_training()
    pixels = np.array(raw_images)
    Labels = np.array(raw_labels)

    # Both halves are rescaled independently to the unit interval.
    return pixels[:split_at] / 255.0, Labels, pixels[split_at:] / 255.0
예제 #4
0
def get_mnist():
    """Load MNIST and return it as lists of (input, target) column vectors.

    Returns
    -------
    tuple
        ``(training_data, testing_data)``.  Each is a list of
        ``(x, y)`` pairs where ``x`` is a ``(784, 1)`` float32 array of
        pixels scaled by 1/255 and ``y`` is a ``(10, 1)`` one-hot array.
    """
    loader = MNIST('../datasets/MNIST')
    print("Loading Data ... ")
    raw_train_x, raw_train_y = loader.load_training()  # 60000 samples
    raw_test_x, raw_test_y = loader.load_testing()  # 10000 samples

    train_pixels = np.asarray(raw_train_x).astype(np.float32) / 255
    train_digits = np.asarray(raw_train_y).astype(np.int32)
    test_pixels = np.asarray(raw_test_x).astype(np.float32) / 255
    test_digits = np.asarray(raw_test_y).astype(np.int32)

    print("formatting data")

    def one_hot(digit):
        # Column vector with a single 1 at the row given by the digit.
        target = np.zeros((10, 1))
        target[digit] = 1
        return target

    train_targets = [one_hot(digit) for digit in train_digits]
    test_targets = [one_hot(digit) for digit in test_digits]

    training_data = [(row.reshape(784, 1), target)
                     for row, target in zip(train_pixels, train_targets)]
    testing_data = [(row.reshape(784, 1), target)
                    for row, target in zip(test_pixels, test_targets)]

    return (training_data, testing_data)
예제 #5
0
def get_mnist_data():
    """Load MNIST from ``python-mnist/data`` as numpy arrays.

    Returns ``(train_X, train_y, test_X, test_y)``; the image arrays are
    reshaped to ``(-1, 28, 28)`` and the labels are plain 1-D arrays.
    """
    loader = MNIST('python-mnist/data')
    raw_train_X, raw_train_y = loader.load_training()
    raw_test_X, raw_test_y = loader.load_testing()

    def as_images(flat_rows):
        # One 28x28 grid per flat pixel row.
        return np.array(flat_rows).reshape(-1, 28, 28)

    train_X = as_images(raw_train_X)
    test_X = as_images(raw_test_X)
    return train_X, np.array(raw_train_y), test_X, np.array(raw_test_y)
예제 #6
0
def classifier():
    """Return a KNeighborsClassifier fitted on the first 100 MNIST samples.

    The training set is read from ``src/data/``; only the leading 100
    images and labels are used for fitting.
    """
    sample_count = 100

    loader = MNIST("src/data/")
    all_images, all_labels = loader.load_training()

    model = KNeighborsClassifier()
    model.fit(all_images[:sample_count], all_labels[:sample_count])
    return model
예제 #7
0
파일: manager.py 프로젝트: rmyhovych/wgan
def save():
    """Shuffle the MNIST training set and store it as two torch tensors.

    Images are read from ``PATH_RAW``, shuffled together with their labels,
    converted to float tensors with every pixel divided by 256.0, and
    written to ``PATH_IMAGES``.  Labels become a long tensor with one
    single-element row per sample and are written to ``PATH_LABELS``.
    """
    loader = MNIST(PATH_RAW)
    raw_images, raw_labels = loader.load_training()

    # Shuffle images and labels as pairs so they stay aligned.
    pairs = list(zip(raw_images, raw_labels))
    random.shuffle(pairs)

    pixel_rows = [[float(value) / 256.0 for value in image]
                  for image, _ in pairs]
    label_rows = [[label] for _, label in pairs]

    image_tensor = torch.tensor(pixel_rows, dtype=torch.float)
    label_tensor = torch.tensor(label_rows, dtype=torch.long)

    torch.save(image_tensor, PATH_IMAGES)
    torch.save(label_tensor, PATH_LABELS)
예제 #8
0
        def __init__(self, cv_iters):
            """
            Load the full MNIST set and turn it into an "is it an 8?" task.

            Training and test splits read from ``data`` are concatenated.
            ``self.features`` holds the images as an array, ``self.labels``
            is a boolean column vector that is True where the digit is 8.
            ``self.shuffle()`` is called once everything is loaded.

            cv_iters - number of cross validation folds, must be above 2
            """
            assert cv_iters > 2, 'Cross validation folds must be more than 2 folds'
            self.cv_iters = cv_iters

            loader = MNIST('data')
            train_images, train_labels = loader.load_training()
            test_images, test_labels = loader.load_testing()

            self.features = np.array(train_images + test_images)

            digits = np.reshape(np.array(train_labels + test_labels), (-1, 1))
            # 6825 samples of 8 in total of 70000 samples
            self.labels = digits == 8

            self.shuffle()
예제 #9
0
def load_dataset(ds_path, training=True, mnist_format=True):
    """Load one split of a dataset as an ``(images, labels)`` tuple.

    Parameters
    ----------
    ds_path : str
        Directory holding the dataset.
    training : bool
        True selects the training split, False the testing split.
    mnist_format : bool
        True reads the MNIST binary files through ``MNIST``.  False reads
        ``<ds_path>/training.json`` or ``<ds_path>/testing.json``, a JSON
        object whose keys are Python-literal strings of the images and
        whose values are the labels.

    Returns
    -------
    tuple
        ``(images, labels)`` for the requested split.
    """
    if mnist_format:
        dataset = MNIST(ds_path)
        if training:
            return (dataset.load_training()[0], dataset.train_labels)
        return (dataset.load_testing()[0], dataset.test_labels)

    # Bug fix: the testing branch used to open training.json again, so
    # "test" data was silently the training data.
    json_name = "/training.json" if training else "/testing.json"
    with open(ds_path + json_name) as json_file:
        dataset = json.load(json_file)

    # Keys are stringified images; literal_eval turns them back into lists.
    imgs = [ast.literal_eval(img) for img in dataset]
    labels = list(dataset.values())
    return (imgs, labels)
예제 #10
0
def MnistData(filepath=mnistpath):
    '''Load MNIST and one-hot encode the labels of both splits.

    Input
    -----

    filepath - 'string'
        Location of the MNIST dataset files

    Output
    ------

    Xtrain - 'np.array'
        Training images, pixel values divided by 255

    Ytrain - 'np.array'
        One-hot encoded training labels, shape (n, k)

    labels_train - 'np.array'
        Training labels as loaded (not encoded)

    Xtest - 'np.array'
        Test images, pixel values divided by 255

    Ytest - 'np.array'
        One-hot encoded test labels, shape (m, k)

    labels_test - 'np.array'
        Test labels as loaded (not encoded)
    '''
    loader = MNIST(filepath)
    Xtrain, labels_train = map(np.array, loader.load_training())
    Xtest, labels_test = map(np.array, loader.load_testing())

    # Bring the raw pixel values into the unit interval.
    Xtrain = Xtrain / 255.0
    Xtest = Xtest / 255.0

    train_rows, _ = Xtrain.shape
    class_count = labels_train.max() + 1  # classes are 0..max label
    test_rows = len(labels_test)

    def one_hot(row_count, labels):
        # One row per sample with a single 1 in the column of its label.
        encoded = np.zeros((row_count, class_count))
        encoded[np.arange(row_count), labels] = 1
        return encoded

    Ytrain = one_hot(train_rows, labels_train)
    Ytest = one_hot(test_rows, labels_test)

    return Xtrain, Ytrain, labels_train, Xtest, Ytest, labels_test
예제 #11
0
def EncodeMnist(values=(2, 7), encodingVal=(-1, 1), filepath=mnistpath):
    '''Load MNIST, reduce it to two digits and re-encode their labels.

    The selection is delegated to ``binaryData`` and the label encoding to
    ``binarycode``; both are defined outside this function.

    Input
    -----
    values - 'list or tuple'
        Two digits (0-9) that define the binary problem

    encodingVal - 'list or tuple'
        Values passed to ``binarycode`` as the new label encoding

    filepath - 'string'
        Location of the MNIST dataset files (defaults to the global
        ``mnistpath``)

    Output
    ------
    XtrainC - 'np.array'
        Training images returned by ``binaryData``

    Ytrainlab - 'np.array'
        Encoded training labels returned by ``binarycode``

    XtestC - 'np.array'
        Test images returned by ``binaryData``

    Ytestlab - 'np.array'
        Encoded test labels returned by ``binarycode``
    '''
    loader = MNIST(filepath)
    train_pixels, train_digits = map(np.array, loader.load_training())
    test_pixels, test_digits = map(np.array, loader.load_testing())

    # Pixels are rescaled to the unit interval before filtering.
    train_pixels = train_pixels / 255.0
    test_pixels = test_pixels / 255.0

    XtrainC, kept_train_digits = binaryData(values, train_pixels, train_digits)
    XtestC, kept_test_digits = binaryData(values, test_pixels, test_digits)

    Ytrainlab = binarycode(kept_train_digits, values, encodingVal)
    Ytestlab = binarycode(kept_test_digits, values, encodingVal)

    return XtrainC, Ytrainlab, XtestC, Ytestlab
import keras 
#from __future__ import print_function 
from keras.datasets import fashion_mnist
from mnist.loader import MNIST

'''
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
print('x_train shape:\t', x_train.shape)
print('x_test shape:\t', x_test.shape)
print('y_train shape:\t', y_train.shape)
print('y_test shape:\t', y_test.shape)
'''

# Read the dataset from disk; the loader keeps the loaded splits as
# attributes, which are picked up below.
mndata = MNIST('E:/AI/example/fashion')
mndata.load_testing()
mndata.load_training()

# Labels as 1-D arrays.
y_test = np.asarray(mndata.test_labels)
y_train = np.asarray(mndata.train_labels)

# Images as one 28x28 grid per sample (10000 test, 60000 train).
x_test = np.asarray(mndata.test_images).reshape(10000, 28, 28)
x_train = np.asarray(mndata.train_images).reshape(60000, 28, 28)

print('x_train shape:\t', x_train.shape)
print('x_test shape:\t', x_test.shape)
예제 #13
0
import numpy as np
import cv2
from collections import deque
from mnist.loader import MNIST
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import ModelCheckpoint

# Load the pre-shuffled EMNIST "letters" training split as numpy arrays.
emnist_data = MNIST(path='data\\', return_type='numpy')
emnist_data.select_emnist('letters')
X, y = emnist_data.load_training()

# 124800 samples: images as 28x28 grids, labels as a column vector that is
# shifted by one so the indices start at 0.
X = X.reshape(124800, 28, 28)
y = y.reshape(124800, 1) - 1

# Hold out a quarter of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=111)

# Rescale the training pixels from [0, 255] to [0, 1].
X_train = X_train.astype('float32') / 255
예제 #14
0
파일: main.py 프로젝트: rlui94/MNIST
            predictions[i] = perceptrons[i].predict(data.images[n])
        prediction = np.argmax(predictions)
        if prediction == data.labels[n]:
            correct += 1
    return correct / data.size


def make_conf_matrix(data, perceptrons):
    """Print the confusion matrix of ``perceptrons`` on ``data``.

    For every sample the perceptron with the highest ``predict`` output is
    taken as the predicted class.  Rows of the matrix are predicted
    classes, columns are the true labels.  The matrix is printed; the
    function itself returns None.
    """
    scores = np.zeros(CLASSES)
    confusion = np.zeros((CLASSES, CLASSES))
    for sample in range(data.size):
        image = data.images[sample]
        for cls in range(CLASSES):
            scores[cls] = perceptrons[cls].predict(image)
        predicted = np.argmax(scores)
        confusion[predicted, data.labels[sample]] += 1
    print(confusion)
    return None


if __name__ == '__main__':
    # Read both MNIST splits from ./images/.
    mndata = MNIST('./images/')
    train_images, train_labels = mndata.load_training()
    test_images, test_labels = mndata.load_testing()

    # Wrap 60000 training and 10000 test samples in Data containers.
    train_data = data.Data()
    train_data.load(60000, train_images, train_labels)
    test_data = data.Data()
    test_data.load(10000, test_images, test_labels)

    # One perceptron with 784 inputs per class.
    perceps = [perceptron.Perceptron(784) for _ in range(CLASSES)]
    train_on_set(train_data, test_data, perceps, ETA, 70)
    make_conf_matrix(test_data, perceps)
예제 #15
0
def _score_raw_and_pca(make_model, runs, raw_sets, pca_sets, label_sets,
                       progress=None):
    """Fit a fresh model on raw and on PCA features ``runs`` times.

    ``raw_sets`` / ``pca_sets`` / ``label_sets`` are ``(train, test)``
    pairs.  Returns ``(raw_scores, pca_scores)``, two lists of test
    accuracies with one entry per run.
    """
    train_raw, test_raw = raw_sets
    train_pca, test_pca = pca_sets
    train_labels, test_labels = label_sets

    raw_scores, pca_scores = [], []
    for run in range(1, runs + 1):
        if progress is not None:
            print(progress.format(run))
        model = make_model().fit(train_raw, train_labels)
        raw_scores.append(model.score(test_raw, test_labels))

        pca_model = make_model().fit(train_pca, train_labels)
        pca_scores.append(pca_model.score(test_pca, test_labels))
    return raw_scores, pca_scores


def _dump_scores(raw_scores, pca_scores, csv_name):
    """Write the paired accuracies to ``csv_name`` (no index column)."""
    pd.DataFrame({
        'Scores': np.array(raw_scores),
        'PCA_Scores': np.array(pca_scores)
    }).to_csv(csv_name, index=False)


def main():
    """Compare classifiers on noisy MNIST with and without PCA features.

    Noise is added to both MNIST splits with ``create_noise``, the result
    is L2-normalised, and PCA features (``n=15``) are derived with
    ``create_training_and_testing_data``.  KNN (one run), random forest
    and decision tree (nine runs each) are then scored on the raw noisy
    pixels and on the PCA features.  Each classifier's accuracies are
    written to its own CSV file with columns ``Scores`` and ``PCA_Scores``.
    """
    # MNIST path data
    data_path = './Data/'
    mndata = MNIST(data_path)
    images_training, labels_training = mndata.load_training()
    images_testing, labels_testing = mndata.load_testing()

    images_training_noise = create_noise(np.asarray(images_training))
    images_testing_noise = create_noise(np.asarray(images_testing))
    labels_training = np.asarray(labels_training)
    labels_testing = np.asarray(labels_testing)

    # Normalize data (only the noisy images are used below, so the clean
    # images are no longer normalised needlessly).
    images_training_noise = preprocessing.normalize(images_training_noise)
    images_testing_noise = preprocessing.normalize(images_testing_noise)

    images_training_pca_noise, images_testing_pca_noise = create_training_and_testing_data(
        images_training_noise,
        labels_training,
        images_testing_noise,
        labels_testing,
        n=15)

    raw_sets = (images_training_noise, images_testing_noise)
    pca_sets = (images_training_pca_noise, images_testing_pca_noise)
    label_sets = (labels_training, labels_testing)

    print('Starting classification')

    # KNN: a single fit, it is deterministic for fixed data.
    print('Starting KNN')
    knn_score, knn_pca_score = _score_raw_and_pca(
        KNeighborsClassifier, 1, raw_sets, pca_sets, label_sets)
    _dump_scores(knn_score, knn_pca_score, 'KNNAccuracies_noise.csv')

    # Random Forest Accuracies: repeated because training is randomised.
    clf_score, clf_pca_score = _score_raw_and_pca(
        lambda: RandomForestClassifier(n_estimators=100), 9,
        raw_sets, pca_sets, label_sets,
        progress='Iteration {} of Random Forest Classifier')
    _dump_scores(clf_score, clf_pca_score, 'RandomForestAccuracies_noise.csv')

    # Decision Tree Accuracies
    dtc_score, dtc_pca_score = _score_raw_and_pca(
        tree.DecisionTreeClassifier, 9,
        raw_sets, pca_sets, label_sets,
        progress='Iteration {} of Decision Tree Accuracies')
    _dump_scores(dtc_score, dtc_pca_score, 'DecisionTreeAccuracies_noise.csv')
예제 #16
0
from mnist.loader import MNIST
import random
import numpy as np

from PIL import Image

# Merge the training and test splits into one feature array and one
# column vector of labels.
mndata = MNIST('data')
features, labels = mndata.load_training()
images, tlabels = mndata.load_testing()
features = np.array(features + images)
labels = np.reshape(np.array(labels + tlabels), (-1, 1))

# Save the first 50 digits as 256x256 greyscale PNGs.
# NOTE(review): labels[i] is a one-element array, so the file name contains
# brackets (e.g. "HD[5].png") and images of the same digit overwrite each
# other -- confirm this is intended.
for i in range(50):
    pixels = np.reshape(features[i].astype(np.uint8), (28, 28))
    img = Image.fromarray(pixels, 'L')
    img.resize((256, 256)).save('HD{}.png'.format(labels[i]))
'''
labels = labels == 8

ones = np.array([i for i in range(labels.shape[0]) if labels[i]==1])
np.random.shuffle(ones)
ones = np.reshape(ones,(5, -1))

#keys to feature where label is 0
zeros = np.array([i for i in range(labels.shape[0]) if labels[i]==0])
np.random.shuffle(zeros)
zeros = np.reshape(zeros,(5, -1))

ind = list(range(5))
ind = np.delete(ind, 2)
print(zeros[ind].flatten())
from keras.layers import Dense, Activation
from keras import optimizers, regularizers
from keras.models import Sequential
import time
import numpy as np
import cv2
from mnist.loader import MNIST

m = MNIST('./data')
import os

# Restrict CUDA to the device with index 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

classes = list(range(10))

# Load both splits and convert everything to float32 arrays.
x_train, y_train = m.load_training()
x_test, y_test = m.load_testing()
x_train = np.asarray(x_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)

n_classes = len(classes)

# One-hot encoding: row index (a / c) paired with the integer label (b / d)
# selects the single entry per row that is set to 1.
a = np.arange(y_train.shape[0], dtype=np.int64)
b = y_train.astype(np.int64).reshape(y_train.shape[0])
label_train = np.zeros((y_train.shape[0], n_classes))
label_train[a, b] = 1

c = np.arange(y_test.shape[0], dtype=np.int64)
d = y_test.astype(np.int64).reshape(y_test.shape[0])
label_test = np.zeros((y_test.shape[0], n_classes))
label_test[c, d] = 1
Next, we explore the use of an affine transformation of the pixel values from 
d features to p features in an effort to increase our accuracy. Finally, we 
examine how the choice of p affects the test accuracy. 

@author: Christopher Salazar
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt

# Read MNIST from the local course folder and scale pixels to [0, 1].
mndata = MNIST(
    r'C:\Users\salez\Documents\MISE Work\CSE 546\Homework\HW1\Programming\mnist'
)
X_train, labels_train = map(np.array, mndata.load_training())
X_test, labels_test = map(np.array, mndata.load_testing())
X_train = X_train / 255.0
X_test = X_test / 255.0

# d: number of input features, k: number of outputs (digit classes).
d = len(X_train[0])
k = 10

# One-hot label matrix in the form required by the closed form solution:
# row i has a single 1 in column labels_train[i].
n_train = len(X_train)
Y_train = np.zeros((n_train, k))
Y_train[np.arange(n_train), labels_train[:n_train]] = 1
예제 #19
0
        runs = 10
        loss = "mse"
        opt = "gd"
        io = "../networks/mnist.obj"
        graph = False
        layers = [16, 16, 10]

    print(f"learnRate: {learnRate}")
    print(f"batch: {batch}")
    print(f"runs: {runs}")
    print(f"loss: {loss}")
    print(f"optimizer: {opt}")
    print(f"io: {io}")
    print(f"graph: {graph}")
    mndata = MNIST('./samples/numbers')
    trainData, trainLabels = mndata.load_training()
    #trainData = [[(random.random()*2)-1, (random.random()*2)-1] for i in range(1000)]
    #trainLabels = [int(i[0]-0.5>i[1] or i[0]+0.5<i[1]) for i in trainData]
    input_size = len(trainData[0])
    print(f"architecture: {input_size}, {layers}\n")

    convnn = getNetwork(io, learnRate, batch, loss, opt, layers, input_size)
    convnn = train(runs, convnn, trainData, trainLabels, batch, graph)
    convnn.writeNetwork(io)
    print("training done")
    print(" ")
    testData, testLabels = mndata.load_testing()
    #testData = [[random.random()*2-1, random.random()*2-1] for i in range(20)]
    #testLabels = [int(i[0]-0.5>i[1]or i[0]+0.5<i[1]) for i in testData]
    results = test(convnn, testData, testLabels)
    print(f"final percentage: {results}")
예제 #20
0
"""
Render the image in each of its retinal 'zones'.
This will be the basis of what each retinal unit views as we progress.
Note that the pixels with the strongest stimuli are white (values close to 1), while areas of least stimuli are black (values close to 0).
"""

import random
import numpy as np
import matplotlib.pyplot as plt

from mnist.loader import MNIST
mndata = MNIST('/home/niloofar/snn-Akhil/Spiking-Neural-Network/mnist')
images, labels = mndata.load_training()


def get_next_image(index=0, pick_random=False, display=True):
    """Return one training image, scaled by its own maximum, and its label.

    index       -- position in the module-level ``images`` / ``labels``
    pick_random -- when true, ``index`` is replaced by a random position
    display     -- when true, print the label and the loader's text
                   rendering of the image

    Returns ``(image_norm, label)`` where ``image_norm`` is a 28x28 array
    whose largest value is 1.
    """
    if pick_random:
        index = random.randint(0, len(images) - 1)

    raw_pixels, label = images[index], labels[index]

    if display:
        print(f'Label: {label}')
        print(mndata.display(raw_pixels))

    grid = np.asarray(raw_pixels).reshape((28, 28))
    # Stretch so the brightest pixel is 255, then map to the unit interval.
    image_norm = (grid * 255.0 / grid.max()) / 255.
    return image_norm, label


def graph_retinal_image(image, stride):
    fig = plt.figure()
예제 #21
0
파일: extract.py 프로젝트: wruoting/MADGE
from MNISTModule.classification_set import ClassificationSet
from MNISTModule.classification import Classification
from mnist.loader import MNIST
import numpy as np
import os

# Where the MNIST files live and where accuracies are meant to be logged.
samples_path = './SampleData/MNIST'
write_path = './accuracy.txt'
mndata = MNIST(samples_path)

mnist_training_set = ClassificationSet()

# Results are appended to the log, so `mode` is defined on every path
# (previously it was only set when the file already existed).
mode = 'a+'
if not os.path.exists(write_path):
    # Create the empty log file and close it straight away; the original
    # left this handle open for the lifetime of the process.
    with open(write_path, "w+") as f:
        pass
test_sigma = 8.5
images_training, labels_training = mndata.load_training()
images_testing, labels_testing = mndata.load_testing()
classification = Classification(images_training,
                                labels_training,
                                images_testing,
                                labels_testing,
                                sigma=test_sigma)
classification.create_model()
print(classification.calculate_accuracy(mode='verbose'))
# with open(write_path, 'a+') as file:
#     file.write(str(test_sigma) + ',' + classification.calculate_accuracy(calculate=False, mode='return'))
#     file.write('\n')
예제 #22
0
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from mnist.loader import MNIST
import numpy as np

if __name__ == '__main__':
    # Build a CSV of the MNIST training set with a stratified fold id
    # ("kfold", 0..4) attached to every row.
    mnist = MNIST('../input')
    x_train, y_train = mnist.load_training() 
    #x_test, y_test = mnist.load_testing() 

    x_train = np.asarray(x_train).astype(np.float32)
    y_train = np.asarray(y_train).astype(np.int32)
    #x_test = np.asarray(x_test).astype(np.float32)
    #y_test = np.asarray(y_test).astype(np.int32)

    print(f'Shape of X Train is {x_train.shape}')

    df = pd.DataFrame(data=x_train)
    df["label"] = y_train
    # Shuffle the rows; the index is reset so positions match .loc labels.
    df = df.sample(frac=1).reset_index(drop=True)
    df['kfold'] = -1

    kf = StratifiedKFold(n_splits=5)

    # Bug fix: stratify on the labels of the SHUFFLED frame.  The original
    # passed the unshuffled y_train, whose order no longer matches df, so
    # the folds were not stratified by the rows' actual labels.
    shuffled_labels = df["label"].values
    for f, (t_, v_) in enumerate(kf.split(X=df, y=shuffled_labels)):
        df.loc[v_, 'kfold'] = f
    
    print(df.head())

    df.to_csv("../input/mnist_train_folds.csv", index=False)
예제 #23
0
                plt.plot()
        return loss_history


class LinearSVM(LinearClassifier):
    """Linear classifier whose loss is delegated to ``compute_loss``."""

    def loss(self, X_batch, y_batch, reg):
        """Return ``compute_loss`` for the current weights and this batch."""
        batch_loss = compute_loss(self.W, X_batch, y_batch, reg)
        return batch_loss


# In[5]:

# Loader pointing at the local MNIST folder.
mndata = MNIST('E:/Grad School/Semester 2/ML/Homeworks/MNIST/')

# In[6]:

# Read both splits from disk.
X_train, Y_train = mndata.load_training()  #60000 samples
X_test, Y_test = mndata.load_testing()  #10000 samples

# In[7]:

# Convert to numpy: float32 pixels, int32 labels.
X_train = np.asarray(X_train).astype(np.float32)
Y_train = np.asarray(Y_train).astype(np.int32)
X_test = np.asarray(X_test).astype(np.float32)
Y_test = np.asarray(Y_test).astype(np.int32)

# In[8]:

# Small random initial weights, one column per class (784 inputs x 10).
W = np.random.randn(784, 10) * 0.0001
# presumably collects values for a later plot -- TODO confirm against the
# cells that follow
plotnum1 = []

# In[9]:
예제 #24
0
https://umap-learn.readthedocs.io/en/latest/supervised.html#using-labels-to-separate-classes-supervised-umap
公式リファレンス

MNISTデータを使うために "https://pypi.org/project/python-mnist/" を参照してダウンロード、インストールしておく

教師付き次元削減とMetric learning
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='white', context='poster')

# Loader for the dataset files in the local python-mnist data folder.
mndata = MNIST('/Users/hamamatsuikadaigakubyouribu/UMAP_python/python-mnist/data')
# Bare expression: has no effect in a script (it only echoes the object in
# an interactive session).
mndata
train, train_labels = mndata.load_training()
test, test_labels = mndata.load_testing()
# Stack both splits into one float64 matrix scaled to [0, 1], and join the
# labels in the same order.
data = np.array(np.vstack([train, test]), dtype=np.float64) / 255.0
target = np.hstack([train_labels, test_labels])
# NOTE(review): these look like the Fashion-MNIST class names -- confirm
# the data folder actually holds Fashion-MNIST rather than digit MNIST.
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot']
# Time one cross-validation run.  `time`, `createdata`, `crossvalid` and
# `m` are not defined in this part of the file -- presumably they come from
# earlier cells; verify.
n = 200
start = time.time()
xp, yp, xn, yn = createdata(1)
crossvalid(xp, yp, m, n, 0, 0)
end = time.time()
print("Time elapsed while crossvalid: {:.3f} ".format(end - start), "s")

# In[44]:

# Number of pixels per image.
d = 784
# read data and preprocess
from mnist.loader import MNIST

# An empty path is passed to the loader here.
mndata = MNIST('')

X_train, Y_train = mndata.load_training()
# Test split is loaded as well.
X_test, Y_test = mndata.load_testing()
# Empty accumulators: xp/yp collect samples labelled 1, xn/yn samples
# labelled 0.  X_opt / Y_opt are not filled in the lines visible here.
xp = np.empty((0, 784), int)
yp = np.empty((0, 1), int)
xn = np.empty((0, 784), int)
yn = np.empty((0, 1), int)
X_opt = np.empty((0, 784), int)
Y_opt = np.empty((0, 1), int)
# Walk the training set from the last sample to the first and sort the
# digits 1 and 0 into their accumulators; other digits are skipped.
# NOTE(review): np.append copies the whole array on every call, so this
# loop is quadratic in the number of kept samples; a boolean mask over
# np.asarray(Y_train) would do the same selection in one pass.
# NOTE(review): np.append without `axis` flattens, so yp / yn become 1-D.
for i in reversed(range(len(Y_train))):
    if Y_train[i] == 1:
        xp = np.append(xp, [X_train[i]], axis=0)
        yp = np.append(yp, [Y_train[i]])
    elif Y_train[i] == 0:
        xn = np.append(xn, [X_train[i]], axis=0)
        yn = np.append(yn, [Y_train[i]])