예제 #1
def PreDataset():
    """Load CIFAR-10 and build zero-centred design matrices with a bias column.

    The raw data is read from ``data/cifar10``, shown with ``VisualizeImg``,
    and the user is prompted before the split is made. Samples
    ``[49000, 50000)`` of the training set become the validation set.

    All three splits are centred with the scalar mean of the *training*
    split. The previous version centred validation and test data with their
    own means, which shifts each split by a different amount.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)`` where each
        ``X_*`` has one flattened image per row followed by a constant 1.
    """
    cifar10_dir = 'data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImg(X_train, y_train)
    input("Enter any key to Cross-validation...")

    num_train = 49000
    num_val = 1000
    # Validation samples are the ones immediately after the training range.
    sample_index = range(num_train, num_train + num_val)
    X_val = X_train[sample_index]
    y_val = y_train[sample_index]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    # Zero-centre every split with the statistic of the training data only.
    train_mean = np.mean(X_train)
    X_train -= train_mean
    X_val -= train_mean
    X_test -= train_mean

    # Flatten each image into one row.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # Append a column of ones so the bias b can be folded into the weights.
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #2
def knn_predict():
    """Fit a classifier on 1000 CIFAR-10 images and write test predictions.

    Despite its name, the function delegates to ``do_LogisticRegression``.
    It expects at least two command-line arguments; when they are missing it
    prints a message and returns instead of continuing into an IndexError.

    The image names are read from ``test.scp`` (one per line) and the result
    is written to ``prediction.txt`` as ``<name> <label>`` lines. Both files
    are closed deterministically.

    Raises:
        AssertionError: If the number of names in ``test.scp`` differs from
            the number of predictions.
    """
    if len(sys.argv) < 3:
        print("need at least 2 parameters")
        return

    cifar10_dir = '../cifar-10-batches-py'
    # Only the training split of the archive is used here.
    X_train, y_train, _, _ = load_CIFAR10(cifar10_dir)

    X_train = X_train[:1000]
    y_train = y_train[:1000]
    test_data = load_test_data()

    # One flattened sample per row for both data and labels.
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    y_train = np.reshape(y_train, (y_train.shape[0], -1))
    test_data = np.reshape(test_data, (test_data.shape[0], -1))

    y_pred = do_LogisticRegression(X_train, y_train, test_data, C=66)

    print(y_pred.shape)

    scp_file = 'test.scp'
    with codecs.open(scp_file, 'r') as fin:
        images = fin.readlines()

    assert (len(images) == y_pred.shape[0])
    with codecs.open('prediction.txt', 'w') as output:
        for image, label in zip(images, y_pred):
            # Drop the trailing newline kept by readlines().
            basename = image.split('\n')[0]
            output.write(basename + ' ' + str(label) + '\n')
예제 #3
def get_CIFAR10_data(num_training=5000, num_validation=1000, num_test=500):
    """Load CIFAR-10 from disk and prepare it to be fed to the neural network.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``. The image
        arrays have the training mean image subtracted and axes 1 and 3
        swapped. The test images are now swapped as well, so all three
        splits share one layout.
    """
    # Load the raw data.
    cifar10_dir = '../../datasets/cifar-10-batches-py/'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalise the data: subtract the mean of the training images.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val = X_val - mean_image
    X_test = X_test - mean_image

    # Apply the same axis swap to every split.
    X_train = X_train.swapaxes(1, 3)
    X_val = X_val.swapaxes(1, 3)
    X_test = X_test.swapaxes(1, 3)
    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #4
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """Split CIFAR-10 into train/val/test/dev sets of flattened, centred rows.

    The dev set is a random sample (without replacement) of the training
    split. Every split is reshaped to one row per image and has the
    per-feature mean of the training rows subtracted.

    Returns:
        ``(X_train, Y_train, X_val, Y_val, X_test, Y_test, X_dev, Y_dev)``.
    """
    root_dir = '../dataset/cifar-10-batches-py'
    all_images, all_labels, test_images, test_labels = data_utils.load_CIFAR10(root_dir)

    # Validation samples follow directly after the training range.
    val_idx = np.arange(num_training, num_training + num_validation)
    X_val, Y_val = all_images[val_idx], all_labels[val_idx]

    train_idx = np.arange(num_training)
    X_train, Y_train = all_images[train_idx], all_labels[train_idx]

    test_idx = np.arange(num_test)
    X_test, Y_test = test_images[test_idx], test_labels[test_idx]

    dev_idx = np.random.choice(num_training, num_dev, replace=False)
    X_dev, Y_dev = X_train[dev_idx], Y_train[dev_idx]

    # Flatten each image into a row.
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_val = X_val.reshape(X_val.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)
    X_dev = X_dev.reshape(X_dev.shape[0], -1)

    # Zero-centre the data with the mean of the training rows.
    mean_image = np.mean(X_train, axis=0)
    for rows in (X_train, X_val, X_test, X_dev):
        rows -= mean_image

    return X_train, Y_train, X_val, Y_val, X_test, Y_test, X_dev, Y_dev
예제 #5
def pre_dataset(path):
    """Load CIFAR-10 from ``path`` and return centred, flattened splits.

    The first 9000 training images are kept for training and the following
    1000 become the validation set. The training mean image is subtracted
    from every split before the images are flattened to rows. The resulting
    shapes are printed.

    Returns:
        ``(X_train, y_train, X_test, y_test, X_val, y_val)`` — note that the
        test pair comes before the validation pair.
    """
    images, labels, X_test, y_test = load_CIFAR10(path)

    num_train = 9000
    num_val = 1000

    val_idx = range(num_train, num_train + num_val)
    X_val, y_val = images[val_idx], labels[val_idx]
    X_train, y_train = images[:num_train], labels[:num_train]

    mean_image = np.mean(X_train, axis=0)
    for split in (X_train, X_val, X_test):
        split -= mean_image

    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)
    X_val = X_val.reshape(X_val.shape[0], -1)

    report = (
        ('Train data shape: {}', X_train),
        ('Train labels shape: {}', y_train),
        ('Validation data shape: {}', X_val),
        ('Validation labels shape: {}', y_val),
        ('Test data shape: {}', X_test),
        ('Test labels shape: {}', y_test),
    )
    for template, array in report:
        print(template.format(array.shape))

    return X_train, y_train, X_test, y_test, X_val, y_val
예제 #6
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """Use the cs231n data_utils.py script to load the data and mean-centre it.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``; the image
        arrays have the training mean image subtracted.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #7
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """Load CIFAR-10, split it and subtract the training mean image.

    Shapes recorded in the original notes for this function:
        Train data shape:  (49000, 32, 32, 3)
        Train labels shape:  (49000,)

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #8
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """Load CIFAR-10 from disk and preprocess it for the two-layer net.

    These are the same steps as were used for the SVM, condensed into a
    single function: subsample, subtract the training mean image, and
    reshape every image into a row.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    data_root = 'datasets/cifar-10'
    raw_train, raw_train_labels, raw_test, raw_test_labels = load_CIFAR10(data_root)

    # Carve out the validation, training and test subsets, in that order.
    val_idx = range(num_training, num_training + num_validation)
    X_val, y_val = raw_train[val_idx], raw_train_labels[val_idx]

    train_idx = range(num_training)
    X_train, y_train = raw_train[train_idx], raw_train_labels[train_idx]

    test_idx = range(num_test)
    X_test, y_test = raw_test[test_idx], raw_test_labels[test_idx]

    # Centre every subset on the mean training image.
    mean_image = np.mean(X_train, axis=0)
    for subset in (X_train, X_val, X_test):
        subset -= mean_image

    # One row per image.
    X_train = np.reshape(X_train, (num_training, -1))
    X_val = np.reshape(X_val, (num_validation, -1))
    X_test = np.reshape(X_test, (num_test, -1))

    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #9
    def load_CIFAR10(self):
        # Purpose: load CIFAR-10 via the module-level load_CIFAR10(), print the
        # array shapes, plot a few random samples per class, then keep the
        # first 5000 training / 500 test samples (flattened) on the instance.
        # NOTE(review): this snippet appears to have lost lines in extraction:
        # the two lines below look like a docstring without its quotes, the
        # `classes` list is never closed, one `len(` call is cut off, and two
        # `if` statements have no body. Restore them from the original file.
            Load the raw CIFAR-10 data.
            show a few examples of training images from each class.
        cifar10_dir = 'datasets/cifar-10'
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

        # As a sanity check, we print out the size of the training and test data.
        print 'Training data shape: ', X_train.shape  # (50000, 32, 32, 3)
        print 'Training labels shape: ', y_train.shape  # (50000,)
        print 'Test data shape: ', X_test.shape  # (10000, 32, 32, 3)
        print 'Test labels shape: ', y_test.shape  # (10000,)

        # Visualize a subset of the samples

        classes = [
            'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
            'ship', 'truck'
        # NOTE(review): the closing bracket of `classes` is missing here.
        num_classes = len(classes)
        samples_per_class = 7  # visualize 7 samples per class
        for y, clas in enumerate(classes):
            print "\n\ny =", y, "class =", clas
            idxs = np.flatnonzero(y_train == y)  # flat indices where y_train == y
            # NOTE(review): the argument of len( was cut off — presumably idxs; confirm.
            print "In training set, the number of class <", y, "> is", len(
            idxs = np.random.choice(idxs, samples_per_class,
                                    replace=False)  # randomly pick 7 of those samples
            print "randomly select", samples_per_class, "samples in this set, these subscript is:", idxs[:]
            print "plt_index = ",
            for i, idx in enumerate(idxs):
                # Subplot position: row i, column y (1-based, row-major).
                plt_idx = i * num_classes + y + 1
                print plt_idx,
                plt.subplot(samples_per_class, num_classes, plt_idx)
                plt.imshow(X_train[idx].astype('uint8'))  # show the image
                # NOTE(review): the body of this `if` is missing.
                if i == 0:
        # Save the figure
        # NOTE(review): the body of this `if` is missing.
        if not os.path.exists("visual_CIFAR10.jpg"):

        # To make training faster in this experiment, keep only the first 5000 training samples
        print "sampling...   reshape..."
        X_train = X_train[:5000]
        y_train = y_train[:5000]
        X_test = X_test[:500]
        y_test = y_test[:500]

        # Reshape the arrays to the form (nb_samples, nb_features)
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))
        print "X_train.shape =", X_train.shape, "X_test.shape =", X_test.shape

        # Store the results on the instance
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
        print "----"
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """Load the CIFAR-10 dataset from disk and preprocess it for the
    two-layer neural net classifier.

    The mean image is computed from the training subset only and subtracted
    from all three splits. The previous version averaged over training plus
    validation images and centred the test split with its own mean, so the
    splits were not normalised consistently.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(data_train, label_train, data_val, label_val, data_test,
        label_test)`` with mean-centred image arrays.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './cifar-10-batches-py'

    data_train_all, label_train_all, data_test, label_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    data_val = data_train_all[mask]
    label_val = label_train_all[mask]
    mask = range(num_training)
    data_train = data_train_all[mask]
    label_train = label_train_all[mask]
    mask = range(num_test)
    data_test = data_test[mask]
    label_test = label_test[mask]

    # Normalize the data: subtract the mean image of the training subset
    mean_image = np.mean(data_train, axis=0)
    data_train_m = data_train - mean_image
    data_val_m = data_val - mean_image
    data_test_m = data_test - mean_image

    return data_train_m, label_train, data_val_m, label_val, data_test_m, label_test
예제 #11
def make_cifar10_dataset(cifar_dir, n_validation=0, vectorize=False):
    """Load CIFAR-10, one-hot encode the labels and split off a validation set.

    Args:
        cifar_dir: Directory passed to ``load_CIFAR10``. The argument used to
            be ignored in favour of a hard-coded path; that path is still
            used as a fallback when ``cifar_dir`` is empty or ``None``.
        n_validation: Number of leading training samples moved to the
            validation set.
        vectorize: When true, flatten every image into a single row.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid, X_test, y_test)`` with
        one-hot label matrices of width 10.
    """
    NUM_CLASSES = 10
    # Historical hard-coded location, kept only as a fallback.
    default_dir = '/home/elfeki/Workspace/VAE_DPP/CIFAR/data/cifar10/'

    X_train, y_train, X_test, y_test = load_CIFAR10(cifar_dir or default_dir)

    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

    def one_hot(labels):
        # Row i gets a single 1 in column labels[i]; sized from the data
        # instead of assuming 50000/10000 samples.
        encoded = np.zeros((labels.shape[0], NUM_CLASSES))
        encoded[np.arange(labels.shape[0]), labels] = 1
        return encoded

    # make one-hot coding
    y_train = one_hot(y_train)
    y_test = one_hot(y_test)

    # make validation set
    X_valid = X_train[:n_validation]
    X_train = X_train[n_validation:]

    y_valid = y_train[:n_validation]
    y_train = y_train[n_validation:]

    return (X_train, y_train, X_valid, y_valid, X_test, y_test)
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """Load the CIFAR-10 dataset from disk and perform preprocessing to
    prepare it for the two-layer neural net classifier.

    These are the same steps as we used for the SVM, but condensed to a
    single function.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)`` where the
        image arrays are mean-centred and reshaped to one row per image.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #13
def pre_dataset():
    """Load CIFAR-10 and build centred, bias-augmented design matrices.

    The data is visualised and the user is prompted before the last 1000 of
    the first 50000 training samples are set aside for validation. Images are
    flattened to rows, the per-feature training mean is subtracted, and a
    column of ones is appended.

    Returns:
        ``(X_train, y_train, X_test, y_test, X_val, y_val)`` — the test pair
        comes before the validation pair.
    """
    data_root = 'datasets/cifar-10-batches-py'
    images, labels, X_test, y_test = load_CIFAR10(data_root)
    VisualizeImage(images, labels)
    input('Enter any key to Cross-validation...')

    num_train, num_val = 49000, 1000

    # Validation samples sit right after the training range.
    val_idx = range(num_train, num_train + num_val)
    X_val, y_val = images[val_idx], labels[val_idx]
    X_train, y_train = images[:num_train], labels[:num_train]

    # Flatten first, then subtract the mean row of the training data.
    flat_train = X_train.reshape(X_train.shape[0], -1)
    flat_test = X_test.reshape(X_test.shape[0], -1)
    flat_val = X_val.reshape(X_val.shape[0], -1)

    mean_image = flat_train.mean(axis=0)
    centred = [flat - mean_image for flat in (flat_train, flat_test, flat_val)]

    # Trailing column of ones so the bias is learned as part of W.
    X_train, X_test, X_val = [
        np.hstack([rows, np.ones((rows.shape[0], 1))]) for rows in centred
    ]

    return X_train, y_train, X_test, y_test, X_val, y_val
예제 #14
def get_CIFAR10_data(num_training = 49000, num_validation = 1000, num_test = 10000):
    """Load the CIFAR-10 dataset from disk and perform preprocessing to
    prepare it for the two-layer neural net classifier.

    These are the same steps as we used for the SVM, but condensed to a
    single function.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``; the image
        arrays have the training mean image subtracted.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'E:/research/CS231n/cifar-10-python/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis = 0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
예제 #15
def main():
    """Run the k-nearest-neighbour experiment on a CIFAR-10 subset.

    Trains ``KNearestNeighbor`` on the first 5000 training images, reports
    accuracy on the first 500 test images for k=1 and k=5, and checks that
    the one-loop and no-loop distance implementations agree with the
    two-loop baseline.

    The agreement check now has its ``else`` branch back: previously both the
    "Good!" and "Uh-oh!" messages were printed on success and nothing was
    printed on a mismatch.
    """
    cifar10_dir = '../datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_two_loops(X_test)

    def report_accuracy(k):
        # Compute and print the fraction of correctly predicted examples.
        y_test_pred = classifier.predict_labels(dists, k=k)
        num_correct = np.sum(y_test_pred == y_test)
        accuracy = float(num_correct) / num_test
        print('Got %d / %d correct => accuracy: %f' %
              (num_correct, num_test, accuracy))

    def compare_with_baseline(label, candidate):
        # The Frobenius norm of the difference is the Euclidean distance
        # between the two matrices viewed as flat vectors.
        difference = np.linalg.norm(dists - candidate, ord='fro')
        print('%s difference was: %f' % (label, difference))
        if difference < 0.001:
            print('Good! The distance matrices are the same')
        else:
            print('Uh-oh! The distance matrices are different')

    report_accuracy(1)
    report_accuracy(5)

    dists_one = classifier.compute_distances_one_loop(X_test)
    compare_with_baseline('One loop', dists_one)

    dists_two = classifier.compute_distances_no_loops(X_test)
    compare_with_baseline('No loop', dists_two)
예제 #16
def main(dataset):
    """Train a two-layer network on CIFAR-10 and print its accuracies.

    Args:
        dataset: Location handed to ``data_utils.load_CIFAR10``.

    The first 20% of the training rows are held out for validation. The
    split size uses floor division so it stays an integer on Python 3 as
    well as Python 2 (a float would be rejected as a slice index).
    """
    # Load the CIFAR Data
    print("Loading data..")
    Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(dataset)
    print("Loaded data!")
    Xtr = flatten(Xtr)
    Xte = flatten(Xte)

    # Both splits are preprocessed with the mean of the training rows.
    mean_image = np.mean(Xtr, axis=0)
    Xtr = preProcess(Xtr, mean_image)
    Xte = preProcess(Xte, mean_image)

    N = Xtr.shape[0]
    vSize = N * 20 // 100  # Set aside 20% of data for validation

    # Create network and run training
    nn = network.TwoLayerNet(3072, 1024, 10)
    stats = nn.train(Xtr[vSize:], Ytr[vSize:], Xtr[:vSize], Ytr[:vSize], verbose=False)

    # Report only the final accuracies, not the full histories.
    print("Training accuracy: %.2f" % stats['train_acc_history'][-1])
    print("Validation accuracy: %.2f" % stats['val_acc_history'][-1])
    print("Testing accuracy: %.2f" % (nn.accuracy(Xte, Yte)*100))
예제 #17
def make_bearing_dataset(data_dir,
    # NOTE(review): the rest of the signature is missing from this snippet.
    # The body reads `vectorize` and `num_labeled_samples`, so they are
    # presumably parameters — restore their order and defaults from the
    # original file.
    #
    # Purpose: load a dataset through load_CIFAR10(data_dir), optionally
    # flatten the samples, one-hot encode the labels into 10 columns, and
    # draw a labeled subset with draw_labeled_data().
    NUM_CLASSES = 10

    X_train, y_train, X_test, y_test = load_CIFAR10(data_dir)
    # 39600, 32, 32, 1   39600,
    #  3750, 32, 32, 1   3750,

    # Sample counts are taken from the loaded arrays, not hard-coded.
    NUM_TRAIN = X_train.shape[0]
    NUM_TEST = X_test.shape[0]
    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))  # 39600, 1024
        X_test = np.reshape(X_test, (X_test.shape[0], -1))  # 3750, 1024

    # make one-hot coding
    y_train_temp = np.zeros((NUM_TRAIN, NUM_CLASSES))
    for i in range(NUM_TRAIN):
        y_train_temp[i, y_train[i]] = 1
    y_train = y_train_temp  # 39600, 10

    y_test_temp = np.zeros((NUM_TEST, NUM_CLASSES))
    for i in range(NUM_TEST):
        y_test_temp[i, y_test[i]] = 1
    y_test = y_test_temp  # 3750, 10

    # The labeled subset is returned alongside the full training set.
    X_train_labeled, y_train_labled = draw_labeled_data(
        X_train, y_train, labeled_sample_per_category=num_labeled_samples)

    return (X_train, y_train, X_train_labeled, y_train_labled, X_test, y_test)
def serialize_data():
    """Serialize the CIFAR-10 splits under names tagged with the current time.

    Images go through ``serialize_cifar_pool3`` and labels through
    ``np.save``. ``cifar10_dir`` is read from the enclosing scope.
    """
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    stamp = datetime.datetime.today().strftime('%Y%m%d-%H:%M:%S')

    # Images first, then labels, each in train/test order.
    for prefix, images in (('X_train_', X_train), ('X_test_', X_test)):
        serialize_cifar_pool3(images, prefix + stamp)
    for prefix, labels in (('y_train_', y_train), ('y_test_', y_test)):
        np.save(prefix + stamp, labels)
예제 #19
def pre_dataset():
    """Load CIFAR-10 and build centred, bias-augmented design matrices.

    After visualising the data, samples ``[49000, 50000)`` of the training
    set are set aside for validation. Images are flattened to rows, the
    per-feature training mean is subtracted in place, and a column of ones is
    appended.

    Returns:
        ``(X_train, y_train, X_test, y_test, X_val, y_val)`` — the test pair
        comes before the validation pair.
    """
    data_root = 'D:/dataset/cifar-10-python/cifar-10-batches-py'
    images, labels, X_test, y_test = load_CIFAR10(data_root)
    VisualizeImage(images, labels)

    num_train, num_val = 49000, 1000

    val_idx = range(num_train, num_train + num_val)
    X_val, y_val = images[val_idx], labels[val_idx]
    X_train, y_train = images[:num_train], labels[:num_train]

    # One row per image.
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)
    X_val = X_val.reshape(X_val.shape[0], -1)

    # Centre every split on the mean training row.
    mean_image = np.mean(X_train, axis=0)
    for rows in (X_train, X_val, X_test):
        rows -= mean_image

    def with_bias(rows):
        # Trailing column of ones so the bias is learned as part of W.
        return np.hstack([rows, np.ones((rows.shape[0], 1))])

    X_train = with_bias(X_train)
    X_test = with_bias(X_test)
    X_val = with_bias(X_val)

    return X_train, y_train, X_test, y_test, X_val, y_val
예제 #20
def serialize_data():
    """Serialize the dataset splits under fixed ``*_1`` names.

    Images go through ``serialize_cifar_pool3`` and labels through
    ``np.save``.
    """
    # Change the loader call to take the sketches dataset as input using
    # input_data_sketches.read_data_sets() for testing with sketches.
    X_train, y_train, X_test, y_test = load_CIFAR10()

    # Images first, then labels, each in train/test order.
    for images, name in ((X_train, 'X_train_1'), (X_test, 'X_test_1')):
        serialize_cifar_pool3(images, name)
    for name, labels in (('y_train_1', y_train), ('y_test_1', y_test)):
        np.save(name, labels)
예제 #21
파일: main.py 프로젝트: Zaru238/Courses
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """Load the CIFAR-10 dataset from disk and perform preprocessing to
    prepare it for the linear classifier.

    These are the same steps as we used for the SVM, but condensed to a
    single function.

    NOTE(review): the tail of the signature was missing from this snippet.
    The defaults after ``num_training`` follow the other four-way split
    helper of the same name in this collection — confirm against the
    original file.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.
        num_dev: Size of the random development sample drawn (without
            replacement) from the training subset.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test, X_dev,
        y_dev)``. Each ``X_*`` holds mean-centred image rows with a trailing
        column of ones.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # The notebook-style "del X_train, y_train" cleanup was dropped: inside a
    # function those names are still-unbound locals, so it cannot free memory.
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension (a trailing column of ones)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
예제 #22
def load_and_process(location = None):
    """Load CIFAR-10, scale and centre it, and split off a validation set.

    Args:
        location: Optional sub-directory of the current working directory
            that contains ``cifar-10-batches-py``. When ``None`` the archive
            is looked up directly in the working directory. The two cases are
            now a proper if/else; before, the ``location`` branch also ran
            for ``None`` and failed inside ``os.path.join``.

    Returns:
        Tuple ``(Xtrain, Ytrain, Xval, Yval, Xte, Yte)``; the shuffled
        training data is split 90% / 10% into training and validation parts.
    """
    if location is None:
        data_dir = os.path.join(os.getcwd(), 'cifar-10-batches-py')
    else:
        data_dir = os.path.join(os.getcwd(), location, 'cifar-10-batches-py')
    Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(data_dir)

    Xtr = np.reshape(Xtr, (Xtr.shape[0], 3072))
    Xte = np.reshape(Xte, (Xte.shape[0], 3072))

    # Scale each feature by its largest absolute training value. A feature
    # that is zero in every training row is left unscaled to avoid 0/0.
    feature_maxes = np.abs(Xtr).max(axis = 0)
    feature_maxes[feature_maxes == 0] = 1
    Xtr = Xtr/feature_maxes
    Xte = Xte/feature_maxes

    # Centre both splits on the mean training row.
    mean_image = np.mean(Xtr, axis = 0)
    Xtr -= mean_image
    Xte -= mean_image
    #end preprocessing

    Xtr, Ytr = nn.shuffle_training_sets(Xtr,Ytr)
    split = int(Xtr.shape[0]*.9)
    Xtrain, Xval = Xtr[:split], Xtr[split:]
    Ytrain, Yval = Ytr[:split], Ytr[split:]
    return Xtrain, Ytrain, Xval, Yval, Xte, Yte
def getimage(image, batch_size, trainnum=2000, testnum=500):
    # Purpose: build training and test data for the dataset named by `image`
    # ('FID' or 'cifar10') and return (tr_data, test_data, classnum).
    # Falls off the end (returns None) for any other value of `image`.
    # NOTE(review): the 'FID' branch lost lines in extraction — one `for`
    # loop has no body and both ImageDataGenerator(...) calls are unclosed.
    # Restore them from the original file.

    train_image = []
    train_label = []
    test_image = []
    test_label = []
    if image == 'FID':
        # `image` is rebound from the dataset name to the directory walker.
        image = os.walk(r'D:\360download\FIDS30')
        classnum = 0
        for i in image:
            # Only leaf directories (no sub-directories) are treated as classes.
            if i[1] == []:

                imagepath = glob.glob('%s\\*.jpg' % (i[0]))

                # NOTE(review): the body of this loop is missing.
                for i in range(len(imagepath[0:-5])):  # hold out the last five images as test data, train on the rest
                # NOTE(review): i - 6 selects indices -6..-2, which is not the
                # last five paths and overlaps the [0:-5] range above — confirm.
                for i in range(5):
                    test_image.append(imagepath[i - 6])
                classnum = classnum + 1
        # Call the image generator to convert the training images into 3-D arrays
        tr_data = ImageDataGenerator(images=train_image,

        # Call the image generator to convert the test images into 3-D arrays
        test_data = ImageDataGenerator(images=test_image,
        tr_data = tr_data.data
        test_data = test_data.data
        return tr_data, test_data, classnum
    if image == 'cifar10':
        cifar10_dir = 'cifar-10-batches-py'
        X_train, y_train, X_test, y_test = load_CIFAR10(
            cifar10_dir)  # load the CIFAR data
        # Keep the first `trainnum` training and `testnum` test samples.
        train_image = X_train[list(range(trainnum))]
        train_label = y_train[list(range(trainnum))]
        test_image = X_test[list(range(testnum))]
        test_label = y_test[list(range(testnum))]
        classnum = 10
        # Wrap each split as a Dataset, apply `resize`, then batch it.
        tr_data = Dataset.from_tensor_slices((train_image, train_label))
        tr_data = tr_data.map(resize)
        tr_data = tr_data.batch(batch_size)
        test_data = Dataset.from_tensor_slices((test_image, test_label))
        test_data = test_data.map(resize)
        test_data = test_data.batch(batch_size)
        return tr_data, test_data, classnum
예제 #24
    def load_data(self):
        print "load cifar-10 data..."
        # Load the raw CIFAR-10 data.
        cifar10_dir = 'datasets/cifar-10'
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

        # As a sanity check, we print out the size of the training and test data.
        print 'Training data shape: ', X_train.shape
        print 'Training labels shape: ', y_train.shape
        print 'Test data shape: ', X_test.shape
        print 'Test labels shape: ', y_test.shape
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
        print "load data done...\n---------\n"
예제 #25
def generate_hog_data():
    """Sub-sample precomputed HOG features and save them as ``.npy`` files.

    HOG features are read from ``hog_X_train.npy`` / ``hog_X_test.npy`` and
    the labels come from the CIFAR-10 archive. The requested sample counts
    use floor division so they stay integers on Python 3 as well as Python 2.
    """
    hog_X_train = np.load("hog_X_train.npy")
    hog_X_test = np.load("hog_X_test.npy")

    cifar10_dir = 'cifar-10-batches-py'
    # The raw images are only used for their sample count; the HOG arrays
    # supply the features.
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    X_train, y_train = extract_CIFAR10_samples(hog_X_train, y_train,
                                               X_train.shape[0] // 5)
    # NOTE(review): as in the original, the test count is derived from the
    # already reduced X_train rather than from X_test — confirm this is
    # intended.
    X_test, y_test = extract_CIFAR10_samples(hog_X_test, y_test,
                                             X_train.shape[0] // 5)
    np.save("X_hog_train", X_train)
    np.save("y_hog_train", y_train)
    np.save("X_hog_test", X_test)
    np.save("y_hog_test", y_test)
예제 #26
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """Load the CIFAR-10 dataset from disk and perform preprocessing to
    prepare it for the linear classifier.

    These are the same steps as we used for the SVM, but condensed to a
    single function.

    NOTE(review): the tail of the signature was missing from this snippet.
    The defaults after ``num_training`` follow the other four-way split
    helper of the same name in this collection — confirm against the
    original file.

    Args:
        num_training: Number of leading training images kept for training.
        num_validation: Number of images taken right after the training
            range and used for validation.
        num_test: Number of leading test images kept.
        num_dev: Size of the random development sample drawn (without
            replacement) from the training subset.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test, X_dev,
        y_dev)``. Each ``X_*`` holds mean-centred image rows with a trailing
        column of ones.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension (a trailing column of ones)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
예제 #27
def gen_datasets(cifar10_dir,
    # NOTE(review): the rest of the signature is missing from this snippet.
    # The body reads `num_training`, `num_validation`, `num_dev` and
    # `num_test`, so they are presumably parameters — restore their order and
    # defaults from the original file.
    #
    # Purpose: load CIFAR-10 from `cifar10_dir`, split it into train / val /
    # dev / test subsets and return them in a dict keyed 'X_train', 'y_train',
    # 'X_val', 'y_val', 'X_dev', 'y_dev', 'X_test', 'y_test'.

    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # Split the data into train, val, and test sets. In addition we will
    # create a small development set as a subset of the training data;
    # we can use this for development so our code runs faster.

    # Our validation set will be num_validation points from the original
    # training set.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    # Our training set will be the first num_train points from the original
    # training set.
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    # We will also make a development set, which is a small subset of
    # the training set (drawn at random, without replacement).
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # We use the first num_test points of the original test set as our
    # test set.
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Collect every split under a descriptive key.
    datasets = {}
    datasets['X_train'] = X_train
    datasets['X_val'] = X_val
    datasets['X_dev'] = X_dev
    datasets['X_test'] = X_test
    datasets['y_train'] = y_train
    datasets['y_dev'] = y_dev
    datasets['y_test'] = y_test
    datasets['y_val'] = y_val

    return datasets
예제 #28
def load_data_set():
    """Load CIFAR-10, flatten it, sub-sample it and cache it as ``.npy`` files.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` after sub-sampling.
    """
    # Load the dataset.
    cifar10_dir = '../cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Flatten every image into a single row vector (3072 values per the
    # original note).
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # Per the course requirements, shrink the training set to one fifth.
    # NOTE(review): the count argument was cut off in this snippet; it is
    # restored from the "1/5" comment — confirm against the original file.
    X_train, y_train = extract_CIFAR10_samples(X_train, y_train,
                                               X_train.shape[0] // 5)
    X_test, y_test = extract_CIFAR10_samples(X_test, y_test, X_test.shape[0])
    np.save("X_train", X_train)
    np.save("y_train", y_train)
    np.save("X_test", X_test)
    np.save("y_test", y_test)
    return X_train, y_train, X_test, y_test
예제 #29
def get_whitened_image():
    """Contrast-normalise and ZCA-whiten the combined CIFAR-10 images.

    Train and test images are concatenated, every image is reshaped to
    (pixels, channels) and transposed to (channels, pixels), the per-channel
    mean of each image is removed, and ``zca_whitening`` is applied to each
    of the three channels.

    NOTE(review): the function ends after the whitening loop without saving
    or returning anything; the tail may be missing from this snippet.
    """
    data_root = '../../data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(data_root)
    X = np.concatenate((X_train, X_test), axis=0)
    labels = np.concatenate((y_train, y_test), axis=0)

    # Merge the two spatial axes: (N, H, W, C) -> (N, H*W, C).
    pixels_per_image = X.shape[1] * X.shape[2]
    X = X.reshape((X.shape[0], pixels_per_image, X.shape[3]))

    print('Global contrast normalization...')
    # Channels first: (N, H*W, C) -> (N, C, H*W).
    X = X.transpose((0, 2, 1))
    X -= X.mean(axis=2, keepdims=True)

    print('ZCA whitening...')
    for channel in range(3):
        X[:, channel] = zca_whitening(X[:, channel])

    # save as (3,60000,1024)
예제 #30
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     num_dev=500):
    """Load CIFAR-10 and prepare train/val/test/dev matrices with a bias column.

    NOTE(review): the original signature was cut off after
    ``num_training=49000,``.  The remaining parameter names are the ones the
    body reads; their default values are reconstructed and should be
    confirmed against the callers.

    Args:
        num_training: number of leading training samples kept for training.
        num_validation: number of samples following the training range that
            are used for validation.
        num_test: number of leading test samples kept.
        num_dev: number of samples drawn at random, without replacement, from
            the training subset to form a small development set.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev)``.
        Every ``X_*`` array is flattened to one row per sample, has the
        training mean subtracted and carries a trailing column of ones.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data.  The validation slice must be taken before X_train
    # is overwritten with its own leading subset.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    # The development set is a random subset of the (already reduced)
    # training set.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image.  The mean is computed on
    # the training subset only and reused for every other split.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # Append a constant-one feature to every row so a bias term can be
    # folded into the weight matrix.
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
예제 #31
파일: main.py 프로젝트: Zaru238/Courses
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.

    Args:
        num_training: number of leading training samples kept for training.
        num_validation: number of samples following the training range that
            are used for validation.
        num_test: number of leading test samples kept.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)`` where every
        ``X_*`` array has the training mean image subtracted and is reshaped
        to one row per sample.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # No "clear previously loaded data" step is needed here: X_train, y_train,
    # X_test and y_test are locals of this call, so there is nothing bound to
    # delete before they are assigned below.
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # Subsample the data.  The validation slice must be taken before X_train
    # is overwritten with its own leading subset.
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image (computed on the training
    # subset only and reused for the other splits).
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """Load CIFAR-10 and carve it into training, validation and test subsets.

    Args:
        num_training: how many leading training samples to keep for training.
        num_validation: how many samples, taken right after the training
            range, to use for validation.
        num_test: how many leading test samples to keep.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``; no further
        preprocessing is applied to the arrays.
    """
    # Load the raw CIFAR-10 data.
    data_root = r'machine_learning_study/dataset/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(data_root)

    # Index ranges of the three subsets.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)

    # Validation: samples num_training .. num_training + num_validation
    # (49000-50000 with the defaults).  Taken first, while X_train still
    # holds the full training set.
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    # Training: samples 0 .. num_training (0-49000 with the defaults).
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    # Test: samples 0 .. num_test (0-1000 with the defaults).
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """Load CIFAR-10, subsample it and subtract the training mean image.

    The dataset directory is built from the module-level name ``dataset_dir``
    and printed before loading.

    Args:
        num_training: number of leading training samples kept for training.
        num_validation: number of samples following the training range that
            are used for validation.
        num_test: number of leading test samples kept.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)`` with the mean of
        the training subset subtracted in place from every ``X_*`` array.
    """
    cifar10_dir = './'+dataset_dir+'/cifar-10-batches-py'
    # print() with a single argument produces the same output on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample.  The validation slice must be taken before X_train is
    # overwritten with its own leading subset.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize: the mean image comes from the training subset only and is
    # reused for the validation and test subsets.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    return X_train, y_train, X_val, y_val, X_test, y_test
import neural_net, data_utils
import numpy as np
import matplotlib.pyplot as plt
import time
if __name__ == '__main__':
    # Script entry point: load CIFAR-10, build a two-layer network and set up
    # the training configuration (the snippet ends after the validation split
    # begins).

    start_time = time.time()

    # Network dimensions and momentum passed to TwoLayerNet.
    input_size = 3072
    hidden_size = 500
    output_size = 10
    momentum = 0.95

    # Raw string literal: in a normal literal "\U" starts a \UXXXXXXXX escape,
    # which makes this Windows path a SyntaxError on Python 3.  The raw form
    # has the same value the plain literal had on Python 2.
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(r"C:\Users\SHARATH\Git\cs291k-mp1\dataset")
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size, 0.00001, momentum)

    # Configuration parameters
    training_size = 49000
    test_size = 10000
    validation_size = 1000

    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    # Subsample the data: the validation set is the block of samples that
    # follows the first training_size training samples.
    mask = range(training_size, training_size + validation_size)
    X_val = X_train[mask]
예제 #35
        # Nearest-neighbour prediction: for every test row, copy the label of
        # the training row with the smallest L1 distance.
        # NOTE(review): the row count is read from `X_test` but the loop below
        # indexes `Xtest`; the method signature is not visible here, so confirm
        # which of the two names is the actual parameter.
        num_test = X_test.shape[0]
        # Output array uses the same dtype as the stored training labels.
        Ypred = np.zeros(num_test, dtype = self.Ytrain.dtype)

        # loop over all test rows
        for i in xrange(num_test):
            # find the nearest training image to the i'th test image
            # using the L1 distance (sum of absolute value differences)
            distances = np.sum(np.abs(self.Xtrain - Xtest[i,:]), axis = 1)
            min_index = np.argmin(distances) # get the index with smallest distance
            Ypred[i] = self.Ytrain[min_index] # predict the label of the nearest example
            # progress output: print the row index every 5 rows
            if(i % 5==0):
                print i
        return Ypred
a = L1Distance()
cifar10_dir = '/root/cs231n/assignment1/cs231n/datasets/cifar-10-batches-py/'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# Report the array shapes.  %-formatting inside print(...) gives the same
# output on Python 2 and Python 3; the bare print statements were a
# SyntaxError on Python 3.
print('X_train shape: %s' % (X_train.shape,))
print('y_train shape: %s' % (y_train.shape,))
print('X_test shape: %s' % (X_test.shape,))
print('y_test shape: %s' % (y_test.shape,))

#why do we need shape[0]?
# NOTE(review): yte_predict is not defined in the visible part of this
# script; the training/prediction step appears to have been elided.
print('accuracy: %f' % (np.mean(yte_predict == y_test)))

from data_utils import load_CIFAR10
from k_nearest_neighbour import KNearestNeighbour
import numpy as np

# Load CIFAR-10: Xtr/Ytr are the training images and labels,
# Xte/Yte are the test images and labels.
Xtr, Ytr, Xte, Yte = load_CIFAR10('dataset/')

# Convert the image data set to raw data format.
# NOTE(review): the conversion and prediction steps that define Y_pred and K
# are not present in the visible part of this script.

# print(...) with a single pre-formatted string behaves the same on Python 2
# and Python 3; the bare print statement was a SyntaxError on Python 3.
print("Efficiency in prediction %f for k=%d" % (np.mean(Y_pred == Yte), K))
예제 #37
	num_samples = Xtr.shape[0]

	Xval = Xtr[num_samples*0.8:]
	Yval = Ytr[num_samples*0.8:]
	Xtr = Xtr[:num_samples*0.8]
	Ytr = Ytr[:num_samples*0.8]

	return Xtr, Ytr, Xval, Yval

# Flattened image size and number of output classes.
inputsize = 32*32*3
outputsize = 10

##Load Dataset
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# code outside the visible part of this script may refer to it.
dir = os.path.dirname(__file__)
rootname = os.path.join(dir, 'dataset/cifar-10-batches-py')
Xtr, Ytr, Xte, Yte = load_CIFAR10(rootname)
# Flatten every image into one row of `inputsize` values.  The row count is
# taken from the arrays themselves instead of the hard-coded 50000/10000, so
# a differently sized dataset is not rejected.
Xtr = Xtr.reshape(Xtr.shape[0], inputsize)
Xte = Xte.reshape(Xte.shape[0], inputsize)
Xtr, Ytr, Xval, Yval = split_strategy(Xtr, Ytr)

#define the hyper parameters
hiddenlayer_size_arg = 500
batch_size_arg = 2000
num_iters_arg = 1000
learning_rate_arg = 0.002
learning_rate_decay_arg = 0.98
verbose = False
###### uncomment the following section to print the value of parameters ######
# print(...) with a single string behaves the same on Python 2 and Python 3;
# the bare print statement was a SyntaxError on Python 3.
print("params values:")
예제 #38
import numpy as np
import time

if __name__ == '__main__':

    start_time = time.time()
    file_location = sys.argv[1]+"/cifar-10-batches-py"
    print file_location

    input_size = 3072
    hidden_size = 500
    output_size =10
    momentum =0.95

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(file_location)
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size, 0.00001, momentum)

    #Configuration Parameters
    training_size =49000
    test_size = 10000
    validation_size = 1000

    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    mask = range(training_size, training_size + validation_size)