Example #1
def PreDataset():
    cifar10_dir = 'data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImg(X_train, y_train)
    input("Enter any key to Cross-validation...")

    num_train = 49000
    num_val = 1000
    # carve out a validation set
    sample_index = range(num_train, num_train + num_val)
    X_val = X_train[sample_index]
    y_val = y_train[sample_index]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    # zero-center the data (here: a single scalar mean per split)
    X_train -= np.mean(X_train)
    X_val -= np.mean(X_val)
    X_test -= np.mean(X_test)

    # VisualizeImg(X_train, y_train)  # visualize the zero-centered data

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    # append a column of ones to X so the bias b folds into the weights
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test
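Note: the zero-centering above subtracts a single scalar mean from each split independently. The convention used by most of the later examples computes a per-pixel mean image on the training split only and subtracts that same image from every split, so no test statistics leak into preprocessing. A minimal sketch of that convention, assuming arrays shaped like the ones returned by load_CIFAR10:

import numpy as np

def zero_center(X_train, X_val, X_test):
    # mean image computed on the training split only
    mean_image = np.mean(X_train, axis=0)
    # the same training mean is subtracted from every split
    return X_train - mean_image, X_val - mean_image, X_test - mean_image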
Example #2
def knn_predict():
    if len(sys.argv) < 3:
        print "need at least 2 parameters"
        exit(1)

    model = sys.argv[1]
    param1 = sys.argv[2]

    cifar10_dir = '../cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    #X_train, y_train = load_CIFAR_batch(cifar10_dir + '/data_batch_1')

    X_train = X_train[:1000]
    y_train = y_train[:1000]
    test_data = load_test_data()

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    y_train = np.reshape(y_train, (y_train.shape[0], -1))
    test_data = np.reshape(test_data, (test_data.shape[0], -1))

    y_pred = do_LogisticRegression(X_train, y_train, test_data, C=66)

    print(y_pred.shape)

    scp_file = 'test.scp'
    fin = codecs.open(scp_file, 'r')
    images = fin.readlines()
    fin.close()

    assert (len(images) == y_pred.shape[0])
    output = codecs.open('prediction.txt', 'w')
    for i in range(len(images)):
        basename = images[i].split('\n')[0]
        output.write(basename + ' ' + str(y_pred[i]) + '\n')
    output.close()
Example #3
def get_CIFAR10_data(num_training=5000, num_validation=1000, num_test=500):
    """ On télécharge ici à partir du dossier et on prepare les données à être recu par le reseau de neuronne
  
    """

    # Load the raw data.
    cifar10_dir = '../../datasets/cifar-10-batches-py/'
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_train = X_train.swapaxes(1, 3)
    X_val = X_val.swapaxes(1, 3)
    X_test = X_test.swapaxes(1, 3)
    return X_train, y_train, X_val, y_val, X_test, y_test
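A caveat on the swapaxes(1, 3) call above: on an (N, H, W, C) array it yields (N, C, W, H), so height and width end up swapped as well; a strict NHWC-to-NCHW conversion is transpose(0, 3, 1, 2). The two only agree in shape because CIFAR images are square. A quick check with a deliberately non-square array:

import numpy as np

x = np.zeros((5, 32, 48, 3))          # N=5, H=32, W=48, C=3
print(x.swapaxes(1, 3).shape)         # (5, 3, 48, 32) -- H and W are flipped
print(x.transpose(0, 3, 1, 2).shape)  # (5, 3, 32, 48) -- proper NHWC -> NCHW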
Example #4
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500):
    """
    """
    root_dir = '../dataset/cifar-10-batches-py'
    X_train, Y_train, X_test, Y_test = data_utils.load_CIFAR10(root_dir)
    
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    Y_val = Y_train[mask]
    
    mask = list(range(num_training))
    X_train = X_train[mask]
    Y_train = Y_train[mask]
    
    mask = list(range(num_test))
    X_test = X_test[mask]
    Y_test = Y_test[mask]

    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    Y_dev = Y_train[mask]
    
    #reshape and subtract the mean
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))
    mean_image = np.mean(X_train, axis=0)             # zero-center the data
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image
    
    return X_train, Y_train, X_val, Y_val, X_test, Y_test, X_dev, Y_dev
Example #5
def pre_dataset(path):
    X_train, y_train, X_test, y_test = load_CIFAR10(path)

    num_train = 9000
    num_val = 1000

    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    """
    print('Train data shape: {}'.format(X_train.shape))
    print('Train labels shape: {}'.format(y_train.shape))
    print('Validation data shape: {}'.format(X_val.shape))
    print('Validation labels shape: {}'.format(y_val.shape))
    print('Test data shape: {}'.format(X_test.shape))
    print('Test labels shape: {}'.format(y_test.shape))
    """
    return X_train, y_train, X_test, y_test, X_val, y_val
Example #6
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Use the cs231n data_utils.py script to load the data
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #7
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    '''
    Train data shape:  (49000, 32, 32, 3)
    Train labels shape:  (49000,)
    '''
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #8
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):

    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """

    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #9
    def load_CIFAR10(self):
        """
        Load the raw CIFAR-10 data and visualize a sample:
        show a few examples of training images from each class.
        """
        cifar10_dir = 'datasets/cifar-10'
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

        # As a sanity check, we print out the size of the training and test data.
        print('Training data shape: ', X_train.shape)  # (50000, 32, 32, 3)
        print('Training labels shape: ', y_train.shape)  # (50000,)
        print('Test data shape: ', X_test.shape)  # (10000, 32, 32, 3)
        print('Test labels shape: ', y_test.shape)  # (10000,)

        # Visualize a few of the samples

        classes = [
            'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
            'ship', 'truck'
        ]
        num_classes = len(classes)
        samples_per_class = 7  # visualize 7 samples per class
        for y, clas in enumerate(classes):
            print("\n\ny =", y, "class =", clas)
            idxs = np.flatnonzero(y_train == y)  # indices where y_train == y in the flattened array
            print("In training set, the number of class <", y, "> is", len(idxs))
            idxs = np.random.choice(idxs, samples_per_class,
                                    replace=False)  # randomly pick 7 samples
            print("randomly selected", samples_per_class, "samples from this set; their indices are:", idxs[:])
            print("plt_index =", end=' ')
            for i, idx in enumerate(idxs):
                plt_idx = i * num_classes + y + 1
                print(plt_idx, end=' ')
                plt.subplot(samples_per_class, num_classes, plt_idx)
                plt.imshow(X_train[idx].astype('uint8'))  # show the image
                plt.axis('off')
                if i == 0:
                    plt.title(clas)
        # Save the figure
        if not os.path.exists("visual_CIFAR10.jpg"):
            plt.savefig("visual_CIFAR10.jpg")
            plt.show()

        # To make training faster in this experiment, keep only the first
        # 5000 training samples and 500 test samples.
        print("sampling...   reshape...")
        X_train = X_train[:5000]
        y_train = y_train[:5000]
        X_test = X_test[:500]
        y_test = y_test[:500]

        # Reshape the matrices to (nb_samples, nb_features)
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))
        print("X_train.shape =", X_train.shape, "X_test.shape =", X_test.shape)

        # Store the splits on the instance
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
        print("----")
Example #10
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. 
    """
    # Load the raw CIFAR-10 data
    
    cifar10_dir = './cifar-10-batches-py'

    data_train_all, label_train_all, data_test, label_test = load_CIFAR10(cifar10_dir)
   
    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    data_val = data_train_all[mask]
    label_val = label_train_all[mask]
    mask = range(num_training)
    data_train = data_train_all[mask]
    label_train = label_train_all[mask]
    mask = range(num_test)
    data_test = data_test[mask]
    label_test = label_test[mask]  

    # Normalize the data: subtract the mean image
    # (note: the mean here comes from the full training set, and the test
    # split is normalized with its own mean)
    mean_image = np.mean(data_train_all, axis=0)
    mean_test_image = np.mean(data_test, axis=0)
    data_train_m = data_train - mean_image
    data_val_m = data_val - mean_image
    data_test_m = data_test - mean_test_image

    
    return data_train_m, label_train, data_val_m, label_val, data_test_m, label_test
Example #11
def make_cifar10_dataset(cifar_dir, n_validation=0, vectorize=False):
    NUM_CLASSES = 10
    NUM_TRAIN = 50000
    NUM_TEST = 10000

    X_train, y_train, X_test, y_test = load_CIFAR10(cifar_dir)

    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # one-hot encode the labels
    y_train_temp = np.zeros((NUM_TRAIN, NUM_CLASSES))
    for i in range(NUM_TRAIN):
        y_train_temp[i, y_train[i]] = 1
    y_train = y_train_temp

    y_test_temp = np.zeros((NUM_TEST, NUM_CLASSES))
    for i in range(NUM_TEST):
        y_test_temp[i, y_test[i]] = 1
    y_test = y_test_temp

    # make validation set
    X_valid = X_train[:n_validation]
    X_train = X_train[n_validation:]

    y_valid = y_train[:n_validation]
    y_train = y_train[n_validation:]

    return (X_train, y_train, X_valid, y_valid, X_test, y_test)
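The per-sample loops above build the one-hot labels one row at a time; indexing an identity matrix does the same thing in one shot. A minimal equivalent sketch:

import numpy as np

y = np.array([3, 0, 9, 1])
one_hot = np.eye(10)[y]   # shape (4, 10); row i has a single 1 at column y[i]
assert one_hot[0, 3] == 1.0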
Example #12
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.  
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
        
    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #13
def pre_dataset():
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImage(X_train, y_train)
    input('Press Enter to continue to cross-validation...')

    num_train = 49000
    num_val = 1000
    # carve out a validation set
    sample_index = range(num_train, num_train + num_val)
    X_val = X_train[sample_index]
    y_val = y_train[sample_index]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    # reshape to rows, then subtract the mean image
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))

    mean_image = np.mean(X_train, axis=0)
    X_train = X_train - mean_image
    X_test = X_test - mean_image
    X_val = X_val - mean_image

    # add a bias dimension (column of ones) so the bias folds into W
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])

    return X_train, y_train, X_test, y_test, X_val, y_val
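The appended column of ones is the standard bias trick: with a constant 1 as the last feature, the bias vector becomes the last row of W and the classifier reduces to a single matrix product. A small sketch with hypothetical dimensions:

import numpy as np

N, D, C = 4, 3072, 10
X = np.hstack([np.random.randn(N, D), np.ones((N, 1))])  # (N, D+1)
W = np.random.randn(D + 1, C)   # the last row of W plays the role of the bias b
scores = X.dot(W)               # same as X[:, :D].dot(W[:D]) + W[D]
print(scores.shape)             # (4, 10)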
Example #14
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'E:/research/CS231n/cifar-10-python/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #15
def main():
    cifar10_dir = '../datasets/cifar-10-batches-py'

    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_two_loops(X_test)
    y_test_pred = classifier.predict_labels(dists, k=1)

    # Compute and print the fraction of correctly predicted examples
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' %
          (num_correct, num_test, accuracy))

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' %
          (num_correct, num_test, accuracy))

    dists_one = classifier.compute_distances_one_loop(X_test)

    # To ensure that our vectorized implementation is correct, we make sure that it
    # agrees with the naive implementation. There are many ways to decide whether
    # two matrices are similar; one of the simplest is the Frobenius norm. In case
    # you haven't seen it before, the Frobenius norm of two matrices is the square
    # root of the squared sum of differences of all elements; in other words, reshape
    # the matrices into vectors and compute the Euclidean distance between them.
    difference = np.linalg.norm(dists - dists_one, ord='fro')
    print('One loop difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')

    dists_two = classifier.compute_distances_no_loops(X_test)

    # check that the distance matrix agrees with the one we computed before:
    difference = np.linalg.norm(dists - dists_two, ord='fro')
    print('No loop difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')
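The long comment above describes the Frobenius norm; its claim that the norm equals the Euclidean distance between the flattened matrices is easy to verify numerically:

import numpy as np

A = np.random.randn(6, 4)
B = np.random.randn(6, 4)
fro = np.linalg.norm(A - B, ord='fro')
euclid = np.sqrt(np.sum((A - B) ** 2))   # the flatten-and-Euclidean definition
assert np.allclose(fro, euclid)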
Example #16
def main(dataset):
	# Load the CIFAR data
	print("Loading data..")
	Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(dataset)
	print("Loaded data!")
	Xtr = flatten(Xtr)
	Xte = flatten(Xte)

	mean_image = np.mean(Xtr, axis=0)
	Xtr = preProcess(Xtr, mean_image)
	Xte = preProcess(Xte, mean_image)

	N, D = Xtr.shape
	vSize = N * 20 // 100  # Set aside 20% of the data for validation

	# Create network and run training
	nn = network.TwoLayerNet(3072, 1024, 10)
	stats = nn.train(Xtr[vSize:], Ytr[vSize:], Xtr[:vSize], Ytr[:vSize], verbose=False)

	# Do not print stats..
	#print(stats['train_acc_history'])
	#print(stats['loss_history'])
	#print(stats['val_acc_history'])
	#plt.plot(stats['train_acc_history'])
	#plt.show()

	# Test accuracy
	print("Training accuracy: %.2f" % stats['train_acc_history'][-1])
	print("Validation accuracy: %.2f" % stats['val_acc_history'][-1])
	print("Testing accuracy: %.2f" % (nn.accuracy(Xte, Yte) * 100))
Example #17
def make_bearing_dataset(data_dir,
                         n_validation=0,
                         vectorize=False,
                         num_labeled_samples=1320):
    NUM_CLASSES = 10

    X_train, y_train, X_test, y_test = load_CIFAR10(data_dir)
    # 39600, 32, 32, 1   39600,
    #  3750, 32, 32, 1   3750,

    NUM_TRAIN = X_train.shape[0]
    NUM_TEST = X_test.shape[0]
    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))  # 39600, 1024
        X_test = np.reshape(X_test, (X_test.shape[0], -1))  # 3750, 1024

    # one-hot encode the labels
    y_train_temp = np.zeros((NUM_TRAIN, NUM_CLASSES))
    for i in range(NUM_TRAIN):
        y_train_temp[i, y_train[i]] = 1
    y_train = y_train_temp  # 39600, 10

    y_test_temp = np.zeros((NUM_TEST, NUM_CLASSES))
    for i in range(NUM_TEST):
        y_test_temp[i, y_test[i]] = 1
    y_test = y_test_temp  # 3750, 10

    X_train_labeled, y_train_labeled = draw_labeled_data(
        X_train, y_train, labeled_sample_per_category=num_labeled_samples)

    return (X_train, y_train, X_train_labeled, y_train_labeled, X_test, y_test)
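draw_labeled_data is not shown in this example. A plausible minimal sketch, assuming it keeps a fixed number of examples per class from one-hot labels (the behavior here is an assumption, not the original helper):

import numpy as np

def draw_labeled_data(X, y_one_hot, labeled_sample_per_category):
    # hypothetical sketch: keep the first k examples of every class
    labels = np.argmax(y_one_hot, axis=1)
    keep = np.concatenate([
        np.flatnonzero(labels == c)[:labeled_sample_per_category]
        for c in range(y_one_hot.shape[1])
    ])
    return X[keep], y_one_hot[keep]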
Example #18
def serialize_data():
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    datetime_str = datetime.datetime.today().strftime('%Y%m%d-%H:%M:%S')
    serialize_cifar_pool3(X_train, 'X_train_' + datetime_str)
    serialize_cifar_pool3(X_test, 'X_test_' + datetime_str)
    np.save('y_train_' + datetime_str, y_train)
    np.save('y_test_' + datetime_str, y_test)
Example #19
def pre_dataset():
    cifar10_dir = 'D:/dataset/cifar-10-python/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImage(X_train, y_train)

    num_train = 49000
    num_val = 1000

    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))

    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # add a bias dimension (column of ones) so the bias folds into W
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])

    return X_train, y_train, X_test, y_test, X_val, y_val
Example #20
def serialize_data():
    X_train, y_train, X_test, y_test = load_CIFAR10(
        cifar10_dir
    )  # Change this line to take the sketches dataset as input using input_data_sketches.read_data_sets() for testing with sketches
    serialize_cifar_pool3(X_train, 'X_train_1')
    serialize_cifar_pool3(X_test, 'X_test_1')
    np.save('y_train_1', y_train)
    np.save('y_test_1', y_test)
Example #21
File: main.py  Project: Zaru238/Courses
def get_CIFAR10_data(num_training=49000,
                     num_validation=1000,
                     num_test=1000,
                     num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.  
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
    try:
        del X_train, y_train
        del X_test, y_test
        print('Clear previously loaded data.')
    except NameError:
        pass

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
Example #22
def load_and_process(location=None):
    Xtr, Ytr, Xte, Yte = None, None, None, None
    if location is None:
        Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(os.path.join(os.getcwd(), 'cifar-10-batches-py'))
    else:
        Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(os.path.join(os.getcwd(), location, 'cifar-10-batches-py'))
    Xtr, Xte = np.reshape(Xtr, (Xtr.shape[0], 3072)), np.reshape(Xte, (Xte.shape[0], 3072))
    # preprocessing
    feature_maxes = np.abs(Xtr).max(axis=0)
    Xtr = Xtr / feature_maxes
    Xte = Xte / feature_maxes
    mean_image = np.mean(Xtr, axis=0)
    Xtr -= mean_image
    Xte -= mean_image
    # end preprocessing
    Xtr, Ytr = nn.shuffle_training_sets(Xtr,Ytr)
    training_set_size = Xtr.shape[0]
    Xtrain, Xval = Xtr[:int(training_set_size * .9)], Xtr[int(training_set_size * .9):]
    Ytrain, Yval = Ytr[:int(training_set_size * .9)], Ytr[int(training_set_size * .9):]
    return Xtrain, Ytrain, Xval, Yval, Xte, Yte
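nn.shuffle_training_sets is not shown here; presumably it permutes the examples and labels together before the 90/10 split. A minimal sketch of such a helper (an assumption, not the original code):

import numpy as np

def shuffle_training_sets(X, y):
    # one permutation, applied to data and labels together so pairs stay aligned
    perm = np.random.permutation(X.shape[0])
    return X[perm], y[perm]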
Example #23
def getimage(image, batch_size, trainnum=2000, testnum=500):

    train_image = []
    train_label = []
    test_image = []
    test_label = []
    if image == 'FID':
        image = os.walk(r'D:\360download\FIDS30')
        classnum = 0
        for i in image:
            if i[1] == []:

                imagepath = glob.glob('%s\\*.jpg' % (i[0]))

                # use the last five images per class for testing, the rest for training
                for j in range(len(imagepath[0:-5])):
                    train_image.append(imagepath[j])
                    train_label.append(classnum)
                for j in range(5):
                    test_image.append(imagepath[j - 5])
                    test_label.append(classnum)
                classnum = classnum + 1
        # use the image generator to turn the training images into arrays
        tr_data = ImageDataGenerator(images=train_image,
                                     labels=train_label,
                                     batch_size=batch_size,
                                     num_classes=classnum)

        # use the image generator to turn the test images into arrays
        test_data = ImageDataGenerator(images=test_image,
                                       labels=test_label,
                                       batch_size=batch_size,
                                       num_classes=classnum,
                                       shuffle=False)
        tr_data = tr_data.data
        test_data = test_data.data
        return tr_data, test_data, classnum
    if image == 'cifar10':
        cifar10_dir = 'cifar-10-batches-py'
        X_train, y_train, X_test, y_test = load_CIFAR10(
            cifar10_dir)  # load the CIFAR data
        train_image = X_train[list(range(trainnum))]
        train_label = y_train[list(range(trainnum))]
        test_image = X_test[list(range(testnum))]
        test_label = y_test[list(range(testnum))]
        classnum = 10
        tr_data = Dataset.from_tensor_slices((train_image, train_label))
        tr_data = tr_data.map(resize)
        tr_data = tr_data.batch(batch_size)
        test_data = Dataset.from_tensor_slices((test_image, test_label))
        test_data = test_data.map(resize)
        test_data = test_data.batch(batch_size)
        return tr_data, test_data, classnum
Example #24
    def load_data(self):
        print("load cifar-10 data...")
        # Load the raw CIFAR-10 data.
        cifar10_dir = 'datasets/cifar-10'
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

        # As a sanity check, we print out the size of the training and test data.
        print('Training data shape: ', X_train.shape)
        print('Training labels shape: ', y_train.shape)
        print('Test data shape: ', X_test.shape)
        print('Test labels shape: ', y_test.shape)
        self.X_train, self.y_train, self.X_test, self.y_test = X_train, y_train, X_test, y_test
        print("load data done...\n---------\n")
Example #25
def generate_hog_data():

    hog_X_train = np.load("hog_X_train.npy")
    hog_X_test = np.load("hog_X_test.npy")

    cifar10_dir = 'cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    X_train, y_train = extract_CIFAR10_samples(hog_X_train, y_train,
                                               X_train.shape[0] // 5)
    X_test, y_test = extract_CIFAR10_samples(hog_X_test, y_test,
                                             X_train.shape[0] // 5)
    np.save("X_hog_train", X_train)
    np.save("y_hog_train", y_train)
    np.save("X_hog_test", X_test)
    np.save("y_hog_test", y_test)
Example #26
def get_CIFAR10_data(num_training=49000,
                     num_validation=1000,
                     num_test=1000,
                     num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
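A typical way to call this function and sanity-check the splits, assuming the CIFAR-10 batches are on disk at datasets/cifar-10-batches-py; the expected shapes follow from the defaults plus the appended bias column (32*32*3 + 1 = 3073):

X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print(X_train.shape)  # (49000, 3073)
print(X_val.shape)    # (1000, 3073)
print(X_test.shape)   # (1000, 3073)
print(X_dev.shape)    # (500, 3073)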
Example #27
def gen_datasets(cifar10_dir,
                 num_training=4900,
                 num_validation=1000,
                 num_test=1000,
                 num_dev=500):

    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # Split the data into train, val, and test sets. In addition we will
    # create a small development set as a subset of the training data;
    # we can use this for development so our code runs faster.

    # Our validation set will be num_validation points from the original
    # training set.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    # Our training set will be the first num_train points from the original
    # training set.
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    # We will also make a development set, which is a small subset of
    # the training set.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # We use the first num_test points of the original test set as our
    # test set.
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    datasets = {
        'X_train': X_train,
        'X_val': X_val,
        'X_dev': X_dev,
        'X_test': X_test,
        'y_train': y_train,
        'y_val': y_val,
        'y_dev': y_dev,
        'y_test': y_test,
    }

    return datasets
Example #28
def load_data_set():
    # load the dataset
    cifar10_dir = '../cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # flatten each image into a 3072-dimensional vector
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # per the course requirements, shrink the training set to 1/5
    X_train, y_train = extract_CIFAR10_samples(X_train, y_train,
                                               X_train.shape[0])
    X_test, y_test = extract_CIFAR10_samples(X_test, y_test, X_test.shape[0])
    np.save("X_train", X_train)
    np.save("y_train", y_train)
    np.save("X_test", X_test)
    np.save("y_test", y_test)
    return X_train, y_train, X_test, y_test
Example #29
def get_whitened_image():
    cifar10_dir = '../../data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    # reshape X to (60000, 1024, 3)
    X = X.reshape((X.shape[0], X.shape[1] * X.shape[2], X.shape[3]))
    # normalization
    print('Global contrast normalization...')
    X = X.transpose((0, 2, 1))  # (60000, 3, 1024)
    X -= np.mean(X, axis=2).reshape((X.shape[0], X.shape[1], 1))
    # ZCA whitening
    print('ZCA whitening...')
    for i in range(3):
        X[:,i] = zca_whitening(X[:,i])

    # save as (3,60000,1024)
    np.save('../../data/cifar10/cifar10.whitened_image.npy',X.transpose((1,0,2))*256.0)
    np.save('../../data/cifar10/cifar10.label.npy',y)
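zca_whitening is defined elsewhere in this project. A common formulation decorrelates features through an eigendecomposition of the covariance matrix; the sketch below follows that recipe under the assumption that X is already mean-centered, and is not the original implementation:

import numpy as np

def zca_whitening(X, eps=1e-5):
    # X: (num_samples, num_features), assumed mean-centered per feature
    cov = np.dot(X.T, X) / X.shape[0]
    U, S, _ = np.linalg.svd(cov)
    # rotate into the eigenbasis, rescale each direction, rotate back
    W = U.dot(np.diag(1.0 / np.sqrt(S + eps))).dot(U.T)
    return X.dot(W)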
Example #30
def get_CIFAR10_data(num_training=49000,
                     num_validation=1000,
                     num_test=1000,
                     num_dev=500):
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
Example #31
File: main.py  Project: Zaru238/Courses
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.  
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
    try:
        del X_train, y_train
        del X_test, y_test
        print('Clear previously loaded data.')
    except NameError:
        pass

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #32
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # Load the raw CIFAR-10 data
    cifar10_dir = r'machine_learning_study/dataset/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    # samples 49000-50000 become the validation set
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    # samples 0-49000 become the training set
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    # the first 1000 test samples become the test set
    X_test = X_test[mask]
    y_test = y_test[mask]

    return X_train, y_train, X_val, y_val, X_test, y_test
Example #33
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    cifar10_dir = './' + dataset_dir + '/cifar-10-batches-py'
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_train = X_train.swapaxes(1, 3)
    X_val = X_val.swapaxes(1, 3)
    X_test = X_test.swapaxes(1, 3)
    return X_train, y_train, X_val, y_val, X_test, y_test
Example #34
import neural_net, data_utils
import numpy as np
import matplotlib.pyplot as plt
import time
if __name__ == '__main__':

    start_time = time.time()

    input_size = 3072
    hidden_size = 500
    output_size = 10
    momentum = 0.95

    # raw string: a plain "C:\U..." is an invalid escape sequence in Python 3
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(r"C:\Users\SHARATH\Git\cs291k-mp1\dataset")
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size, 0.00001, momentum)

    #Configuration Parameters
    training_size = 49000
    test_size = 10000
    validation_size = 1000

    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    # Subsample the data
    mask = range(training_size, training_size + validation_size)
    X_val = X_train[mask]
Example #35
        num_test = X_test.shape[0]
        Ypred = np.zeros(num_test, dtype=self.Ytrain.dtype)

        # loop over all test rows
        for i in range(num_test):
            # find the nearest training image to the i'th test image
            # using the L1 distance (sum of absolute value differences)
            distances = np.sum(np.abs(self.Xtrain - Xtest[i, :]), axis=1)
            min_index = np.argmin(distances)  # get the index with smallest distance
            Ypred[i] = self.Ytrain[min_index]  # predict the label of the nearest example
            if i % 5 == 0:
                print(i)
        return Ypred
        
a = L1Distance()
cifar10_dir = '/root/cs231n/assignment1/cs231n/datasets/cifar-10-batches-py/'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
a.train(X_train, y_train)

# shape[0] keeps the sample count while flattening each image to 3*32*32 values
Xtr_rows = X_train.reshape(X_train.shape[0], 3 * 32 * 32)
Xte_rows = X_test.reshape(X_test.shape[0], 3 * 32 * 32)
a.train(Xtr_rows, y_train)
yte_predict = a.predict(Xte_rows)
print('accuracy: %f' % (np.mean(yte_predict == y_test)))
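The predict loop above computes L1 distances one test row at a time; broadcasting produces the whole (num_test, num_train) distance matrix in a single expression, at the cost of a (num_test, num_train, D) intermediate. A minimal sketch; for the full 50000x10000 CIFAR split that intermediate does not fit in memory, so it is usually processed in chunks:

import numpy as np

def l1_distances(Xtrain, Xtest):
    # (T, 1, D) - (1, N, D) broadcasts to (T, N, D), then sum over features
    return np.abs(Xtest[:, None, :] - Xtrain[None, :, :]).sum(axis=2)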

Example #36
from data_utils import load_CIFAR10
from k_nearest_neighbour import KNearestNeighbour
import numpy as np

# load CIFAR-10: training data Xtr with labels Ytr, test data Xte with labels Yte
Xtr, Ytr, Xte, Yte = load_CIFAR10('dataset/')

"""Converting the image dataset to raw row-vector format"""
Xtr_rows = Xtr.reshape(Xtr.shape[0], Xtr.shape[1] * Xtr.shape[2] * Xtr.shape[3])
Xte_rows = Xte.reshape(Xte.shape[0], Xte.shape[1] * Xte.shape[2] * Xte.shape[3])

nn = KNearestNeighbour()
K = nn.train(Xtr_rows, Ytr)
Y_pred = np.zeros(Yte.shape[0], dtype=Ytr.dtype)
Y_pred = nn.predict(Xte_rows, K)
print("Prediction accuracy %f for k=%d" % (np.mean(Y_pred == Yte), K))
Example #37
	num_samples = Xtr.shape[0]

	# use the first 80% for training and the last 20% for validation
	split = int(num_samples * 0.8)
	Xval = Xtr[split:]
	Yval = Ytr[split:]
	Xtr = Xtr[:split]
	Ytr = Ytr[:split]

	return Xtr, Ytr, Xval, Yval

inputsize = 32*32*3
outputsize = 10

##Load Dataset
base_dir = os.path.dirname(__file__)
rootname = os.path.join(base_dir, 'dataset/cifar-10-batches-py')
Xtr, Ytr, Xte, Yte = load_CIFAR10(rootname)
Xtr = Xtr.reshape(50000, 3072)
Xte = Xte.reshape(10000, 3072)
Xtr, Ytr, Xval, Yval = split_strategy(Xtr, Ytr)

# define the hyperparameters
hiddenlayer_size_arg = 500
batch_size_arg = 2000
num_iters_arg = 1000
learning_rate_arg = 0.002
learning_rate_decay_arg = 0.98
reg_arg = 1e-5
verbose = False
'''
###### uncomment the following section to print the value of parameters ######
print "params values:"
Example #38
import sys
import time

import numpy as np

import data_utils
import neural_net

if __name__ == '__main__':

    start_time = time.time()
    #C:\Users\SHARATH\Git\cs291k-mp1\dataset
    file_location = sys.argv[1] + "/cifar-10-batches-py"
    print(file_location)

    input_size = 3072
    hidden_size = 500
    output_size = 10
    momentum = 0.95

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(file_location)
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size, 0.00001, momentum)

    #Configuration Parameters
    training_size = 49000
    test_size = 10000
    validation_size = 1000

    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    mask = range(training_size, training_size + validation_size)