def PreDataset():
    cifar10_dir = 'data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImg(X_train, y_train)
    input("Press Enter to continue to cross-validation...")

    num_train = 49000
    num_val = 1000

    # carve a validation split off the end of the training data
    sample_index = range(num_train, num_train + num_val)
    X_val = X_train[sample_index]
    y_val = y_train[sample_index]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    # Zero-center the data. The statistic should come from the training
    # split only; the original subtracted each split's own mean.
    mean_pixel = np.mean(X_train)
    X_train -= mean_pixel
    X_val -= mean_pixel
    X_test -= mean_pixel
    # VisualizeImg(X_train, y_train)  # visualize the zero-centering effect

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # append a column of ones to X so the bias b folds into the weights
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test
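# All of the snippets in this section assume a load_CIFAR10 helper. As a point
# of reference, here is a minimal sketch in the style of the cs231n data_utils
# module (pickled batches; images returned as float arrays of shape
# (N, 32, 32, 3)); the actual helper in each project may differ.
import os
import pickle
import numpy as np

def load_CIFAR_batch(filename):
    """Load one pickled CIFAR-10 batch as (X, y)."""
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='latin1')
    X = datadict['data'].reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')
    y = np.array(datadict['labels'])
    return X, y

def load_CIFAR10(root):
    """Load the five training batches and the test batch from `root`."""
    xs, ys = [], []
    for b in range(1, 6):
        X, y = load_CIFAR_batch(os.path.join(root, 'data_batch_%d' % b))
        xs.append(X)
        ys.append(y)
    X_train, y_train = np.concatenate(xs), np.concatenate(ys)
    X_test, y_test = load_CIFAR_batch(os.path.join(root, 'test_batch'))
    return X_train, y_train, X_test, y_test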
def knn_predict():
    # Despite the name, this routine classifies with logistic regression
    # via do_LogisticRegression below.
    if len(sys.argv) < 3:
        print("need at least 2 parameters")
        sys.exit(1)
    model = sys.argv[1]
    param1 = sys.argv[2]
    cifar10_dir = '../cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # X_train, y_train = load_CIFAR_batch(cifar10_dir + '/data_batch_1')
    X_train = X_train[:1000]
    y_train = y_train[:1000]
    test_data = load_test_data()
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    y_train = np.reshape(y_train, (y_train.shape[0], -1))
    test_data = np.reshape(test_data, (test_data.shape[0], -1))
    y_pred = do_LogisticRegression(X_train, y_train, test_data, C=66)
    print(y_pred.shape)

    scp_file = 'test.scp'
    fin = codecs.open(scp_file, 'r')
    images = fin.readlines()
    fin.close()
    assert len(images) == y_pred.shape[0]

    output = codecs.open('prediction.txt', 'w')
    for i in range(len(images)):
        basename = images[i].split('\n')[0]
        output.write(basename + ' ' + str(y_pred[i]) + '\n')
    output.close()
def get_CIFAR10_data(num_training=5000, num_validation=1000, num_test=500):
    """
    Load the data from the dataset folder and prepare it to be consumed
    by the neural network.
    """
    # Load the raw data.
    cifar10_dir = '../../datasets/cifar-10-batches-py/'
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image.
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val = X_val - mean_image
    X_test = X_test - mean_image

    # Move channels first. Note: swapaxes(1, 3) also swaps height and width
    # (harmless for square images, but it transposes each image; transpose
    # with (0, 3, 1, 2) preserves orientation). The original left X_test in
    # channels-last layout, which is fixed here.
    X_train = X_train.swapaxes(1, 3)
    X_val = X_val.swapaxes(1, 3)
    X_test = X_test.swapaxes(1, 3)
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     num_dev=500):
    """Load CIFAR-10, split into train/val/test/dev, flatten and zero-center."""
    root_dir = '../dataset/cifar-10-batches-py'
    X_train, Y_train, X_test, Y_test = data_utils.load_CIFAR10(root_dir)

    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    Y_val = Y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    Y_train = Y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    Y_test = Y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    Y_dev = Y_train[mask]

    # reshape and subtract the mean
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))
    mean_image = np.mean(X_train, axis=0)  # zero-center the data
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image
    return X_train, Y_train, X_val, Y_val, X_test, Y_test, X_dev, Y_dev
def pre_dataset(path):
    X_train, y_train, X_test, y_test = load_CIFAR10(path)
    num_train = 9000
    num_val = 1000
    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    """
    print('Train data shape: {}'.format(X_train.shape))
    print('Train labels shape: {}'.format(y_train.shape))
    print('Validation data shape: {}'.format(X_val.shape))
    print('Validation labels shape: {}'.format(y_val.shape))
    print('Test data shape: {}'.format(X_test.shape))
    print('Test labels shape: {}'.format(y_test.shape))
    """
    return X_train, y_train, X_test, y_test, X_val, y_val
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Use the cs231n data_utils.py script to load the data
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    '''
    Train data shape: (49000, 32, 32, 3)
    Train labels shape: (49000,)
    '''
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    # it for the two-layer neural net classifier. These are the same steps as
    # we used for the SVM, but condensed to a single function.

    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)
    return X_train, y_train, X_val, y_val, X_test, y_test
def load_CIFAR10(self):
    """
    Load the raw CIFAR-10 data, subsample it, and visualize a few examples
    of training images from each class. (Note: this method shares the name
    of the module-level load_CIFAR10 helper it calls.)
    """
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # As a sanity check, we print out the size of the training and test data.
    print('Training data shape: ', X_train.shape)    # (50000, 32, 32, 3)
    print('Training labels shape: ', y_train.shape)  # (50000,)
    print('Test data shape: ', X_test.shape)         # (10000, 32, 32, 3)
    print('Test labels shape: ', y_test.shape)       # (10000,)

    # Visualize a subset of the samples
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    num_classes = len(classes)
    samples_per_class = 7  # visualize 7 samples per class
    for y, clas in enumerate(classes):
        print("\n\ny =", y, "class =", clas)
        # indices of elements with y_train == y, flattened to one dimension
        idxs = np.flatnonzero(y_train == y)
        print("In training set, the number of class <", y, "> is", len(idxs))
        # randomly pick 7 samples of this class
        idxs = np.random.choice(idxs, samples_per_class, replace=False)
        print("randomly select", samples_per_class,
              "samples in this set, their subscripts are:", idxs[:])
        print("plt_index =", end=" ")
        for i, idx in enumerate(idxs):
            plt_idx = i * num_classes + y + 1
            print(plt_idx, end=" ")
            plt.subplot(samples_per_class, num_classes, plt_idx)
            plt.imshow(X_train[idx].astype('uint8'))  # show the image
            plt.axis('off')
            if i == 0:
                plt.title(clas)
    # save the figure
    if not os.path.exists("visual_CIFAR10.jpg"):
        plt.savefig("visual_CIFAR10.jpg")
    plt.show()

    # To speed up training in this experiment, keep only the first
    # 5000 training and 500 test samples.
    print("sampling... reshape...")
    X_train = X_train[:5000]
    y_train = y_train[:5000]
    X_test = X_test[:500]
    y_test = y_test[:500]

    # reshape the arrays into (nb_samples, nb_features)
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print("X_train.shape =", X_train.shape, "X_test.shape =", X_test.shape)

    self.X_train, self.y_train, self.X_test, self.y_test = \
        X_train, y_train, X_test, y_test
    print("----")
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './cifar-10-batches-py'
    data_train_all, label_train_all, data_test, label_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    data_val = data_train_all[mask]
    label_val = label_train_all[mask]
    mask = range(num_training)
    data_train = data_train_all[mask]
    label_train = label_train_all[mask]
    mask = range(num_test)
    data_test = data_test[mask]
    label_test = label_test[mask]

    # Normalize the data: subtract the mean image. The training-split mean
    # is used for every split; the original normalized the test set with its
    # own mean, which leaks test statistics and shifts it off the training
    # scale.
    mean_image = np.mean(data_train, axis=0)
    data_train_m = data_train - mean_image
    data_val_m = data_val - mean_image
    data_test_m = data_test - mean_image
    return data_train_m, label_train, data_val_m, label_val, data_test_m, label_test
def make_cifar10_dataset(cifar_dir, n_validation=0, vectorize=False):
    NUM_CLASSES = 10
    NUM_TRAIN = 50000
    NUM_TEST = 10000
    # Use the directory passed by the caller (the original ignored it in
    # favor of a hard-coded absolute path).
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar_dir)

    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))
        X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # make one-hot coding
    y_train_temp = np.zeros((NUM_TRAIN, NUM_CLASSES))
    for i in range(NUM_TRAIN):
        y_train_temp[i, y_train[i]] = 1
    y_train = y_train_temp
    y_test_temp = np.zeros((NUM_TEST, NUM_CLASSES))
    for i in range(NUM_TEST):
        y_test_temp[i, y_test[i]] = 1
    y_test = y_test_temp

    # make validation set
    X_valid = X_train[:n_validation]
    X_train = X_train[n_validation:]
    y_valid = y_train[:n_validation]
    y_train = y_train[n_validation:]
    return (X_train, y_train, X_valid, y_valid, X_test, y_test)
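# The one-hot loops above work, but NumPy can build the same matrix in a
# single indexing step; a small equivalent sketch:
import numpy as np

y = np.array([3, 0, 9])
one_hot = np.eye(10)[y]  # row i has a 1 in column y[i], zeros elsewhere
assert one_hot.shape == (3, 10)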
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = './datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)
    return X_train, y_train, X_val, y_val, X_test, y_test
def pre_dataset():
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImage(X_train, y_train)
    input('Press Enter to continue to cross-validation...')

    num_train = 49000
    num_val = 1000

    # carve out the validation split
    sample_index = range(num_train, num_train + num_val)
    X_val = X_train[sample_index]
    y_val = y_train[sample_index]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    # subtract the mean image
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    mean_image = np.mean(X_train, axis=0)
    X_train = X_train - mean_image
    X_test = X_test - mean_image
    X_val = X_val - mean_image

    # add a bias column for W (the sketch below shows why this works)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    return X_train, y_train, X_test, y_test, X_val, y_val
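# Why append a column of ones? It folds the bias b into the weight matrix,
# so class scores become a single matrix product. A short sketch with
# hypothetical shapes (N samples, D features, C classes):
import numpy as np

N, D, C = 5, 3072, 10
X = np.random.randn(N, D)
W = np.random.randn(D, C)
b = np.random.randn(C)

X_aug = np.hstack([X, np.ones((N, 1))])   # (N, D+1)
W_aug = np.vstack([W, b[np.newaxis, :]])  # (D+1, C): bias as the last row
assert np.allclose(X.dot(W) + b, X_aug.dot(W_aug))  # identical scores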
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'E:/research/CS231n/cifar-10-python/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    return X_train, y_train, X_val, y_val, X_test, y_test
def main():
    cifar10_dir = '../datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    num_training = 5000
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    num_test = 500
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    print(X_train.shape, X_test.shape)

    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    dists = classifier.compute_distances_two_loops(X_test)
    y_test_pred = classifier.predict_labels(dists, k=1)

    # Compute and print the fraction of correctly predicted examples
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

    y_test_pred = classifier.predict_labels(dists, k=5)
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

    dists_one = classifier.compute_distances_one_loop(X_test)
    # To ensure that our vectorized implementation is correct, we make sure
    # that it agrees with the naive implementation. There are many ways to
    # decide whether two matrices are similar; one of the simplest is the
    # Frobenius norm. In case you haven't seen it before, the Frobenius norm
    # of the difference of two matrices is the square root of the squared sum
    # of differences of all elements; in other words, reshape the matrices
    # into vectors and compute the Euclidean distance between them.
    difference = np.linalg.norm(dists - dists_one, ord='fro')
    print('One loop difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')

    dists_two = classifier.compute_distances_no_loops(X_test)
    # check that the distance matrix agrees with the one we computed before:
    difference = np.linalg.norm(dists - dists_two, ord='fro')
    print('No loop difference was: %f' % (difference, ))
    if difference < 0.001:
        print('Good! The distance matrices are the same')
    else:
        print('Uh-oh! The distance matrices are different')
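# The Frobenius-norm check above relies on the identity described in the
# comment: the Frobenius norm of a matrix equals the Euclidean norm of the
# flattened matrix. A quick self-contained demonstration:
import numpy as np

A = np.random.randn(4, 6)
B = np.random.randn(4, 6)
d_fro = np.linalg.norm(A - B, ord='fro')
d_vec = np.linalg.norm((A - B).ravel())  # Euclidean distance of flattened vectors
assert np.isclose(d_fro, d_vec)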
def main(dataset):
    # Load the CIFAR data
    print("Loading data..")
    Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(dataset)
    print("Loaded data!")

    Xtr = flatten(Xtr)
    Xte = flatten(Xte)
    mean_image = np.mean(Xtr, axis=0)
    Xtr = preProcess(Xtr, mean_image)
    Xte = preProcess(Xte, mean_image)

    N, D = Xtr.shape
    # Set aside 20% of the data for validation (integer division: the
    # original's `/` would yield a float under Python 3)
    vSize = N * 20 // 100

    # Create network and run training
    nn = network.TwoLayerNet(3072, 1024, 10)
    stats = nn.train(Xtr[vSize:], Ytr[vSize:], Xtr[:vSize], Ytr[:vSize],
                     verbose=False)

    # Do not print stats..
    # print(stats['train_acc_history'])
    # print(stats['loss_history'])
    # print(stats['val_acc_history'])
    # plt.plot(stats['train_acc_history'])
    # plt.show()

    # Test accuracy
    print("Training accuracy: %.2f" % stats['train_acc_history'][-1])
    print("Validation accuracy: %.2f" % stats['val_acc_history'][-1])
    print("Testing accuracy: %.2f" % (nn.accuracy(Xte, Yte) * 100))
def make_bearing_dataset(data_dir, n_validation=0, vectorize=False,
                         num_labeled_samples=1320):
    NUM_CLASSES = 10
    X_train, y_train, X_test, y_test = load_CIFAR10(data_dir)
    # X_train: (39600, 32, 32, 1), y_train: (39600,)
    # X_test:  (3750, 32, 32, 1),  y_test:  (3750,)
    NUM_TRAIN = X_train.shape[0]
    NUM_TEST = X_test.shape[0]

    # reshape to vectors
    if vectorize:
        X_train = np.reshape(X_train, (X_train.shape[0], -1))  # (39600, 1024)
        X_test = np.reshape(X_test, (X_test.shape[0], -1))     # (3750, 1024)

    # make one-hot coding
    y_train_temp = np.zeros((NUM_TRAIN, NUM_CLASSES))
    for i in range(NUM_TRAIN):
        y_train_temp[i, y_train[i]] = 1
    y_train = y_train_temp  # (39600, 10)
    y_test_temp = np.zeros((NUM_TEST, NUM_CLASSES))
    for i in range(NUM_TEST):
        y_test_temp[i, y_test[i]] = 1
    y_test = y_test_temp  # (3750, 10)

    X_train_labeled, y_train_labeled = draw_labeled_data(
        X_train, y_train, labeled_sample_per_category=num_labeled_samples)
    return (X_train, y_train, X_train_labeled, y_train_labeled, X_test, y_test)
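# draw_labeled_data is not shown above. One plausible implementation, given
# that labels arrive one-hot encoded, draws an equal number of labeled samples
# per class; treat this as an assumption, not the project's actual code:
import numpy as np

def draw_labeled_data(X, y_onehot, labeled_sample_per_category):
    """Randomly pick the same number of samples from every class."""
    labels = np.argmax(y_onehot, axis=1)
    picked = []
    for c in range(y_onehot.shape[1]):
        idx = np.flatnonzero(labels == c)
        picked.append(np.random.choice(idx, labeled_sample_per_category,
                                       replace=False))
    picked = np.concatenate(picked)
    return X[picked], y_onehot[picked]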
def serialize_data():
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # avoid ':' in filenames; it is invalid on Windows
    datetime_str = datetime.datetime.today().strftime('%Y%m%d-%H%M%S')
    serialize_cifar_pool3(X_train, 'X_train_' + datetime_str)
    serialize_cifar_pool3(X_test, 'X_test_' + datetime_str)
    np.save('y_train_' + datetime_str, y_train)
    np.save('y_test_' + datetime_str, y_test)
def pre_dataset():
    cifar10_dir = 'D:/dataset/cifar-10-python/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    VisualizeImage(X_train, y_train)

    num_train = 49000
    num_val = 1000
    mask = range(num_train, num_train + num_val)
    X_val = X_train[mask]
    y_val = y_train[mask]
    X_train = X_train[:num_train]
    y_train = y_train[:num_train]

    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))

    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # add a parameter for W (bias column)
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    return X_train, y_train, X_test, y_test, X_val, y_val
def serialize_data():
    # Change this call to take the sketches dataset as input, using
    # input_data_sketches.read_data_sets(), for testing with sketches.
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    serialize_cifar_pool3(X_train, 'X_train_1')
    serialize_cifar_pool3(X_test, 'X_test_1')
    np.save('y_train_1', y_train)
    np.save('y_test_1', y_test)
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # Cleaning up variables to prevent loading data multiple times (which may
    # cause memory issues). Note: inside a function these names are always
    # unbound at this point, so this notebook-style cleanup is a no-op here.
    try:
        del X_train, y_train
        del X_test, y_test
        print('Clear previously loaded data.')
    except NameError:
        pass

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def load_and_process(location=None):
    if location is None:
        Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(
            os.path.join(os.getcwd(), 'cifar-10-batches-py'))
    else:
        Xtr, Ytr, Xte, Yte = data_utils.load_CIFAR10(
            os.path.join(os.getcwd(), location, 'cifar-10-batches-py'))
    Xtr = np.reshape(Xtr, (Xtr.shape[0], 3072))
    Xte = np.reshape(Xte, (Xte.shape[0], 3072))

    # preprocessing: scale each feature by its max absolute value over the
    # training set, then subtract the training mean
    feature_maxes = np.abs(Xtr).max(axis=0)
    Xtr = Xtr / feature_maxes
    Xte = Xte / feature_maxes
    mean_image = np.mean(Xtr, axis=0)
    Xtr -= mean_image
    Xte -= mean_image
    # end preprocessing

    # shuffle, then hold out the last 10% for validation
    # (shuffle_training_sets is sketched below)
    Xtr, Ytr = nn.shuffle_training_sets(Xtr, Ytr)
    training_set_size = Xtr.shape[0]
    split = int(training_set_size * .9)
    Xtrain, Xval = Xtr[:split], Xtr[split:]
    Ytrain, Yval = Ytr[:split], Ytr[split:]
    return Xtrain, Ytrain, Xval, Yval, Xte, Yte
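# nn.shuffle_training_sets is not shown above. A minimal stand-in that keeps
# data and labels aligned under a single random permutation (an assumption
# about its behavior, inferred from how it is used):
import numpy as np

def shuffle_training_sets(X, y):
    perm = np.random.permutation(X.shape[0])
    return X[perm], y[perm]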
def getimage(image, batch_size, trainnum=2000, testnum=500):
    train_image = []
    train_label = []
    test_image = []
    test_label = []
    if image == 'FID':
        walker = os.walk(r'D:\360download\FIDS30')
        classnum = 0
        for entry in walker:
            if entry[1] == []:
                imagepath = glob.glob('%s\\*.jpg' % (entry[0]))
                # keep the last five images of each class for testing,
                # and train on the rest
                for i in range(len(imagepath[0:-5])):
                    train_image.append(imagepath[i])
                    train_label.append(classnum)
                for i in range(5):
                    # imagepath[i - 5] walks the last five entries; the
                    # original's i - 6 skipped the final image and reused
                    # one training image.
                    test_image.append(imagepath[i - 5])
                    test_label.append(classnum)
                classnum = classnum + 1
        # use the image generator to turn the training images into arrays
        tr_data = ImageDataGenerator(images=train_image, labels=train_label,
                                     batch_size=batch_size,
                                     num_classes=classnum)
        # use the image generator to turn the test images into arrays
        test_data = ImageDataGenerator(images=test_image, labels=test_label,
                                       batch_size=batch_size,
                                       num_classes=classnum, shuffle=False)
        tr_data = tr_data.data
        test_data = test_data.data
        return tr_data, test_data, classnum
    if image == 'cifar10':
        cifar10_dir = 'cifar-10-batches-py'
        # load the CIFAR data
        X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
        train_image = X_train[list(range(trainnum))]
        train_label = y_train[list(range(trainnum))]
        test_image = X_test[list(range(testnum))]
        test_label = y_test[list(range(testnum))]
        classnum = 10
        tr_data = Dataset.from_tensor_slices((train_image, train_label))
        tr_data = tr_data.map(resize)
        tr_data = tr_data.batch(batch_size)
        test_data = Dataset.from_tensor_slices((test_image, test_label))
        test_data = test_data.map(resize)
        test_data = test_data.batch(batch_size)
        return tr_data, test_data, classnum
def load_data(self):
    print("load cifar-10 data...")
    # Load the raw CIFAR-10 data.
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # As a sanity check, we print out the size of the training and test data.
    print('Training data shape: ', X_train.shape)
    print('Training labels shape: ', y_train.shape)
    print('Test data shape: ', X_test.shape)
    print('Test labels shape: ', y_test.shape)

    self.X_train, self.y_train, self.X_test, self.y_test = \
        X_train, y_train, X_test, y_test
    print("load data done...\n---------\n")
def generate_hog_data():
    hog_X_train = np.load("hog_X_train.npy")
    hog_X_test = np.load("hog_X_test.npy")
    cifar10_dir = 'cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # Take the sample counts before X_train is reassigned, and use integer
    # division (the original's `/ 5` yields a float under Python 3).
    num_train = X_train.shape[0] // 5
    num_test = X_test.shape[0] // 5
    X_train, y_train = extract_CIFAR10_samples(hog_X_train, y_train, num_train)
    X_test, y_test = extract_CIFAR10_samples(hog_X_test, y_test, num_test)
    np.save("X_hog_train", X_train)
    np.save("y_hog_train", y_train)
    np.save("X_hog_test", X_test)
    np.save("y_hog_test", y_test)
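# extract_CIFAR10_samples is used here and in a later snippet without being
# defined. A hedged sketch of one plausible implementation, drawing a random
# subset of the requested size:
import numpy as np

def extract_CIFAR10_samples(X, y, num_samples):
    """Return a random subset of num_samples (data, label) pairs."""
    mask = np.random.choice(X.shape[0], int(num_samples), replace=False)
    return X[mask], y[mask]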
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     num_dev=500):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def gen_datasets(cifar10_dir, num_training=4900, num_validation=1000,
                 num_test=1000, num_dev=500):
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Split the data into train, val, and test sets. In addition we will
    # create a small development set as a subset of the training data;
    # we can use this for development so our code runs faster.

    # Our validation set will be num_validation points from the original
    # training set.
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    # Our training set will be the first num_training points from the
    # original training set.
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    # We will also make a development set, which is a small subset of
    # the training set.
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # We use the first num_test points of the original test set as our
    # test set.
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    datasets = {}
    datasets['X_train'] = X_train
    datasets['X_val'] = X_val
    datasets['X_dev'] = X_dev
    datasets['X_test'] = X_test
    datasets['y_train'] = y_train
    datasets['y_dev'] = y_dev
    datasets['y_test'] = y_test
    datasets['y_val'] = y_val
    return datasets
def load_data_set():
    # load the dataset
    cifar10_dir = '../cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # flatten each image into a 3072-dimensional vector
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    # As the course requires, shrink the training set to one fifth.
    # (Note: the full sample count is passed here; the comment and code
    # disagree, so divide by 5 if the 1/5 subset is actually wanted.
    # extract_CIFAR10_samples is sketched after the HOG snippet above.)
    X_train, y_train = extract_CIFAR10_samples(X_train, y_train,
                                               X_train.shape[0])
    X_test, y_test = extract_CIFAR10_samples(X_test, y_test, X_test.shape[0])
    np.save("X_train", X_train)
    np.save("y_train", y_train)
    np.save("X_test", X_test)
    np.save("y_test", y_test)
    return X_train, y_train, X_test, y_test
def get_whitened_image():
    cifar10_dir = '../../data/cifar10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    X = np.concatenate((X_train, X_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)
    # reshape X to (60000, 1024, 3)
    X = X.reshape((X.shape[0], X.shape[1] * X.shape[2], X.shape[3]))

    # normalization
    print('Global contrast normalization...')
    X = X.transpose((0, 2, 1))  # (60000, 3, 1024)
    X -= np.mean(X, axis=2).reshape((X.shape[0], X.shape[1], 1))

    # ZCA whitening, one color channel at a time
    print('ZCA whitening...')
    for i in range(3):
        X[:, i] = zca_whitening(X[:, i])

    # save as (3, 60000, 1024)
    np.save('../../data/cifar10/cifar10.whitened_image.npy',
            X.transpose((1, 0, 2)) * 256.0)
    np.save('../../data/cifar10/cifar10.label.npy', y)
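# zca_whitening is called per color channel above but not defined. A common
# formulation, offered here as a sketch rather than the project's exact code,
# whitens rows of X with the inverse square root of the feature covariance:
import numpy as np

def zca_whitening(X, eps=1e-5):
    """ZCA-whiten X of shape (n_samples, n_features)."""
    Xc = X - X.mean(axis=0)
    cov = Xc.T.dot(Xc) / Xc.shape[0]
    U, S, _ = np.linalg.svd(cov)
    # rotate into the eigenbasis, rescale, rotate back
    W = U.dot(np.diag(1.0 / np.sqrt(S + eps))).dot(U.T)
    return Xc.dot(W)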
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     num_dev=500):
    # Load the raw CIFAR-10 data
    cifar10_dir = 'datasets/cifar-10'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    mask = np.random.choice(num_training, num_dev, replace=False)
    X_dev = X_train[mask]
    y_dev = y_train[mask]

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_dev = np.reshape(X_dev, (X_dev.shape[0], -1))

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    X_dev -= mean_image

    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
    X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the two-layer neural net classifier. These are the same steps as
    we used for the SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = '../../../assignment1/cs231n/datasets/cifar-10-batches-py'

    # Cleaning up variables to prevent loading data multiple times (which may
    # cause memory issues). Note: inside a function these names are always
    # unbound at this point, so this notebook-style cleanup is a no-op here.
    try:
        del X_train, y_train
        del X_test, y_test
        print('Clear previously loaded data.')
    except NameError:
        pass

    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # Load the raw CIFAR-10 data
    cifar10_dir = r'machine_learning_study/dataset/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = range(num_training, num_training + num_validation)  # samples 49000-50000 become the validation set
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)  # samples 0-49000 become the training set
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)  # test samples 0-1000 become the test set
    X_test = X_test[mask]
    y_test = y_test[mask]
    return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    cifar10_dir = './' + dataset_dir + '/cifar-10-batches-py'
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    # channels first; see the note on swapaxes(1, 3) in the earlier snippet
    X_train = X_train.swapaxes(1, 3)
    X_val = X_val.swapaxes(1, 3)
    X_test = X_test.swapaxes(1, 3)
    return X_train, y_train, X_val, y_val, X_test, y_test
import neural_net
import data_utils
import numpy as np
import matplotlib.pyplot as plt
import time

if __name__ == '__main__':
    start_time = time.time()
    input_size = 3072
    hidden_size = 500
    output_size = 10
    momentum = 0.95
    # raw string: "\U" in a plain string is an invalid unicode escape in Python 3
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(
        r"C:\Users\SHARATH\Git\cs291k-mp1\dataset")
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size,
                                0.00001, momentum)

    # Configuration Parameters
    training_size = 49000
    test_size = 10000
    validation_size = 1000
    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    # Subsample the data
    mask = range(training_size, training_size + validation_size)
    X_val = X_train[mask]
def predict(self, Xtest):  # method of the L1Distance class (class and train() defined elsewhere)
    num_test = Xtest.shape[0]  # the original read the global X_test here by mistake
    Ypred = np.zeros(num_test, dtype=self.Ytrain.dtype)
    # loop over all test rows
    for i in range(num_test):
        # find the nearest training image to the i'th test image
        # using the L1 distance (sum of absolute value differences)
        distances = np.sum(np.abs(self.Xtrain - Xtest[i, :]), axis=1)
        min_index = np.argmin(distances)   # get the index with smallest distance
        Ypred[i] = self.Ytrain[min_index]  # predict the label of the nearest example
        if i % 5 == 0:
            print(i)
    return Ypred

a = L1Distance()
cifar10_dir = '/root/cs231n/assignment1/cs231n/datasets/cifar-10-batches-py/'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
# Flatten each image to a row; shape[0] is the number of samples, which is
# why it appears in the reshape call.
Xtr_rows = X_train.reshape(X_train.shape[0], 3 * 32 * 32)
Xte_rows = X_test.reshape(X_test.shape[0], 3 * 32 * 32)
# Train on the flattened rows (the original also called train() on the raw
# 4-D data first, which was redundant and immediately superseded).
a.train(Xtr_rows, y_train)
yte_predict = a.predict(Xte_rows)
print('accuracy: %f' % (np.mean(yte_predict == y_test)))
from data_utils import load_CIFAR10
from k_nearest_neighbour import KNearestNeighbour
import numpy as np

# Load the CIFAR-10 dataset: Xtr/Ytr are the training images and labels,
# Xte/Yte the test images and labels.
Xtr, Ytr, Xte, Yte = load_CIFAR10('dataset/')

"""Convert the image dataset to raw row-vector format."""
Xtr_rows = Xtr.reshape(Xtr.shape[0], Xtr.shape[1] * Xtr.shape[2] * Xtr.shape[3])
Xte_rows = Xte.reshape(Xte.shape[0], Xte.shape[1] * Xte.shape[2] * Xte.shape[3])

nn = KNearestNeighbour()
K = nn.train(Xtr_rows, Ytr)
Y_pred = np.zeros(Yte.shape[0], dtype=Ytr.dtype)
Y_pred = nn.predict(Xte_rows, K)
print("Efficiency in prediction %f for k=%d" % (np.mean(Y_pred == Yte), K))
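# The KNearestNeighbour class is imported but not shown. Its distance step is
# usually the fully vectorized trick referenced by compute_distances_no_loops
# in the earlier snippet; a sketch of that expansion,
# (a - b)^2 = a^2 - 2ab + b^2, applied row-wise:
import numpy as np

def l2_distances_no_loops(X_test, X_train):
    """Pairwise Euclidean distances, shape (num_test, num_train)."""
    te = np.sum(X_test ** 2, axis=1, keepdims=True)  # (num_test, 1)
    tr = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test.dot(X_train.T)                    # (num_test, num_train)
    return np.sqrt(np.maximum(te - 2.0 * cross + tr, 0.0))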
def split_strategy(Xtr, Ytr):
    # Hold out the last 20% of the samples for validation. Slice indices
    # must be ints (the original used num_samples * 0.8, a float).
    num_samples = Xtr.shape[0]
    split = int(num_samples * 0.8)
    Xval = Xtr[split:]
    Yval = Ytr[split:]
    Xtr = Xtr[:split]
    Ytr = Ytr[:split]
    return Xtr, Ytr, Xval, Yval

inputsize = 32 * 32 * 3
outputsize = 10

## Load Dataset
dir = os.path.dirname(__file__)
rootname = os.path.join(dir, 'dataset/cifar-10-batches-py')
Xtr, Ytr, Xte, Yte = load_CIFAR10(rootname)
Xtr = Xtr.reshape(50000, 3072)
Xte = Xte.reshape(10000, 3072)
Xtr, Ytr, Xval, Yval = split_strategy(Xtr, Ytr)

# define the hyper parameters
hiddenlayer_size_arg = 500
batch_size_arg = 2000
num_iters_arg = 1000
learning_rate_arg = 0.002
learning_rate_decay_arg = 0.98
reg_arg = 1e-5
verbose = False

'''
###### uncomment the following section to print the value of parameters ######
print "params values:"
import sys  # used below for the command-line argument; missing in the original
import time
import numpy as np
import data_utils
import neural_net

if __name__ == '__main__':
    start_time = time.time()
    # e.g. C:\Users\SHARATH\Git\cs291k-mp1\dataset
    file_location = sys.argv[1] + "/cifar-10-batches-py"
    print(file_location)
    input_size = 3072
    hidden_size = 500
    output_size = 10
    momentum = 0.95
    X_train, y_train, X_test, y_test = data_utils.load_CIFAR10(file_location)
    nn = neural_net.TwoLayerNet(input_size, hidden_size, output_size,
                                0.00001, momentum)

    # Configuration Parameters
    training_size = 49000
    test_size = 10000
    validation_size = 1000
    learning_rate = 0.0001
    learning_rate_decay = 0.95
    reg = 0.01
    num_iters = 20000
    batch_size = 500
    verbose = True

    mask = range(training_size, training_size + validation_size)