def get_data(): from cs231n.data_utils import load_CIFAR10 cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) plot_dataset(X_train, y_train) num_training = 49000 num_validation = 1000 num_test = 1000 mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) print '[Reshaped] Training data shape: ', X_train.shape, y_train.shape print '[Reshaped] Validation data shape: ', X_val.shape, y_val.shape print '[Reshaped] Test data shape: ', X_test.shape, y_test.shape return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test
def load_data(): # Load the raw CIFAR-10 data. cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # As a sanity check, we print out the size of the training and test data. print 'Training data shape: ', X_train.shape print 'Training labels shape: ', y_train.shape print 'Test data shape: ', X_test.shape print 'Test labels shape: ', y_test.shape # Subsample the data for more efficient code execution in this exercise num_training = 5000 mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] num_test = 500 mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # visualize_data(X_train, y_train) # Reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) print X_train.shape, X_test.shape return X_train, y_train, X_test, y_test, num_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' # Cleaning up variables to prevent loading data multiple times (which may cause memory issue) try: del X_train, y_train del X_test, y_test print('Clear previously loaded data.') except: pass X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, _, X_test, _ = load_CIFAR10(cifar10_dir) X = np.concatenate((X_train, X_test), axis =0) return X
def dataPrepare(cifar10_dir, num_training, num_test): # Load the raw CIFAR-10 data. X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # As a sanity check, we print out the size of the training and test data. print('Training data shape: ', X_train.shape) print('Training labels shape: ', y_train.shape) print('Test data shape: ', X_test.shape) print('Test labels shape: ', y_test.shape) #print # Training data shape: (50000, 32, 32, 3) # Training labels shape: (50000,) # Test data shape: (10000, 32, 32, 3) # Test labels shape: (10000,) # Visualize some examples from the dataset. # We show a few examples of training images from each class. classes = [ 'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck' ] num_classes = len(classes) samples_per_class = 7 for y, cls in enumerate(classes): idxs = np.flatnonzero(y_train == y) idxs = np.random.choice(idxs, samples_per_class, replace=False) for i, idx in enumerate(idxs): plt_idx = i * num_classes + y + 1 plt.subplot(samples_per_class, num_classes, plt_idx) plt.imshow(X_train[idx].astype('uint8')) plt.axis('off') if i == 0: plt.title(cls) # plt.show() # Subsample the data for more efficient code execution in this exercise # Only choose top 5000 in training data # Only choose top 500 in test data mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # Reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) return X_train, y_train, X_test, y_test
def testNearestNeighbour(self): Xtr, Ytr, Xte, Yte = datautils.load_CIFAR10('../../data/cifar10/') Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072 Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072 Xte_rows = Xte_rows[0:20, :] Yte = Yte[0:20] nn = NearestNeighbour() nn.train(Xtr_rows, Ytr) Yte_predict = nn.predict(Xte_rows) print('accuracy: %f' % (np.mean(Yte_predict == Yte)))
def get_datasets(num_train=100, num_test=20, preprocessing=True): X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) X_train -= np.mean(np.mean(np.mean(X_train, axis=0), axis=0), axis=0) ###data preprocessing X_train = X_train.reshape((X_train.shape[0], -1)) X_train = X_train[:num_train] y_train = y_train[:num_train] X_test -= np.mean(np.mean(np.mean(X_test, axis=0), axis=0), axis=0) ###data preprocessing X_test = X_test.reshape((X_test.shape[0], -1)) X_test = X_test[:num_test] y_test = y_test[:num_test] return X_train, y_train, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data current_file = os.getcwd() cifar10_dir = current_file + '\cs231n\datasets\cifar-10-python\cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) # the last column is the bias parameter . WX+b return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image # It is along a row (axis=0) because you calculate the mean for a given pixel # at every coordinate mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = os.path.dirname( 'C:\\Users\\Matthew\\Documents\\jerusml_deeplearning\\assignments\\spring1617_assignment1\\cs231n\datasets\\cifar-10-python\\' ) X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Our validation set will be num_validation points from the original # training set. mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] # Our training set will be the first num_train points from the original # training set. mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] # We use the first num_test points of the original test set as our # test set. mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # We will also make a development set, which is a small subset of # the training set. mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. """ # Load the raw CIFAR-10 data cifar10_dir = '../datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def main(): plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' # Load data # Load the raw CIFAR-10 data. cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # As a sanity check, we print out the size of the training and test data. print 'Training data shape: ', X_train.shape print 'Training labels shape: ', y_train.shape print 'Test data shape: ', X_test.shape print 'Test labels shape: ', y_test.shape VisualizeDataSetSamples(X_train, y_train)
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): # Load the raw CIFAR-10 data cifar10_dir = 'C:\Satan\HW\#cs231n\prog1\cs231n\datasets\cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=16000, num_validation=1000, num_test=1000): # Load the raw CIFAR-10 data cifar10_dir = 'F:/jupyter/assignment1/cs231n/datasets/CIFAR10/' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] return X_train, y_train, X_val, y_val, X_test, y_test
def testKNearestNeightbour(self): test_images = 10 validation_accuracies = [] Xtr, Ytr, Xte, Yte = datautils.load_CIFAR10('../../data/cifar10/') Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072 Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072 Xte_rows = Xte_rows[0:test_images, :] Yte = Yte[0:test_images] nn = KNearestNeighbour() nn.train(Xtr_rows, Ytr) for k in [1, 3, 5, 7, 10, 20, 50, 10]: Yte_predict = nn.predict(Xte_rows, k) acc = np.mean(Yte_predict == Yte) print('accuracy: %f' % acc) validation_accuracies.append(acc) plt.bar(validation_accuracies)
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis = 0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def getQoS(self): X_train, y_train, X_test, y_test = load_CIFAR10(self.data_path) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_train = np.reshape(X_train, (X_train.shape[0], -1)) mean_image = np.mean(X_train, axis=0) X_test -= mean_image X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) svm = LinearSVM() try: svm.W = pickle.load(open(self.run_dir + "model_svm.p", "rb"), encoding='latin1') y_test_pred = svm.predict(X_test) test_accuracy = np.mean(y_test == y_test_pred) except: test_accuracy = 0.0 return test_accuracy * 100.0
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # sampling the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] #reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) #Normalizing the data around center by subtracting it with mean value of pixel values mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image # for converting into columns , adding a bias X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training = 49000, num_validation = 1000, num_test = 1000, num_dev = 500): #加载原始cifar10数据 X_train, y_train, X_test, y_test = load_CIFAR10(r"C:\Users\Michael-School\PycharmProjects\datasets\cifar-10-batches-py") # print("X_train:",np.shape(X_train)) #(50000, 32, 32, 3) # print("y_train:",np.shape(y_train)) #(50000,) # print("X_test:",np.shape(X_test)) #X_test: (10000, 32, 32, 3) # print("y_test",np.shape(y_test)) #y_test (10000,) #对数据二次采样:从数据集中取数据子集用于后面的训练 mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] mask = np.random.choice(num_training, num_dev, replace=False) X_dev = X_train[mask] y_dev = y_train[mask] #数据预处理:将一幅图像变成一行存在相应的矩阵里 X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) X_dev = np.reshape(X_dev, (X_dev.shape[0], -1)) #标准化数据:先求平均图像,再将每个图像都减去平均图像,这样预处理可以加速后期优化过程中权重参数的收敛性 mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image X_dev -= mean_image #增加偏置的维度:在原矩阵后加一个全是1的列 X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]) X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]) X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]) X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))]) return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' # Cleaning up variables to prevent loading data multiple times (which may cause memory issue) try: del X_train, y_train del X_test, y_test print('Clear previously loaded data.') except: pass X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test
def main(): X_train, y_train, X_test, y_test = load_CIFAR10('../cifar-10-batches-py') num_training = 48000 mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] num_test = 1000 mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # Reshape the image data into rows print(X_train.shape) ''' (48000, 32, 32, 3) ''' X_train = np.reshape(X_train, (X_train.shape[0], -1)) print(X_train.shape) ''' (48000, 3072) ''' X_test = np.reshape(X_test, (X_test.shape[0], -1)) print(X_train.shape, X_test.shape) ''' (48000, 3072) (1000, 3072) ''' classifier = KNearestNeighbor() classifier.train(X_train, y_train) y_test_pred = classifier.predict(X_test, k=5) print(y_test_pred) # Compute and display the accuracy num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct) / num_test print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy)) '''
def get_CIFAR10_data(num_training=49000,num_validation=1000,num_test=10000): cifar10_path = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_path) mask = range(num_training,num_training+num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] mean_image = np.mean(X_train,axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image return X_train,y_train,X_val,y_val,X_test,y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000, num_dev=500): """ 从硬盘中加载cifar-10数据集并预处理,为2层前联接网络分类器作准备. 采取的步骤和SVM 实验中相同,只不过这里把它压缩成了一个函数. """ # 加载原始cifar-10数据集 cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # #验证集中的样本是原始训练集中的num_validation(1000)个样本 mask = list(range(num_training, num_training + num_validation)) X_val = X_train[mask] y_val = y_train[mask] # 训练集是原始训练集中的前num_training(49000)个样本 mask = list(range(num_training)) X_train = X_train[mask] y_train = y_train[mask] # 测试集是原始测试集中的前num_test(1000)个样本 mask = list(range(num_test)) X_test = X_test[mask] y_test = y_test[mask] # 预处理 将每张图像(32,32,3)拉伸为一维数组(3072,) # 各个数据集从四维数组(m,32,32,3) 转型为2维数组(m,3072) X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) # 预处理 0均值化 减去图像每个像素/特征上的平均值 # 基于训练数据 计算图像每个像素/特征上的平均值 mean_image = np.mean(X_train, axis=0) # 各个数据集减去基于训练集计算的各个像素/特征的平均值 # (m,3072) - (3072,) 广播运算 X_train -= mean_image X_val -= mean_image X_test -= mean_image return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the two-layer neural net classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # Subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) pca = decomposition.PCA(n_components=None, whiten=False) X_train = pca.fit_transform(X_train) X_val = pca.transform(X_val) X_test = pca.transform(X_test) return X_train, y_train, X_val, y_val, X_test, y_test
def get_CIFAR10_data(num_training=9000, num_validation=1000, num_test=1000): """ Load the CIFAR-10 dataset from disk and perform preprocessing to prepare it for the linear classifier. These are the same steps as we used for the SVM, but condensed to a single function. """ # Load the raw CIFAR-10 data cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # subsample the data mask = range(num_training, num_training + num_validation) X_val = X_train[mask] y_val = y_train[mask] mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Preprocessing: reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_val = np.reshape(X_val, (X_val.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) # Normalize the data: subtract the mean image mean_image = np.mean(X_train, axis=0) X_train -= mean_image X_val -= mean_image X_test -= mean_image # add bias dimension and transform into columns X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T return X_train, y_train, X_val, y_val, X_test, y_test
def gen_train_test(num_training, num_test): """ generate training and testing dataset """ cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) print 'Training data shape: ', X_train.shape print 'Training labels shape: ', y_train.shape print 'Test data shape: ', X_test.shape print 'Test labels shape: ', y_test.shape # Subsample the data for more efficient code execution mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] # Reshape the image data into rows X_train = np.reshape(X_train, (X_train.shape[0], -1)) X_test = np.reshape(X_test, (X_test.shape[0], -1)) print X_train.shape, X_test.shape return X_train, y_train, X_test, y_test
plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' # Some more magic so that the notebook will reload external python modules; # see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython #get_ipython().magic(u'load_ext autoreload') #get_ipython().magic(u'autoreload 2') # ## CIFAR-10 Data Loading and Preprocessing # In[2]: # Load the raw CIFAR-10 data. cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) # As a sanity check, we print out the size of the training and test data. print 'Training data shape: ', X_train.shape print 'Training labels shape: ', y_train.shape print 'Test data shape: ', X_test.shape print 'Test labels shape: ', y_test.shape # In[3]: # Visualize some examples from the dataset. # We show a few examples of training images from each class. classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] num_classes = len(classes) #samples_per_class = 7
def test1(): cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) print 'Training data shape:', X_train.shape print 'Training label shape:', y_train.shape print 'Test data shape:', X_test.shape print 'Test label shape:', y_test.shape # classes = ['plane','car','bird','cat','deer','dog','frog','horse','ship','truck'] # num_classes = len(classes) # sample_per_class = 7 # for y,cls in enumerate(classes): # idxs = np.flatnonzero(y_train == y) # idxs = np.random.choice(idxs, sample_per_class, replace=False) # for i, idx in enumerate(idxs): # plt_idx = i*num_classes + y + 1 # plt.subplot(sample_per_class, num_classes, plt_idx) # plt.imshow(X_train[idx].astype('uint8')) # plt.axis('off') # if i == 0: # plt.title(cls) # plt.savefig("./figures/cifar_sample.png") # plt.show() # plt.close() num_training = 5000 mask = range(num_training) X_train = X_train[mask] y_train = y_train[mask] num_test = 500 mask = range(num_test) X_test = X_test[mask] y_test = y_test[mask] X_train = np.reshape(X_train, (X_train.shape[0],-1)) X_test = np.reshape(X_test,(X_test.shape[0],-1)) print X_train.shape, X_test.shape from cs231n.classifiers import KNearestNeighbor classifier = KNearestNeighbor() classifier.train(X_train, y_train) # two_loop_time = time_function(classifier.compute_distances_two_loops,X_test) # print "two loop time %f" % two_loop_time # one_loop_time = time_function(classifier.compute_distances_one_loop,X_test) # print "one loop time %f " %one_loop_time # no_loop_time = time_function(classifier.compute_distances_no_loops,X_test) # print "no loop time %f "% no_loop_time dists = classifier.compute_distances_no_loops(X_test) # dist_one_loop = classifier.compute_distances_one_loop(X_test) # dist_two_loops = classifier.compute_distances_two_loops(X_test) #matrix_compare(dists,dist_one_loop) #matrix_compare(dists,dist_two_loops) y_test_pred = classifier.predict_labels(dists,k=5) num_correct = np.sum(y_test_pred == y_test) accuracy = float(num_correct)/num_test print "God %d/%d correct => accuracy: %f" %(num_correct, num_test, accuracy) cross_validate(X_train,y_train)
def Xtrain(): cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, *_ = load_CIFAR10(cifar10_dir) return X_train
def ytrain(): cifar10_dir = 'cs231n/datasets/cifar-10-batches-py' X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir) return y_train