def get_data():
    """Load CIFAR-10 and convert every image into its feature representation.

    The raw splits come from ``load_CIFAR10(cifar10_dir)``.  Each image of
    the train and test splits is passed through ``get_features(img, 10)``
    and the per-image results are joined with ``np.concatenate``.

    Returns:
        A 4-tuple ``(X_train_feat, Y_train, X_test_feat, Y_test)``; the
        labels are returned exactly as the loader produced them.
    """
    X_train, Y_train, X_test, Y_test = load_CIFAR10(cifar10_dir)

    def _featurize(images):
        # One get_features call per image, with a tqdm progress bar.
        # NOTE(review): the meaning of the constant 10 is defined by
        # get_features, which is not visible here.
        per_image = []
        for img in tqdm(images):
            per_image.append(get_features(img, 10))
        return np.concatenate(per_image)

    # compute the features (train split first, then test split)
    X_train_feat = _featurize(X_train)
    X_test_feat = _featurize(X_test)

    return X_train_feat, Y_train, X_test_feat, Y_test
def load_data(data_dir='cifar-10-batches-py', num_train=40000,
              subsample_step=200):
    """Load CIFAR-10 and split it into train / validation / test sets.

    The arrays returned by ``load_CIFAR10(data_dir)`` are converted to
    ``int32``.  The first ``num_train`` training samples form the training
    set and all remaining training samples form the validation pool.  The
    validation pool and the test set are then thinned to every
    ``subsample_step``-th sample.

    Args:
        data_dir: Directory handed to ``load_CIFAR10``.
        num_train: Number of leading samples kept for training.
        subsample_step: Stride used to thin the validation and test sets.

    Returns:
        ``(Xtr, Ytr, Xva, Yva, Xte, Yte)`` where the validation and test
        arrays are the subsampled versions.
    """
    Xtr, Ytr, Xte, Yte = load_CIFAR10(data_dir)
    Xtr = np.array(Xtr, dtype=np.int32)
    Ytr = np.array(Ytr, dtype=np.int32)
    Xte = np.array(Xte, dtype=np.int32)
    Yte = np.array(Yte, dtype=np.int32)

    # Validation starts exactly where training ends.  The earlier version
    # started at num_train + 1 and silently dropped one sample.
    Xva = Xtr[num_train:]
    Yva = Ytr[num_train:]
    Xtr = Xtr[:num_train]
    Ytr = Ytr[:num_train]

    # Build the subsample indices per split from that split's own length,
    # so the validation indices can never run past the validation pool.
    val_indices = np.arange(0, Xva.shape[0], subsample_step)
    test_indices = np.arange(0, Xte.shape[0], subsample_step)

    return (Xtr, Ytr,
            Xva[val_indices], Yva[val_indices],
            Xte[test_indices], Yte[test_indices])
# Runs a KNN classifier on a subset of CIFAR-10 data.
import numpy as np
import cifar10
import timer


def _print_array_info(label, array):
    """Print ``label`` followed by the shape and dtype of ``array``."""
    # A single parenthesised argument is valid both as a Python 2 print
    # statement and as a Python 3 print() call.  The '%s %s %s' template
    # reproduces the single-space separators that `print a, b, c` emitted.
    print('%s %s %s' % (label, array.shape, array.dtype))


# NOTE: `dir` shadows the builtin of the same name.  The name is kept
# because code outside this section may still refer to it.
dir = 'datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = cifar10.load_CIFAR10(dir)

_print_array_info('Training data shape: ', X_train)
_print_array_info('Training labels shape: ', y_train)
_print_array_info('Test data shape: ', X_test)
_print_array_info('Test labels shape: ', y_test)

# Subsample to save on time/space
num_training = 50000
X_train = X_train[:num_training]
y_train = y_train[:num_training]

num_test = 100
X_test = X_test[:num_test]
y_test = y_test[:num_test]

# Reshape the image data into rows: each item in these arrays is a 3072-element
# vector representing 3 colors per image pixel.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))

_print_array_info('Reshaped training data shape: ', X_train)
_print_array_info('Reshaped test data shape: ', X_test)