newsgroups_all = fetch_20newsgroups(subset='all', categories=cats) vectorizer = TfidfVectorizer() vectors = vectorizer.fit_transform(newsgroups_all.data) X = vectors.toarray() y = newsgroups_all.target # only take 800 for training and 200 for testing X_train = X[0:800, :] X_test = X[800:1000, :] y_train = y[0:800] y_test = y[800:1000] kernel = GaussianKernel(sigma=1) # to change kpca = KernelPCA(kernel) kpca.fit(X_train) n_components = 2 # to change X_train_proj = kpca.predict(X_train, components=n_components) X_test_proj = kpca.predict(X_test, components=n_components) permuts = numpy.array([[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 0, 1], [2, 1, 0]]) def find_permut_for_prediction(y_pred, y, permuts): y_pred_best = y_pred accuracy_best = 0. permut_best = permuts[0, :] for i in range(0, len(permuts)): y_pred_current = [permuts[i, e] for e in y_pred] accuracy = 1.0 * numpy.sum(numpy.equal(y_pred_current, y)) / len(y) if accuracy > accuracy_best: accuracy_best = accuracy
class KernelDescriptorsExtractor: def __init__(self, gamma_o=5, gamma_c=4, gamma_b=2, gamma_p=3, grid_o_dim=25, grid_c_dims=(5, 5, 5), grid_p_dims=(5, 5), epsilon_g=0.8, epsilon_s=0.2): print "basis for orientation" k_o = GaussianKernelForAngle(1 / numpy.sqrt(2 * gamma_o)) self.projector_o = FeatureVectorProjection(k_o) X = numpy.linspace(-numpy.pi, numpy.pi, grid_o_dim + 1)[:-1] X = X[:, numpy.newaxis] self.projector_o.fit(X) print "basis for color" k_c = GaussianKernel(1 / numpy.sqrt(2 * gamma_c)) self.projector_c = FeatureVectorProjection(k_c) r_step = 1.0 / (grid_c_dims[0] - 1) g_step = 1.0 / (grid_c_dims[1] - 1) b_step = 1.0 / (grid_c_dims[2] - 1) X = numpy.mgrid[0:1 + r_step:r_step, 0:1 + g_step:g_step, 0:1 + b_step:b_step].reshape(3, -1).T self.projector_c.fit(X) print "basis for binary patterns" k_b = GaussianKernel(1 / numpy.sqrt(2 * gamma_b)) self.projector_b = FeatureVectorProjection(k_b) X = numpy.mgrid[0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1].reshape(8, -1).T self.projector_b.fit(X) print "basis for positions" k_p = GaussianKernel(1 / numpy.sqrt(2 * gamma_p)) self.projector_p = FeatureVectorProjection(k_p) x_step = 1.0 / (grid_p_dims[0] - 1) y_step = 1.0 / (grid_p_dims[1] - 1) X = numpy.mgrid[0:1 + x_step:x_step, 0:1 + y_step:y_step].reshape(2, -1).T self.projector_p.fit(X) self.epsilon_g = epsilon_g self.epsilon_s = epsilon_s kpca_kernel = GaussianKernel(0.4) X_p = self.projector_p.predict(self.projector_p.basis) kdes_dim = self.projector_o.ndim * self.projector_p.ndim X_o = self.projector_o.predict(self.projector_o.basis) X_op = numpy.zeros((kdes_dim, kdes_dim)) for i, (x, y) in enumerate(zip(X_o, X_p)): X_op[i, :] = numpy.kron(x, y) self.kpca_op = KernelPCA(kpca_kernel) self.kpca_op.fit(X_op) kdes_dim = self.projector_c.ndim * self.projector_p.ndim X_c = self.projector_c.predict(self.projector_c.basis) X_cp = numpy.zeros((kdes_dim, kdes_dim)) pos = 0 for x in X_c: for y in X_p: X_cp[pos, :] = numpy.kron(x, y) pos += 1 self.kpca_cp = KernelPCA(kpca_kernel) self.kpca_cp.fit(X_cp) def _calc_gradient_match_kernel_for_image(self, I, patch_size, subsample): nX, nY, nchannels = I.shape # precalculate magnitude and angle of gradient in each pixel Ig_magnitude = numpy.zeros(I.shape[0:2]) Ig_angle = numpy.zeros(I.shape[0:2]) for i in range(nX): for j in range(nY): chosen_dx, chosen_dy, chosen_magnitude = 0, 0, 0 for c in range(nchannels): dx, dy = 0, 0 if i < nX - 1: dx += I[i + 1, j, c] if i > 0: dx -= I[i - 1, j, c] if j < nY - 1: dy += I[i, j + 1, c] if j > 0: dy -= I[i, j - 1, c] magnitude = dx**2 + dy**2 if magnitude > chosen_magnitude: chosen_magnitude = magnitude chosen_dx = dx chosen_dy = dy Ig_magnitude[i, j] = numpy.sqrt(magnitude) Ig_angle[i, j] = numpy.arctan2(dx, dy) x_step = 1.0 / (patch_size[0] - 1) y_step = 1.0 / (patch_size[1] - 1) X_p = numpy.mgrid[0:1 + x_step:x_step, 0:1 + y_step:y_step].reshape(2, -1).T X_p = self.projector_p.predict(X_p) patch_x = numpy.arange(patch_size[0]).repeat(patch_size[1]) patch_y = numpy.tile(numpy.arange(patch_size[1]), patch_size[0]) kdes_dims = self.projector_o.ndim * self.projector_p.ndim ret = numpy.zeros((9, kdes_dims)) pos = 0 for sx in range(0, nX - patch_size[0] + 1, subsample[0]): for sy in range(0, nY - patch_size[1] + 1, subsample[1]): norm = numpy.sum(Ig_magnitude[sx:sx + patch_size[0], sy:sy + patch_size[1]]**2) norm = numpy.sqrt(self.epsilon_g + norm) X_o = Ig_angle[sx:sx + patch_size[0], sy:sy + patch_size[1]].reshape(-1) X_o = X_o[:, numpy.newaxis] X_o = self.projector_o.predict(X_o) aux = numpy.zeros(kdes_dims) for x_o, x_p, x, y in zip(X_o, X_p, patch_x, patch_y): aux += Ig_magnitude[x, y] * numpy.kron(x_o, x_p) ret[pos, :] = aux / norm pos += 1 return self.kpca_op.predict(ret, components=200).flatten() def _calc_color_match_kernel_for_image(self, I, patch_size, subsample): nX, nY, nchannels = I.shape x_step = 1.0 / (patch_size[0] - 1) y_step = 1.0 / (patch_size[1] - 1) X_p = numpy.mgrid[0:1 + x_step:x_step, 0:1 + y_step:y_step].reshape(2, -1).T X_p = self.projector_p.predict(X_p) patch_x = numpy.arange(patch_size[0]).repeat(patch_size[1]) patch_y = numpy.tile(numpy.arange(patch_size[1]), patch_size[0]) X_c = numpy.zeros((patch_size[0] * patch_size[1], 3)) kdes_dims = self.projector_c.ndim * self.projector_p.ndim ret = numpy.zeros((9, kdes_dims)) pos = 0 for sx in range(0, nX - patch_size[0] + 1, subsample[0]): for sy in range(0, nY - patch_size[1] + 1, subsample[1]): for i, (x, y) in enumerate(zip(patch_x, patch_y)): X_c[i, :] = I[x, y, :] X_c_proj = self.projector_c.predict(X_c) aux = numpy.zeros(kdes_dims) for x_c, x_p in zip(X_c_proj, X_p): aux += numpy.kron(x_c, x_p) ret[pos, :] = aux pos += 1 return self.kpca_cp.predict(ret, components=200).flatten() def predict(self, X, patch_size=(16, 16), subsample=(8, 8), match_kernel='gradient'): assert X.ndim == 4 n = X.shape[0] print "Match kernel: %s" % match_kernel if match_kernel == 'gradient': X_grad = [] for i in tqdm(range(n)): X_grad.append( self._calc_gradient_match_kernel_for_image( X[i, :, :, :], patch_size, subsample)) X_grad = numpy.array(X_grad) return X_grad elif match_kernel == 'color': X_color = [] for i in tqdm(range(n)): X_color.append( self._calc_color_match_kernel_for_image( X[i, :, :, :], patch_size, subsample)) return X_color else: raise Exception("Unknown match kernel")
def load_features(feature_extractor_name, overwrite_features=True, overwrite_kpca=True, do_kpca=False, kpca_kernel=None, cut_percentage=90, folder_name='data/'): Xtrain, Ytrain, Xtest = load_data(folder_name) if not overwrite_features and not overwrite_kpca and do_kpca: assert kpca_kernel is not None kernel_name = kpca_kernel.name file_suffix = '_' + feature_extractor_name + '_' + kernel_name + '.npy' if os.path.isfile(folder_name + 'Xtrain' + file_suffix) \ and os.path.isfile(folder_name + 'Xtest' + file_suffix): Xtrain = numpy.load(folder_name + 'Xtrain' + file_suffix) Xtest = numpy.load(folder_name + 'Xtest' + file_suffix) return Xtrain, Ytrain, Xtest feature_extractor = get_feature_extractor(feature_extractor_name) if feature_extractor_name == 'hog_fisher' or feature_extractor_name == 'sift_fisher': if not overwrite_features and os.path.isfile(folder_name + 'Xtrain_' + feature_extractor_name + '.npy') \ and os.path.isfile(folder_name + 'Xtest_' + feature_extractor_name + '.npy'): Xtrain = numpy.load(folder_name + 'Xtrain_' + feature_extractor_name + '.npy') Xtest = numpy.load(folder_name + 'Xtest_' + feature_extractor_name + '.npy') else: Xtrain, V_truncate, gmm = feature_extractor.train(Xtrain) Xtest = feature_extractor.predict(Xtest, V_truncate, gmm) numpy.save(folder_name + 'Xtrain_' + feature_extractor_name, Xtrain) numpy.save(folder_name + 'Xtest_' + feature_extractor_name, Xtest) elif feature_extractor_name == 'bag_of_words_hog': if not overwrite_features and os.path.isfile(folder_name + 'Xtrain_' + feature_extractor_name + '.npy') \ and os.path.isfile(folder_name + 'Xtest_' + feature_extractor_name + '.npy'): Xtrain = numpy.load(folder_name + 'Xtrain_' + feature_extractor_name + '.npy') Xtest = numpy.load(folder_name + 'Xtest_' + feature_extractor_name + '.npy') else: Xtrain = feature_extractor.extract(Xtrain) Xtest = feature_extractor.extract(Xtest) feature_extractor.fit(Xtrain) Xtrain = feature_extractor.predict(Xtrain) Xtest = feature_extractor.predict(Xtest) numpy.save(folder_name + 'Xtrain_' + feature_extractor_name, Xtrain) numpy.save(folder_name + 'Xtest_' + feature_extractor_name, Xtest) elif feature_extractor is not None: if not overwrite_features and os.path.isfile(folder_name + 'Xtrain_' + feature_extractor_name + '.npy'): Xtrain = numpy.load(folder_name + 'Xtrain_' + feature_extractor_name + '.npy') else: Xtrain = feature_extractor.predict(Xtrain) numpy.save(folder_name + 'Xtrain_' + feature_extractor_name, Xtrain) if not overwrite_features and os.path.isfile(folder_name + 'Xtest_' + feature_extractor_name + '.npy'): Xtest = numpy.load(folder_name + 'Xtest_' + feature_extractor_name + '.npy') else: Xtest = feature_extractor.predict(Xtest) numpy.save(folder_name + 'Xtest_' + feature_extractor_name, Xtest) if do_kpca: kpca = KernelPCA(kpca_kernel) kpca.fit(Xtrain, cut_percentage=cut_percentage) Xtrain = kpca.predict(Xtrain) Xtest = kpca.predict(Xtest) kernel_name = kpca_kernel.name file_suffix = '_' + feature_extractor_name + '_' + kernel_name + '.npy' numpy.save(folder_name + 'Xtrain' + file_suffix, Xtrain) numpy.save(folder_name + 'Xtest' + file_suffix, Xtest) return Xtrain, Ytrain, Xtest