Esempio n. 1
0
    def _fit_transform(self, X):
        """Fit the neighbour index and kernel PCA on X and store the embedding.

        Sets ``training_data_``, ``kernel_pca_``, ``dist_matrix_`` and
        ``embedding_`` on the instance; nothing is returned.

        Parameters
        ----------
        X : array-like
            Training data; validated with ``check_array`` before use.
        """
        X = check_array(X)
        self.nbrs_.fit(X)
        self.training_data_ = self.nbrs_._fit_X
        self.kernel_pca_ = KernelPCA(kernel="precomputed",
                                     n_components=self.n_components,
                                     eigen_solver=self.eigen_solver,
                                     max_iter=self.max_iter, tol=self.tol)

        # Shortest paths through the undirected neighbour graph whose edges
        # carry distances (mode='distance').
        neighbor_graph = kneighbors_graph(self.nbrs_, self.n_neighbors,
                                          mode='distance')
        self.dist_matrix_ = graph_shortest_path(neighbor_graph,
                                                directed=False,
                                                method=self.path_method)

        # Matrix handed to the precomputed-kernel PCA: -0.5 * D ** 2.
        kernel = -0.5 * self.dist_matrix_ ** 2
        self.embedding_ = self.kernel_pca_.fit_transform(kernel)
Esempio n. 2
0
    def __init__(self,
                 gamma_o=5,
                 gamma_c=4,
                 gamma_b=2,
                 gamma_p=3,
                 grid_o_dim=25,
                 grid_c_dims=(5, 5, 5),
                 grid_p_dims=(5, 5),
                 epsilon_g=0.8,
                 epsilon_s=0.2):
        """Fit the per-attribute projectors and the joint kernel PCAs.

        Parameters
        ----------
        gamma_o, gamma_c, gamma_b, gamma_p : number
            Used to derive the kernel argument ``1 / sqrt(2 * gamma)`` for
            the orientation, colour, binary-pattern and position kernels
            (presumably a bandwidth -- confirm against the kernel classes).
        grid_o_dim : int
            Number of sample angles in ``[-pi, pi)``.
        grid_c_dims : tuple of 3 ints
            Samples per colour channel over ``[0, 1]``.
        grid_p_dims : tuple of 2 ints
            Samples per position axis over ``[0, 1]``.
        epsilon_g, epsilon_s : float
            Stored as-is on the instance.
        """
        print("basis for orientation")
        k_o = GaussianKernelForAngle(1 / numpy.sqrt(2 * gamma_o))
        self.projector_o = FeatureVectorProjection(k_o)
        # Evenly spaced angles; the duplicate endpoint (+pi) is dropped.
        X = numpy.linspace(-numpy.pi, numpy.pi, grid_o_dim + 1)[:-1]
        X = X[:, numpy.newaxis]
        self.projector_o.fit(X)

        print("basis for color")
        k_c = GaussianKernel(1 / numpy.sqrt(2 * gamma_c))
        self.projector_c = FeatureVectorProjection(k_c)
        r_step = 1.0 / (grid_c_dims[0] - 1)
        g_step = 1.0 / (grid_c_dims[1] - 1)
        b_step = 1.0 / (grid_c_dims[2] - 1)
        # NOTE(review): float-step grids rely on rounding to include the
        # endpoint 1; confirm for grid sizes other than the defaults.
        X = numpy.mgrid[0:1 + r_step:r_step, 0:1 + g_step:g_step,
                        0:1 + b_step:b_step].reshape(3, -1).T
        self.projector_c.fit(X)

        print("basis for binary patterns")
        k_b = GaussianKernel(1 / numpy.sqrt(2 * gamma_b))
        self.projector_b = FeatureVectorProjection(k_b)
        # Every binary vector of length 8.
        X = numpy.mgrid[0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1,
                        0:2:1].reshape(8, -1).T
        self.projector_b.fit(X)

        print("basis for positions")
        k_p = GaussianKernel(1 / numpy.sqrt(2 * gamma_p))
        self.projector_p = FeatureVectorProjection(k_p)
        x_step = 1.0 / (grid_p_dims[0] - 1)
        y_step = 1.0 / (grid_p_dims[1] - 1)
        X = numpy.mgrid[0:1 + x_step:x_step,
                        0:1 + y_step:y_step].reshape(2, -1).T
        self.projector_p.fit(X)

        self.epsilon_g = epsilon_g
        self.epsilon_s = epsilon_s

        kpca_kernel = GaussianKernel(0.4)
        X_p = self.projector_p.predict(self.projector_p.basis)

        # Joint orientation x position basis: one Kronecker product per
        # (orientation, position) pair.  The previous zip() only paired
        # matching indices and left every other row at zero.
        X_o = self.projector_o.predict(self.projector_o.basis)
        X_op = numpy.array([numpy.kron(x, y) for x in X_o for y in X_p])
        self.kpca_op = KernelPCA(kpca_kernel)
        self.kpca_op.fit(X_op)

        # Joint colour x position basis, built the same way.
        X_c = self.projector_c.predict(self.projector_c.basis)
        X_cp = numpy.array([numpy.kron(x, y) for x in X_c for y in X_p])
        self.kpca_cp = KernelPCA(kpca_kernel)
        self.kpca_cp.fit(X_cp)
Esempio n. 3
0
class KernelDescriptorsExtractor:
    """Compute gradient and colour match-kernel descriptors over patches.

    The constructor fits one ``FeatureVectorProjection`` per attribute
    (orientation, colour, binary pattern, position) and one ``KernelPCA``
    per joint basis (orientation x position, colour x position).
    ``predict`` then turns a batch of images into one flat descriptor
    vector per image.
    """

    def __init__(self,
                 gamma_o=5,
                 gamma_c=4,
                 gamma_b=2,
                 gamma_p=3,
                 grid_o_dim=25,
                 grid_c_dims=(5, 5, 5),
                 grid_p_dims=(5, 5),
                 epsilon_g=0.8,
                 epsilon_s=0.2):
        """Fit the per-attribute projectors and the joint kernel PCAs.

        Parameters
        ----------
        gamma_o, gamma_c, gamma_b, gamma_p : number
            Used to derive the kernel argument ``1 / sqrt(2 * gamma)`` for
            the orientation, colour, binary-pattern and position kernels
            (presumably a bandwidth -- confirm against the kernel classes).
        grid_o_dim : int
            Number of sample angles in ``[-pi, pi)``.
        grid_c_dims : tuple of 3 ints
            Samples per colour channel over ``[0, 1]``.
        grid_p_dims : tuple of 2 ints
            Samples per position axis over ``[0, 1]``.
        epsilon_g : float
            Added under the square root when normalising gradient patches.
        epsilon_s : float
            Stored on the instance; not read by the methods of this class.
        """
        print("basis for orientation")
        k_o = GaussianKernelForAngle(1 / numpy.sqrt(2 * gamma_o))
        self.projector_o = FeatureVectorProjection(k_o)
        # Evenly spaced angles; the duplicate endpoint (+pi) is dropped.
        X = numpy.linspace(-numpy.pi, numpy.pi, grid_o_dim + 1)[:-1]
        X = X[:, numpy.newaxis]
        self.projector_o.fit(X)

        print("basis for color")
        k_c = GaussianKernel(1 / numpy.sqrt(2 * gamma_c))
        self.projector_c = FeatureVectorProjection(k_c)
        r_step = 1.0 / (grid_c_dims[0] - 1)
        g_step = 1.0 / (grid_c_dims[1] - 1)
        b_step = 1.0 / (grid_c_dims[2] - 1)
        # NOTE(review): float-step grids rely on rounding to include the
        # endpoint 1; confirm for grid sizes other than the defaults.
        X = numpy.mgrid[0:1 + r_step:r_step, 0:1 + g_step:g_step,
                        0:1 + b_step:b_step].reshape(3, -1).T
        self.projector_c.fit(X)

        print("basis for binary patterns")
        k_b = GaussianKernel(1 / numpy.sqrt(2 * gamma_b))
        self.projector_b = FeatureVectorProjection(k_b)
        # Every binary vector of length 8.
        X = numpy.mgrid[0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1, 0:2:1,
                        0:2:1].reshape(8, -1).T
        self.projector_b.fit(X)

        print("basis for positions")
        k_p = GaussianKernel(1 / numpy.sqrt(2 * gamma_p))
        self.projector_p = FeatureVectorProjection(k_p)
        x_step = 1.0 / (grid_p_dims[0] - 1)
        y_step = 1.0 / (grid_p_dims[1] - 1)
        X = numpy.mgrid[0:1 + x_step:x_step,
                        0:1 + y_step:y_step].reshape(2, -1).T
        self.projector_p.fit(X)

        self.epsilon_g = epsilon_g
        self.epsilon_s = epsilon_s

        kpca_kernel = GaussianKernel(0.4)
        X_p = self.projector_p.predict(self.projector_p.basis)

        # Joint orientation x position basis: one Kronecker product per
        # (orientation, position) pair.  The previous zip() only paired
        # matching indices and left every other row at zero.
        X_o = self.projector_o.predict(self.projector_o.basis)
        X_op = numpy.array([numpy.kron(x, y) for x in X_o for y in X_p])
        self.kpca_op = KernelPCA(kpca_kernel)
        self.kpca_op.fit(X_op)

        # Joint colour x position basis, built the same way.
        X_c = self.projector_c.predict(self.projector_c.basis)
        X_cp = numpy.array([numpy.kron(x, y) for x in X_c for y in X_p])
        self.kpca_cp = KernelPCA(kpca_kernel)
        self.kpca_cp.fit(X_cp)

    def _calc_gradient_match_kernel_for_image(self, I, patch_size, subsample):
        """Return the flattened gradient descriptor of one image.

        Parameters
        ----------
        I : array, shape (nX, nY, nchannels)
        patch_size : pair of ints
            Patch extent along the first and second image axis.
        subsample : pair of ints
            Stride between consecutive patch origins along each axis.

        Returns
        -------
        1-D array: ``kpca_op.predict`` of the per-patch features, flattened.
        """
        nX, nY, nchannels = I.shape

        # Per pixel, keep the gradient of the channel with the largest
        # squared magnitude (central differences, one-sided at the border).
        Ig_magnitude = numpy.zeros((nX, nY))
        Ig_angle = numpy.zeros((nX, nY))
        for i in range(nX):
            for j in range(nY):
                chosen_dx, chosen_dy, chosen_magnitude = 0, 0, 0

                for c in range(nchannels):
                    dx, dy = 0, 0
                    if i < nX - 1:
                        dx += I[i + 1, j, c]
                    if i > 0:
                        dx -= I[i - 1, j, c]
                    if j < nY - 1:
                        dy += I[i, j + 1, c]
                    if j > 0:
                        dy -= I[i, j - 1, c]
                    magnitude = dx**2 + dy**2

                    if magnitude > chosen_magnitude:
                        chosen_magnitude = magnitude
                        chosen_dx = dx
                        chosen_dy = dy

                # Use the selected channel, not whatever channel came last.
                Ig_magnitude[i, j] = numpy.sqrt(chosen_magnitude)
                Ig_angle[i, j] = numpy.arctan2(chosen_dx, chosen_dy)

        # Normalised in-patch positions, in the same row-major order as a
        # flattened patch.
        x_step = 1.0 / (patch_size[0] - 1)
        y_step = 1.0 / (patch_size[1] - 1)
        X_p = numpy.mgrid[0:1 + x_step:x_step,
                          0:1 + y_step:y_step].reshape(2, -1).T
        X_p = self.projector_p.predict(X_p)

        kdes_dims = self.projector_o.ndim * self.projector_p.ndim
        starts_x = range(0, nX - patch_size[0] + 1, subsample[0])
        starts_y = range(0, nY - patch_size[1] + 1, subsample[1])
        # One row per patch (9 for a 32x32 image with the default arguments).
        ret = numpy.zeros((len(starts_x) * len(starts_y), kdes_dims))
        pos = 0
        for sx in starts_x:
            for sy in starts_y:
                magnitudes = Ig_magnitude[sx:sx + patch_size[0],
                                          sy:sy + patch_size[1]]
                norm = numpy.sqrt(self.epsilon_g + numpy.sum(magnitudes**2))

                X_o = Ig_angle[sx:sx + patch_size[0],
                               sy:sy + patch_size[1]].reshape(-1)
                X_o = self.projector_o.predict(X_o[:, numpy.newaxis])

                # Weight each pixel by the magnitude at its position inside
                # *this* patch (previously always read from the top-left
                # patch of the image).
                aux = numpy.zeros(kdes_dims)
                for m, x_o, x_p in zip(magnitudes.reshape(-1), X_o, X_p):
                    aux += m * numpy.kron(x_o, x_p)
                ret[pos, :] = aux / norm
                pos += 1

        return self.kpca_op.predict(ret, components=200).flatten()

    def _calc_color_match_kernel_for_image(self, I, patch_size, subsample):
        """Return the flattened colour descriptor of one image.

        Parameters and return value mirror
        ``_calc_gradient_match_kernel_for_image``, using ``projector_c`` and
        ``kpca_cp`` and no per-patch normalisation.
        """
        nX, nY, nchannels = I.shape

        x_step = 1.0 / (patch_size[0] - 1)
        y_step = 1.0 / (patch_size[1] - 1)
        X_p = numpy.mgrid[0:1 + x_step:x_step,
                          0:1 + y_step:y_step].reshape(2, -1).T
        X_p = self.projector_p.predict(X_p)

        kdes_dims = self.projector_c.ndim * self.projector_p.ndim
        starts_x = range(0, nX - patch_size[0] + 1, subsample[0])
        starts_y = range(0, nY - patch_size[1] + 1, subsample[1])
        ret = numpy.zeros((len(starts_x) * len(starts_y), kdes_dims))
        pos = 0
        for sx in starts_x:
            for sy in starts_y:
                # Pixels of the current patch in row-major order, one row
                # per pixel (previously always the top-left patch).
                X_c = I[sx:sx + patch_size[0],
                        sy:sy + patch_size[1], :].reshape(-1, nchannels)
                X_c_proj = self.projector_c.predict(X_c.astype(float))

                aux = numpy.zeros(kdes_dims)
                for x_c, x_p in zip(X_c_proj, X_p):
                    aux += numpy.kron(x_c, x_p)
                ret[pos, :] = aux
                pos += 1

        return self.kpca_cp.predict(ret, components=200).flatten()

    def predict(self,
                X,
                patch_size=(16, 16),
                subsample=(8, 8),
                match_kernel='gradient'):
        """Compute one descriptor vector per image in X.

        Parameters
        ----------
        X : array with 4 dimensions, indexed as (image, x, y, channel)
        patch_size, subsample : pair of ints
            Forwarded to the per-image routine.
        match_kernel : {'gradient', 'color'}

        Returns
        -------
        numpy array with one row per image (for both kernels).

        Raises
        ------
        ValueError
            If ``match_kernel`` is neither 'gradient' nor 'color'.
        """
        assert X.ndim == 4
        n = X.shape[0]
        print("Match kernel: %s" % match_kernel)

        if match_kernel == 'gradient':
            extract = self._calc_gradient_match_kernel_for_image
        elif match_kernel == 'color':
            extract = self._calc_color_match_kernel_for_image
        else:
            raise ValueError("Unknown match kernel")

        descriptors = []
        for i in tqdm(range(n)):
            descriptors.append(extract(X[i, :, :, :], patch_size, subsample))
        return numpy.array(descriptors)
Esempio n. 4
0
from kmeans import Kmeans

# Three-category subset of 20 Newsgroups, vectorised with TF-IDF.
cats = ['sci.med', 'misc.forsale', 'soc.religion.christian']
newsgroups_all = fetch_20newsgroups(categories=cats, subset='all')
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(newsgroups_all.data)
X, y = vectors.toarray(), newsgroups_all.target

# First 800 documents for training, the following 200 for testing.
X_train, X_test = X[:800], X[800:1000]
y_train, y_test = y[:800], y[800:1000]

# Fit kernel PCA on the training split and project both splits.
kernel = GaussianKernel(sigma=1)  # to change
kpca = KernelPCA(kernel)
kpca.fit(X_train)
n_components = 2  # to change
X_train_proj = kpca.predict(X_train, components=n_components)
X_test_proj = kpca.predict(X_test, components=n_components)

# Every ordering of the three labels, one per row.
permuts = numpy.array([
    [0, 1, 2],
    [0, 2, 1],
    [1, 0, 2],
    [1, 2, 0],
    [2, 0, 1],
    [2, 1, 0],
])


def find_permut_for_prediction(y_pred, y, permuts):
    y_pred_best = y_pred
    accuracy_best = 0.
    permut_best = permuts[0, :]
    for i in range(0, len(permuts)):
        y_pred_current = [permuts[i, e] for e in y_pred]
Esempio n. 5
0
class Isomap(BaseEstimator, TransformerMixin):
    """Isomap Embedding

    Non-linear dimensionality reduction through Isometric Mapping

    Parameters
    ----------
    n_neighbors : integer
        number of neighbors to consider for each point.

    n_components : integer
        number of coordinates for the manifold

    eigen_solver : ['auto'|'arpack'|'dense']
        'auto' : Attempt to choose the most efficient solver
        for the given problem.

        'arpack' : Use Arnoldi decomposition to find the eigenvalues
        and eigenvectors.

        'dense' : Use a direct solver (i.e. LAPACK)
        for the eigenvalue decomposition.

    tol : float
        Convergence tolerance passed to arpack or lobpcg.
        not used if eigen_solver == 'dense'.

    max_iter : integer
        Maximum number of iterations for the arpack solver.
        not used if eigen_solver == 'dense'.

    path_method : string ['auto'|'FW'|'D']
        Method to use in finding shortest path.

        'auto' : attempt to choose the best algorithm automatically.

        'FW' : Floyd-Warshall algorithm.

        'D' : Dijkstra's algorithm.

    neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
        Algorithm to use for nearest neighbors search,
        passed to neighbors.NearestNeighbors instance.

    Attributes
    ----------
    embedding_ : array-like, shape (n_samples, n_components)
        Stores the embedding vectors.

    kernel_pca_ : object
        `KernelPCA` object used to implement the embedding.

    training_data_ : array-like, shape (n_samples, n_features)
        Stores the training data.

    nbrs_ : sklearn.neighbors.NearestNeighbors instance
        Stores nearest neighbors instance, including BallTree or KDtree
        if applicable. Created when the model is fitted.

    dist_matrix_ : array-like, shape (n_samples, n_samples)
        Stores the geodesic distance matrix of training data.

    References
    ----------

    .. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric
           framework for nonlinear dimensionality reduction. Science 290 (5500)
    """

    def __init__(self, n_neighbors=5, n_components=2, eigen_solver='auto',
                 tol=0, max_iter=None, path_method='auto',
                 neighbors_algorithm='auto'):
        # Only store the parameters here.  The neighbour index is built at
        # fit time so that parameter changes made after construction
        # (e.g. through set_params) are honoured.
        self.n_neighbors = n_neighbors
        self.n_components = n_components
        self.eigen_solver = eigen_solver
        self.tol = tol
        self.max_iter = max_iter
        self.path_method = path_method
        self.neighbors_algorithm = neighbors_algorithm

    def _fit_transform(self, X):
        """Fit the neighbour index and kernel PCA on X; store the embedding.

        Sets ``nbrs_``, ``training_data_``, ``kernel_pca_``,
        ``dist_matrix_`` and ``embedding_``; returns nothing.
        """
        X = check_array(X)
        # Built from the current parameter values rather than the values
        # seen by __init__.
        self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
                                      algorithm=self.neighbors_algorithm)
        self.nbrs_.fit(X)
        self.training_data_ = self.nbrs_._fit_X
        self.kernel_pca_ = KernelPCA(n_components=self.n_components,
                                     kernel="precomputed",
                                     eigen_solver=self.eigen_solver,
                                     tol=self.tol, max_iter=self.max_iter)

        kng = kneighbors_graph(self.nbrs_, self.n_neighbors,
                               mode='distance')

        # Geodesic distances: shortest paths through the neighbour graph.
        self.dist_matrix_ = graph_shortest_path(kng,
                                                method=self.path_method,
                                                directed=False)
        G = self.dist_matrix_ ** 2
        G *= -0.5

        self.embedding_ = self.kernel_pca_.fit_transform(G)

    def reconstruction_error(self):
        """Compute the reconstruction error for the embedding.

        Returns
        -------
        reconstruction_error : float

        Notes
        -----
        The cost function of an isomap embedding is

        ``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``

        Where D is the matrix of distances for the input data X,
        D_fit is the matrix of distances for the output embedding X_fit,
        and K is the isomap kernel:

        ``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``
        """
        G = -0.5 * self.dist_matrix_ ** 2
        G_center = KernelCenterer().fit_transform(G)
        evals = self.kernel_pca_.lambdas_
        return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]

    def fit(self, X, y=None):
        """Compute the embedding vectors for data X

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Sample data.
            NOTE(review): the input is passed through ``check_array``, so
            precomputed trees / NearestNeighbors objects are presumably not
            accepted here -- confirm.

        y : ignored

        Returns
        -------
        self : returns an instance of self.
        """
        self._fit_transform(X)
        return self

    def fit_transform(self, X, y=None):
        """Fit the model from data in X and transform X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        y : ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        self._fit_transform(X)
        return self.embedding_

    def transform(self, X):
        """Transform X.

        This is implemented by linking the points X into the graph of geodesic
        distances of the training data. First the `n_neighbors` nearest
        neighbors of X are found in the training data, and from these the
        shortest geodesic distances from each point in X to each point in
        the training data are computed in order to construct the kernel.
        The embedding of X is the projection of this kernel onto the
        embedding vectors of the training set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        X = check_array(X)
        distances, indices = self.nbrs_.kneighbors(X, return_distance=True)

        # Create the graph of shortest distances from X to
        # self.training_data_ via the nearest neighbors of X.
        # This can be done as a single array operation, but it potentially
        # takes a lot of memory.  To avoid that, use a loop:
        G_X = np.zeros((X.shape[0], self.training_data_.shape[0]))
        for i in range(X.shape[0]):
            # Shortest route to every training point that passes through
            # one of the neighbours of sample i.
            G_X[i] = np.min((self.dist_matrix_[indices[i]]
                             + distances[i][:, None]), 0)

        G_X **= 2
        G_X *= -0.5

        return self.kernel_pca_.transform(G_X)
Esempio n. 6
0
def load_features(feature_extractor_name,
                  overwrite_features=True,
                  overwrite_kpca=True,
                  do_kpca=False,
                  kpca_kernel=None,
                  cut_percentage=90,
                  folder_name='data/'):
    """Load the data set and return extracted (optionally KPCA-reduced) features.

    Feature matrices are cached as ``.npy`` files whose names are formed by
    concatenating ``folder_name`` with ``Xtrain``/``Xtest``, the extractor
    name and, for KPCA output, the kernel name.  Cached files are reused
    only when the matching ``overwrite_*`` flag is false.

    Parameters
    ----------
    feature_extractor_name : str
        Passed to ``get_feature_extractor`` and used in cache file names.
    overwrite_features : bool
        When true, features are always recomputed and saved.
    overwrite_kpca : bool
        When true, cached KPCA output is never loaded.
    do_kpca : bool
        Whether to fit ``KernelPCA(kpca_kernel)`` on the training features
        and project both splits.
    kpca_kernel : object or None
        Kernel handed to ``KernelPCA``; its ``name`` attribute is used in
        cache file names.
    cut_percentage : number
        Forwarded to ``KernelPCA.fit``.
    folder_name : str
        Prefix for every path (plain string concatenation, so it is
        expected to end with a separator).

    Returns
    -------
    (Xtrain, Ytrain, Xtest)
    """
    Xtrain, Ytrain, Xtest = load_data(folder_name)
    reuse_features = not overwrite_features

    # Shortcut: both KPCA outputs are already on disk and may be reused.
    if reuse_features and not overwrite_kpca and do_kpca:
        assert kpca_kernel is not None
        kernel_name = kpca_kernel.name
        file_suffix = '_' + feature_extractor_name + '_' + kernel_name + '.npy'
        train_file = folder_name + 'Xtrain' + file_suffix
        test_file = folder_name + 'Xtest' + file_suffix
        if os.path.isfile(train_file) and os.path.isfile(test_file):
            return numpy.load(train_file), Ytrain, numpy.load(test_file)

    feature_extractor = get_feature_extractor(feature_extractor_name)

    # Paths are built lazily so that no string concatenation happens on
    # code paths that never touch the feature cache.
    def stem(split):
        return folder_name + split + '_' + feature_extractor_name

    def is_cached(split):
        return reuse_features and os.path.isfile(stem(split) + '.npy')

    def load_cached(split):
        return numpy.load(stem(split) + '.npy')

    def store(split, features):
        numpy.save(stem(split), features)

    def load_or_predict(split, data):
        if is_cached(split):
            return load_cached(split)
        features = feature_extractor.predict(data)
        store(split, features)
        return features

    if feature_extractor_name in ('hog_fisher', 'sift_fisher'):
        if is_cached('Xtrain') and is_cached('Xtest'):
            Xtrain = load_cached('Xtrain')
            Xtest = load_cached('Xtest')
        else:
            # Training yields extra state that the test-time call needs.
            Xtrain, V_truncate, gmm = feature_extractor.train(Xtrain)
            Xtest = feature_extractor.predict(Xtest, V_truncate, gmm)
            store('Xtrain', Xtrain)
            store('Xtest', Xtest)
    elif feature_extractor_name == 'bag_of_words_hog':
        if is_cached('Xtrain') and is_cached('Xtest'):
            Xtrain = load_cached('Xtrain')
            Xtest = load_cached('Xtest')
        else:
            Xtrain = feature_extractor.extract(Xtrain)
            Xtest = feature_extractor.extract(Xtest)
            feature_extractor.fit(Xtrain)
            Xtrain = feature_extractor.predict(Xtrain)
            Xtest = feature_extractor.predict(Xtest)
            store('Xtrain', Xtrain)
            store('Xtest', Xtest)
    elif feature_extractor is not None:
        # Each split is cached independently for the generic extractors.
        Xtrain = load_or_predict('Xtrain', Xtrain)
        Xtest = load_or_predict('Xtest', Xtest)

    if do_kpca:
        kpca = KernelPCA(kpca_kernel)
        kpca.fit(Xtrain, cut_percentage=cut_percentage)
        Xtrain = kpca.predict(Xtrain)
        Xtest = kpca.predict(Xtest)

        kernel_name = kpca_kernel.name
        file_suffix = '_' + feature_extractor_name + '_' + kernel_name + '.npy'
        numpy.save(folder_name + 'Xtrain' + file_suffix, Xtrain)
        numpy.save(folder_name + 'Xtest' + file_suffix, Xtest)

    return Xtrain, Ytrain, Xtest