Python OneHotEncoder.dot Examples

Programming Language: Python

Namespace/Package Name: sklearn.preprocessing

Class/Type: OneHotEncoder

Method/Function: dot

Examples at hotexamples.com: 2

Python OneHotEncoder.dot - 2 examples found. These are the top-rated real-world Python examples of sklearn.preprocessing.OneHotEncoder.dot, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

OneHotEncoder(30)

astype(5)

_fit_transform(4)

categories_(4)

_legacy_mode(2)

categories(2)

classes_(2)

dot(2)

drop(2)

drop_idx_(2)

__init__(1)

append(1)

apply(1)

Example #1

Show file

    def run(self, X, y=None):
        """Fit the filter and return the feature weight matrix.

        Parameters
        ----------
        X : numpy array, shape (n_samples, n_features)
            The training input samples.
        y : numpy array, shape (n_samples,) or (n_samples, n_classes), optional
            The target values or their one-hot encoding that are used to
            compute F. If not present, a k-means clusterization algorithm is
            used. If present, n_classes should be equal to c.

        Returns
        -------
        W : numpy array, shape (n_features, c)
            Feature weight matrix.

        Examples
        --------
        >>> from ITMO_FS.filters.sparse import NDFS
        >>> import numpy as np
        >>> X = np.array([[1, 2, 3, 3, 1], [2, 2, 3, 3, 2], [1, 3, 3, 1, 3],
        ...               [3, 1, 3, 1, 4], [4, 4, 3, 1, 5]], dtype=int)
        >>> y = np.array([1, 2, 3, 4, 5], dtype=int)
        >>> model = NDFS(p=5, c=2)
        >>> weights = model.run(X)
        >>> model.feature_ranking(weights)
        """

        n_samples, n_features = X.shape

        # Symmetrised k-nearest-neighbours connectivity graph.
        # NOTE(review): querying with X itself and using p + 1 neighbours
        # presumably makes every sample its own neighbour - confirm that the
        # resulting self-loops / weights of 2 are intended.
        graph = NearestNeighbors(
            n_neighbors=self.p + 1,
            algorithm='ball_tree').fit(X).kneighbors_graph(X).toarray()
        graph = graph + graph.T

        # Similarity matrix: graph weight times the scheme value of each
        # sample pair. The vectorised function consumes one (i, j) pair per
        # call and produces a scalar.
        indices = [[(i, j) for j in range(n_samples)]
                   for i in range(n_samples)]
        func = np.vectorize(
            lambda xy: graph[xy[0]][xy[1]] * self.__scheme(X[xy[0]], X[xy[1]]),
            signature='(1)->()')
        S = func(indices)

        # L = A^(-1/2) (A - S) A^(-1/2), with A the diagonal of column sums.
        A = np.diag(S.sum(axis=0))
        A_neg_half = power_neg_half(A)
        L = A_neg_half.dot(A - S).dot(A_neg_half)

        # Cluster indicator matrix: supplied one-hot labels, one-hot encoded
        # labels, or k-means assignments when no labels are given.
        if y is not None:
            if len(y.shape) == 2:
                Y = y
            else:
                Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
        else:
            Y = self.__run_kmeans(X)
        F = Y.dot(power_neg_half(Y.T.dot(Y)))

        n_clusters = Y.shape[1]
        D = np.eye(n_features)
        identity_n = np.eye(n_samples)
        identity_c = np.eye(n_clusters)
        gram = X.T.dot(X)  # loop-invariant, computed once

        previous_target = 0
        # Shaped like the documented return value so that a zero-iteration
        # run still yields an (n_features, c) matrix.
        W = np.zeros((n_features, n_clusters))
        for step in range(self.max_iterations):
            # The same inverse is needed for both M and W within one
            # iteration (D only changes afterwards), so compute it once.
            ridge_inv = np.linalg.inv(gram + self.beta * D)
            M = L + self.alpha * (
                identity_n - X.dot(ridge_inv).dot(X.T))
            # Multiplicative update of F.
            F = F * ((self.gamma * F) /
                     (M.dot(F) + self.gamma * F.dot(F.T).dot(F)))
            W = ridge_inv.dot(X.T.dot(F))
            diag = 2 * matrix_norm(W)
            diag[diag < 1e-10] = 1e-10  # prevents division by zero
            D = np.diag(1 / diag)

            # Objective used for the stopping criterion: squared Frobenius
            # reconstruction error plus the gamma-weighted orthogonality
            # penalty on F.
            target = (
                np.trace(F.T.dot(L).dot(F)) + self.alpha *
                (np.linalg.norm(X.dot(W) - F) ** 2
                 + self.beta * l21_norm(W)) +
                self.gamma * (np.linalg.norm(F.T.dot(F) - identity_c) ** 2)
                / 2)
            if step > 0 and abs(target - previous_target) < self.epsilon:
                break
            previous_target = target

        return W

Example #2

Show file

    def _fit(self, X, y, **kwargs):
        """Fit the filter.

        Computes the feature weight matrix W iteratively, then stores the
        per-feature scores in ``self.feature_scores_`` and the indices of the
        ``self.n_features`` highest-scoring features in
        ``self.selected_features_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_classes)
            The target values or their one-hot encoding that are used to
            compute F. If not present, a k-means clusterization algorithm
            is used. If present, n_classes should be equal to c.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``self.k >= n_samples``, or if ``y`` is None and
            ``self.c > n_samples``.
        """
        logger = getLogger(__name__)
        n_samples = X.shape[0]

        if self.k >= n_samples:
            logger.error(
                "Cannot select %d nearest neighbors with n_samples = %d",
                self.k, n_samples)
            raise ValueError(
                "Cannot select %d nearest neighbors with n_samples = %d" %
                (self.k, n_samples))

        # Symmetrised 0/1 k-nearest-neighbours connectivity graph.
        graph = NearestNeighbors(
            n_neighbors=self.k,
            algorithm='ball_tree').fit(X).kneighbors_graph().toarray()
        graph = np.minimum(1, graph + graph.T)
        logger.info("Nearest neighbors graph: %s", graph)

        # Similarity matrix: scheme value of each sample pair, masked by the
        # neighbours graph. The scheme callable is passed directly instead of
        # through a lambda whose argument shadowed the parameter `y`.
        S = graph * pairwise_distances(X, metric=self.__scheme)
        logger.info("S: %s", S)
        A = np.diag(S.sum(axis=0))
        logger.info("A: %s", A)
        # L = A^(-1/2) (A - S) A^(-1/2)
        A_neg_half = power_neg_half(A)
        L = A_neg_half.dot(A - S).dot(A_neg_half)
        logger.info("L: %s", L)

        # Cluster indicator matrix: supplied one-hot labels, one-hot encoded
        # labels, or k-means assignments when no labels are given.
        if y is not None:
            if len(y.shape) == 2:
                Y = y
            else:
                Y = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
        else:
            if self.c > n_samples:
                logger.error(
                    "Cannot find %d clusters with n_samples = %d", self.c,
                    n_samples)
                raise ValueError(
                    "Cannot find %d clusters with n_samples = %d" %
                    (self.c, n_samples))
            Y = self.__run_kmeans(X)
        logger.info("Transformed Y: %s", Y)
        F = Y.dot(power_neg_half(Y.T.dot(Y)))
        logger.info("F: %s", F)
        D = np.eye(self.n_features_)
        In = np.eye(n_samples)
        Ic = np.eye(Y.shape[1])
        gram = X.T.dot(X)  # loop-invariant, computed once

        # Defined up front so the code after the loop does not fail with an
        # unbound name when max_iterations is 0.
        W = np.zeros((self.n_features_, Y.shape[1]))
        # Infinity can never be within epsilon of a finite objective, so the
        # first iteration is never mistaken for convergence.
        previous_target = np.inf
        for _ in range(self.max_iterations):
            # The same inverse is needed for both M and W within one
            # iteration (D only changes afterwards), so compute it once.
            ridge_inv = np.linalg.inv(gram + self.beta * D)
            M = L + self.alpha * (In - X.dot(ridge_inv).dot(X.T))
            logger.info("M: %s", M)
            # Multiplicative update of F.
            F = (F * ((self.gamma * F) /
                      (M.dot(F) + self.gamma * F.dot(F.T).dot(F))))
            logger.info("F: %s", F)
            W = ridge_inv.dot(X.T.dot(F))
            logger.info("W: %s", W)
            diag = 2 * matrix_norm(W)
            diag[diag < 1e-10] = 1e-10  # prevents division by zero
            D = np.diag(1 / diag)
            logger.info("D: %s", D)

            target = (
                np.trace(F.T.dot(L).dot(F)) + self.alpha *
                (np.linalg.norm(X.dot(W) - F)**2 + self.beta * l21_norm(W)) +
                self.gamma * (np.linalg.norm(F.T.dot(F) - Ic)**2) / 2)
            # %s rather than %d: the objective is a float, and %d would
            # truncate it to an integer in the log output.
            logger.info("New target value: %s", target)
            if abs(target - previous_target) < self.epsilon:
                break
            previous_target = target

        logger.info("Ended up with W: %s", W)
        self.feature_scores_ = matrix_norm(W)
        logger.info("Feature scores: %s", self.feature_scores_)
        # Feature indices ordered by descending score; keep the top ones.
        ranking = np.argsort(self.feature_scores_)[::-1]
        self.selected_features_ = ranking[:self.n_features]