Example #1
    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y: array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted values.
        """
        # Check data
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        # Assign chunk of trees to jobs
        n_jobs, n_trees, starts = _partition_estimators(self)

        # Parallel loop
        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                             backend="threading")(
            delayed(_parallel_predict_regression)(
                self.estimators_[starts[i]:starts[i + 1]], X)
            for i in range(n_jobs))

        # Reduce
        y_hat = sum(all_y_hat) / len(self.estimators_)

        return y_hat
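The reduce step above divides the summed chunk outputs by the total tree count. A minimal sketch of that logic (assuming, as in sklearn of this era, that _parallel_predict_regression returns the sum of its chunk's per-tree predictions):

import numpy as np

# Three trees' predictions for two samples; two jobs return chunk sums.
preds = np.array([[1.0, 2.0], [3.0, 2.0], [2.0, 5.0]])
chunks = [preds[:2].sum(axis=0), preds[2:].sum(axis=0)]

# Summing the chunk sums and dividing by the number of trees gives the mean.
print(sum(chunks) / len(preds))   # [2. 3.]
print(preds.mean(axis=0))         # [2. 3.]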
Example #2
def procrustes_rotation(X1, X2, copy=True):
    """Apply optimal rotation and scaling matrix between two matrices.

    Parameters
    ----------
        X1, X2: array-likes with the same shape (n_samples, n_features)

    Returns
    -------
        X2_t : array-like, shape (n_samples, n_features)
    """
    X1 = as_float_array(array2d(X1), copy=copy)
    X2 = as_float_array(array2d(X2), copy=copy)

    X1_mean = X1.mean(0)
    X2_mean = X2.mean(0)

    X1 -= X1_mean
    X2 -= X2_mean

    X1_norm = linalg.norm(X1, 'fro')
    X2_norm = linalg.norm(X2, 'fro')

    X1 /= X1_norm
    X2 /= X2_norm

    U, S, V = linalg.svd(np.dot(X1.T, X2), full_matrices=False)
    U, V = svd_flip(U, V)
    R = np.dot(V.T, U.T)
    X2_t = np.sum(S) * X1_norm * np.dot(X2, R) + X1_mean
    return X2_t
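A quick usage sketch for the function above (illustrative, not from the source; assumes the sklearn-era helpers it uses are importable): when X2 is a rotated, scaled and translated copy of X1, the aligned output recovers X1 almost exactly.

import numpy as np

rng = np.random.RandomState(0)
X1 = rng.randn(20, 2)
theta = 0.7
rot = np.array([[np.cos(theta), -np.sin(theta)],
                [np.sin(theta),  np.cos(theta)]])
X2 = 3.0 * X1.dot(rot) + 5.0           # rotate, scale, translate X1

X2_t = procrustes_rotation(X1, X2)     # function defined above
print(np.allclose(X2_t, X1))           # True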
Example #3
 def _joint_log_likelihood(self, X, mask=None):
     X = array2d(X)
     if mask is not None:
         mask = array2d(mask)
         X = X.copy()
         X[mask] = np.nan
     joint_log_likelihood = np.zeros((len(self.classes_), X.shape[0]))
     for i in range(np.size(self.classes_)):
         joint_log_likelihood[i, :] = self._jll(X, i)
     return joint_log_likelihood.T
Example #5
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """

        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)

        if self.iteration != 0 and n_features != self.components_.shape[1]:
            raise ValueError(
                'The dimensionality of the new data and the existing components_ does not match'
            )

        # incrementally fit the model
        for i in range(0, X.shape[0]):
            self.partial_fit(X[i, :])

        return self
Example #6
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self: object
            Returns the object itself
        """
        random_state = check_random_state(self.random_state)
        X = array2d(X)
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        V, U, E, self.n_iter_ = dict_learning(
            X, n_components, self.alpha,
            tol=self.tol, max_iter=self.max_iter,
            method=self.fit_algorithm,
            n_jobs=self.n_jobs,
            code_init=self.code_init,
            dict_init=self.dict_init,
            verbose=self.verbose,
            random_state=random_state,
            return_n_iter=True
            )
        self.components_ = U
        self.error_ = E
        return self
Example #7
def med_cross_distances_test(X):
    """
	Computes the nonzero componentwise L1 cross-distances between the vectors
	in X.

	Parameters
	----------

	X: array_like
	    An array with shape (n_samples, n_features)

	Returns
	-------

	D: array with shape (n_samples * (n_samples - 1) / 2, n_features)
	    The array of componentwise L1 cross-distances.

	ij: arrays with shape (n_samples * (n_samples - 1) / 2, 2)
	    The indices i and j of the vectors in X associated to the cross-
	    distances in D: D[k] = np.abs(X[ij[k, 0]] - Y[ij[k, 1]]).
	"""
    X = array2d(X)
    n_samples, n_features = X.shape
    n_nonzero_cross_dist = n_samples * (n_samples - 1) / 2
    ij = np.zeros((n_nonzero_cross_dist, 2), dtype=np.int)
    D = np.zeros((n_nonzero_cross_dist, n_features))
    ll_1 = 0
    for k in range(n_samples - 1):
        ll_0 = ll_1
        ll_1 = ll_0 + n_samples - k - 1
        ij[ll_0:ll_1, 0] = k
        ij[ll_0:ll_1, 1] = np.arange(k + 1, n_samples)
        D[ll_0:ll_1] = np.abs(X[k] - X[(k + 1):n_samples])

    return D, ij.astype(np.int)
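An illustrative check of the helper above (assuming scipy is available and the function is in scope): summing D over features yields the usual L1 (cityblock) pairwise distances, in the same pair order as scipy's condensed form.

import numpy as np
from scipy.spatial.distance import pdist

X = np.array([[0., 0.], [1., 2.], [3., 1.]])
D, ij = med_cross_distances_test(X)    # function defined above

print(ij)                                                 # pairs (0,1), (0,2), (1,2)
print(np.allclose(D.sum(axis=1), pdist(X, 'cityblock')))  # True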
Example #8
    def predict(self, X):
        """Predict regression target for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y: array of shape = [n_samples]
            The predicted values.
        """
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        # TODO - validate n_features is correct?
        n_samples, n_features = X.shape
        if self._n_features != n_features:
            raise ValueError("Number of features of the model must "
                             " match the input. Model n_features is {} and "
                             " input n_features is {}".format(
                                 self._n_features, n_features))

        result = np.empty(n_samples, dtype=DTYPE)
        return self._evaluator.predict(X, result)
Example #9
def l1_multiply(X):
    """
    Computes the nonzero componentwise L1 cross-distances between the vectors
    in X.

    Parameters
    ----------

    X: array_like
        An array with shape (n_samples, n_features)

    Returns
    -------

    D: array with shape (n_samples * (n_samples - 1) / 2, n_features)
        The array of componentwise L1 cross-distances.

    ij: arrays with shape (n_samples * (n_samples - 1) / 2, 2)
        The indices i and j of the vectors in X associated to the cross-
        distances in D: D[k] = np.abs(X[ij[k, 0]] - Y[ij[k, 1]]).
    """
    X = array2d(X)
    n_samples, n_features = X.shape
    n_nonzero_cross_dist = n_samples * (n_samples - 1) / 2
    ij = np.zeros((n_nonzero_cross_dist, 2), dtype=np.int)
    D = np.zeros((n_nonzero_cross_dist, n_features))
    ll_1 = 0
    for k in range(n_samples - 1):
        ll_0 = ll_1
        ll_1 = ll_0 + n_samples - k - 1
        ij[ll_0:ll_1, 0] = k
        ij[ll_0:ll_1, 1] = np.arange(k + 1, n_samples)
        D[ll_0:ll_1] = np.abs(X[k] * X[(k + 1) : n_samples])

    return D, ij.astype(np.int)
Example #10
    def predict(self, X):
        """ 
        Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The input samples. Internally, it will be converted to
            `dtype=np.float32`.

        Returns
        -------
        y : array, shape = (n_samples, )
            The predicted values.
        """
        # Guard: predict() called before fit() falls back to the stored bias.
        if not self.estimators_:
            return self.bias

        X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

        all_y_hat = Parallel(n_jobs=self.n_jobs, verbose=self.verbose, backend="threading")(
            delayed(_parallel_helper)(tree, "predict", X) for tree in self.estimators_
        )

        return sum(all_y_hat) / len(self.estimators_)
Example #11
    def transform(self, X):
        """
        Transform new points into embedding space.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]

        Returns
        -------
        X_new : array, shape = [n_samples, n_components]

        Notes
        -----
        Because of scaling performed by this method, it is discouraged to use
        it together with methods that are not scale-invariant (like SVMs)
        """
        X = array2d(X)
        ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors,
                                    return_distance=False)
        weights = barycenter_weights(X, self.nbrs_._fit_X[ind],
                                     reg=self.reg)
        X_new = np.empty((X.shape[0], self.n_components))
        for i in range(X.shape[0]):
            X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])
        return X_new
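The loop above rebuilds each point as a weighted combination of its neighbors' embedding coordinates. The core reduction in isolation (illustrative names, plain numpy):

import numpy as np

rng = np.random.RandomState(0)
emb_neighbors = rng.randn(5, 2)     # embedding coords of 5 nearest neighbors
w = rng.rand(5)
w /= w.sum()                        # barycenter weights sum to one

x_new = np.dot(emb_neighbors.T, w)  # same as the loop body in transform()
print(np.allclose(x_new, (w[:, None] * emb_neighbors).sum(axis=0)))  # True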
Example #12
    def predict(self, X):
        """ 
        Predict regression value for X.

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The input samples. Internally, it will be converted to
            `dtype=np.float32`.

        Returns
        -------
        y : array, shape = (n_samples,)
            The predicted values.
        """
        X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

        n_samples, n_features = X.shape

        if self.grower is None:
            return self.bias

        if self.n_features != n_features:
            raise ValueError(
                "Number of features of the model must match the input. "
                "Model n_features is %s and input n_features is %s." %
                (self.n_features, n_features)
            )

        return self.tree_.predict(X)
Example #13
    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y: array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted values.
        """
        # Check data
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        # Assign chunk of trees to jobs
        n_jobs, n_trees, starts = _partition_estimators(self)

        # Parallel loop
        all_y_hat = Parallel(n_jobs=n_jobs,
                             verbose=self.verbose,
                             backend="threading")(
                                 delayed(_parallel_predict_regression)
                                 (self.estimators_[starts[i]:starts[i + 1]], X)
                                 for i in range(n_jobs))

        # Reduce
        y_hat = sum(all_y_hat) / len(self.estimators_)

        return y_hat
Example #14
 def f_test(self, contrast, pval=False):
     from sklearn.utils import array2d
     #Ypred = self.predict(self.X)
     #betas = self.coef
     #ss_errors = np.sum((self.Y - self.y_hat) ** 2, axis=0)
     C1 = array2d(contrast).T
     n, p = self.X.shape
     #Xpinv = scipy.linalg.pinv(X)
     rank_x = np.linalg.matrix_rank(self.pinv)
     C0 = np.eye(p) - np.dot(C1, scipy.linalg.pinv2(C1))  # Ortho. cont. to C1
     X0 = np.dot(self.X, C0)  # Design matrix of the reduced model
     X0pinv = scipy.linalg.pinv2(X0)
     rank_x0 = np.linalg.matrix_rank(X0pinv)
     # Find the subspace (X1) of Xc1, which is orthogonal to X0
     # The projection matrix M due to X1 can be derived from the residual
     # forming matrix of the reduced model X0
     # R0 is the residual forming matrix of the reduced model
     R0 = np.eye(n) - np.dot(X0, X0pinv)
     # R is the residual forming matrix of the full model
     R = np.eye(n) - np.dot(self.X, self.pinv)
     # compute the projection matrix
     M = R0 - R
     #Ypred = np.dot(self.X, betas)
     y_hat = self.predict(self.X)
     SS = np.sum(y_hat * np.dot(M, y_hat), axis=0)
     df_c1 = rank_x - rank_x0
     df_res = n - rank_x
     ## Broadcast over self.err_ss of Y
     f_stats = (SS * df_res) / (self.err_ss * df_c1)
     if not pval:
         return (f_stats, None)
     else:
         p_vals = stats.f.sf(f_stats, df_c1, df_res)
         return f_stats, p_vals
Example #15
    def fit(self, X, y=None, headers=None, verbose=False):

        X = array2d(X)

        if X.ndim != 2:
            raise ValueError('X must have dimension 2, ndim=' + str(X.ndim))

#        n_samples, self.n_features_ = X.shape
        if y is None:
            y = [None] * len(X)
        else:
            y = np.atleast_1d(y)
#        y = y.astype(DOUBLE)

        if self.target is not None:
            if len(y) != len(X):
                raise ValueError('y must be same shape as X, len(X)=' +
                                 str(len(X)) + ', len(y)=' + str(len(y)))

        if headers is not None:
            if (len(headers) != len(X)):
                raise ValueError('headers must be same shape as X, len(X)='+str(len(X))+', len(headers)='+str(len(headers)))


        for x, t in zip(X, y):
            if verbose:
                print(x, t)
            event = array2json(x, headers)
            if self.target is not None:
                event[self.target] = t
            self.stream.train(event)

        return self
Example #16
    def predict(self, X):
        ''' 
        Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the trees in the forest.

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The input samples. Internally, it will be converted to
            `dtype=np.float32`.

        Returns
        -------
        y : array, shape = (n_samples, )
            The predicted values.
        '''
        # Guard: predict() called before fit() falls back to the stored bias.
        if not self.estimators_:
            return self.bias

        X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

        all_y_hat = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                             backend="threading")(
            delayed(_parallel_helper)(tree, 'predict', X)
            for tree in self.estimators_)

        return sum(all_y_hat) / len(self.estimators_)
Example #17
    def fit(self, X):
        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)
        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        U, S, V = linalg.svd(X, full_matrices=False)
        explained_variance_ = (S**2) / n_samples
        explained_variance_ratio_ = (explained_variance_ /
                                     explained_variance_.sum())
        components_ = V
        n_components = self.n_components
        if n_components is None:
            n_components = n_features

        # store n_samples to revert whitening when getting covariance
        self.n_samples_ = n_samples
        sl = slice(self.start_c, self.start_c + n_components)
        self.components_ = components_[sl]
        self.explained_variance_ = explained_variance_[sl]
        self.explained_variance_ratio_ = explained_variance_ratio_[sl]
        self.n_components_ = n_components
        return self
Example #18
 def corr_cut(t, d):
     return corr(
         array2d(
             np.hstack([
                 optimal_theta[0][0:i], t[0],
                 optimal_theta[0][(i + 1)::]
             ])), d)
Example #19
    def predict(self, X):
        ''' 
        Predict regression value for X.

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The input samples. Internally, it will be converted to
            `dtype=np.float32`.

        Returns
        -------
        y : array, shape = (n_samples,)
            The predicted values.
        '''
        X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

        n_samples, n_features = X.shape

        if self.grower is None:
            return self.bias

        if self.n_features != n_features:
            raise ValueError('Number of features of the model must match '
                             'the input. Model n_features is %s and '
                             'input n_features is %s.' %
                             (self.n_features, n_features))

        return self.tree_.predict(X)
Example #20
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """

        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)

        if self.iteration != 0 and n_features != self.components_.shape[1]:
            raise ValueError("The dimensionality of the new data and the existing components_ does not match")

        # incrementally fit the model
        for i in range(0, X.shape[0]):
            self.partial_fit(X[i, :])

        return self
Example #21
    def transform(self, sequences):
        """Apply the dimensionality reduction on X.

        Parameters
        ----------
        sequences: list of array-like, each of shape (n_samples_i, n_features)
            Training data, where n_samples_i is the number of samples
            in sequence i and n_features is the number of features.

        Returns
        -------
        sequence_new : list of array-like, each of shape (n_samples_i, n_components)

        """
        check_iter_of_sequences(sequences, max_iter=3)  # we might be lazy-loading
        sequences_new = []

        for X in sequences:
            X = array2d(X)
            if self.means_ is not None:
                X = X - self.means_
            X_transformed = np.dot(X, self.components_.T)

            if self.weighted_transform:
                X_transformed *= self.timescales_

            sequences_new.append(X_transformed)

        return sequences_new
Example #22
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample is computed as
        the mean predicted class probabilities of the trees in the forest.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        # Check data
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        # Assign chunk of trees to jobs
        n_jobs, n_trees, starts = _partition_estimators(self)
        
        # Bugfix for _parallel_predict_proba which expects a list for multi-label and integer for single-label problems
        if not isinstance(self.n_classes_, int) and len(self.n_classes_) == 1:
            n_classes_ = self.n_classes_[0]
        else:
            n_classes_ = self.n_classes_
        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                             backend="threading")(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[i]:starts[i + 1]],
                X,
                n_classes_,
                self.n_outputs_)
            for i in range(n_jobs))

        # Reduce
        proba = all_proba[0]

        if self.n_outputs_ == 1:
            for j in range(1, len(all_proba)):
                proba += all_proba[j]

            proba /= len(self.estimators_)

        else:
            for j in range(1, len(all_proba)):
                for k in range(self.n_outputs_):
                    proba[k] += all_proba[j][k]

            for k in range(self.n_outputs_):
                proba[k] /= self.n_estimators

        return proba
Example #23
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy=self.copy)
     print(X.shape)
     sigma = np.dot(X.T, X) / X.shape[1]
     U, S, V = linalg.svd(sigma)
     tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization)))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #24
    def predict(self, X):
        """Predict class or regression value for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predicted values.
        """
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first")

        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             " match the input. Model n_features is %s and "
                             " input n_features is %s " %
                             (self.n_features_, n_features))

        proba = self.tree_.predict(X)

        # Classification
        if isinstance(self, ClassifierMixin):
            if self.n_outputs_ == 1:
                return self.classes_.take(np.argmax(proba, axis=1), axis=0)

            else:
                predictions = np.zeros((n_samples, self.n_outputs_))

                for k in range(self.n_outputs_):
                    predictions[:, k] = self.classes_[k].take(
                        np.argmax(proba[:, k], axis=1), axis=0)

                return predictions

        # Regression
        else:
            if self.n_outputs_ == 1:
                return proba[:, 0]

            else:
                return proba[:, :, 0]
Example #25
    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample is computed as
        the mean predicted class probabilities of the trees in the forest.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute `classes_`.
        """
        # Check data
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        # Assign chunk of trees to jobs
        n_jobs, n_trees, starts = _partition_estimators(self)

        # Bugfix for _parallel_predict_proba which expects a list for multi-label and integer for single-label problems
        if not isinstance(self.n_classes_, int) and len(self.n_classes_) == 1:
            n_classes_ = self.n_classes_[0]
        else:
            n_classes_ = self.n_classes_
        # Parallel loop
        all_proba = Parallel(n_jobs=n_jobs,
                             verbose=self.verbose,
                             backend="threading")(
                                 delayed(_parallel_predict_proba)
                                 (self.estimators_[starts[i]:starts[i + 1]], X,
                                  n_classes_, self.n_outputs_)
                                 for i in range(n_jobs))

        # Reduce
        proba = all_proba[0]

        if self.n_outputs_ == 1:
            for j in range(1, len(all_proba)):
                proba += all_proba[j]

            proba /= len(self.estimators_)

        else:
            for j in range(1, len(all_proba)):
                for k in range(self.n_outputs_):
                    proba[k] += all_proba[j][k]

            for k in range(self.n_outputs_):
                proba[k] /= self.n_estimators

        return proba
Example #26
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """

        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)

        # init
        if self.iteration == 0:
            self.mean_ = np.zeros([n_features], float)
            self.components_ = np.zeros([self.n_components, n_features],
                                        float)
        else:
            if n_features != self.components_.shape[1]:
                raise ValueError(
                    'The dimensionality of the new data and the existing components_ does not match'
                )

        # incrementally fit the model
        for i in range(0, X.shape[0]):
            self.partial_fit(X[i, :])

        # update explained_variance_ratio_
        self.explained_variance_ratio_ = np.sqrt(
            np.sum(self.components_**2, axis=1))

        # sort by explained_variance_ratio_
        idx = np.argsort(-self.explained_variance_ratio_)
        self.explained_variance_ratio_ = self.explained_variance_ratio_[idx]
        self.components_ = self.components_[idx, :]

        # re-normalize
        self.explained_variance_ratio_ = (self.explained_variance_ratio_ /
                                          self.explained_variance_ratio_.sum())

        for r in range(0, self.components_.shape[0]):
            self.components_[r, :] /= np.sqrt(
                np.dot(self.components_[r, :], self.components_[r, :]))

        return self
Example #27
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy=self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     sigma = np.dot(X.T, X) / X.shape[1]
     U, S, V = linalg.svd(sigma)
     tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization)))
     self.components_ = np.dot(tmp, U.T)
     return self
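For reference, a self-contained ZCA whitening sketch. One assumption differs from the snippet above: the covariance here is normalized by n_samples (X.shape[0]) rather than X.shape[1]; with that convention the whitened data has near-identity covariance.

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(500, 5).dot(rng.randn(5, 5))    # correlated data
X -= X.mean(axis=0)                           # center

sigma = np.dot(X.T, X) / X.shape[0]           # covariance, 1/n_samples
U, S, _ = linalg.svd(sigma)
W = np.dot(U * (1.0 / np.sqrt(S + 1e-12)), U.T)   # ZCA whitening matrix

Xw = np.dot(X, W)
print(np.allclose(np.dot(Xw.T, Xw) / X.shape[0], np.eye(5), atol=1e-6))  # True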
Example #28
    def _joint_log_likelihood(self, X):
        X = array2d(X)
        joint_log_likelihood = []
        for i in range(np.size(self.classes_)):
            jointi = np.log(self.class_prior_[i])
            n_ij = -0.5 * np.sum(np.log(np.pi * self.sigma_[i, :]))
            n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1)
            joint_log_likelihood.append(jointi + n_ij)

        joint_log_likelihood = np.array(joint_log_likelihood).T
        return joint_log_likelihood
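Up to a constant, the per-class score above is the log prior plus a sum of Gaussian log-densities. A small check with illustrative values (note the snippet writes log(pi * sigma) where the exact density has 2*pi; the offset 0.5*d*log(2) is the same for every class, so the argmax is unaffected):

import numpy as np
from scipy.stats import norm

theta = np.array([1.0, -2.0])      # per-feature means for one class
sigma = np.array([0.5, 2.0])       # per-feature variances
prior = 0.3
x = np.array([0.8, -1.0])

snippet = (np.log(prior)
           - 0.5 * np.sum(np.log(np.pi * sigma))
           - 0.5 * np.sum((x - theta) ** 2 / sigma))
exact = np.log(prior) + norm.logpdf(x, theta, np.sqrt(sigma)).sum()
print(np.allclose(snippet, exact + 0.5 * x.size * np.log(2)))   # True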
Example #29
    def predict(self, X):
        """Predict class or regression value for X.

        For a classification model, the predicted class for each sample in X is
        returned. For a regression model, the predicted value based on X is
        returned.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predicted values.
        """
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE)

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first")

        if self.n_features_ != n_features:
            raise ValueError(
                "Number of features of the model must match the input. "
                "Model n_features is %s and input n_features is %s." %
                (self.n_features_, n_features)
            )

        proba = self.tree_.predict(X)

        # Classification
        if isinstance(self, ClassifierMixin):
            if self.n_outputs_ == 1:
                return self.classes_.take(np.argmax(proba, axis=1), axis=0)

            else:
                predictions = np.zeros((n_samples, self.n_outputs_))

                for k in range(self.n_outputs_):
                    predictions[:, k] = self.classes_[k].take(
                        np.argmax(proba[:, k], axis=1), axis=0)

                return predictions

        # Regression
        else:
            if self.n_outputs_ == 1:
                return proba[:, 0]

            else:
                return proba[:, :, 0]
Example #30
    def _joint_log_likelihood(self, X):
        X = array2d(X)
        joint_log_likelihood = []
        for i in range(np.size(self.classes_)):
            jointi = np.log(self.class_prior_[i])
            n_ij = -0.5 * np.sum(np.log(np.pi * self.sigma_[i, :]))
            n_ij -= 0.5 * np.sum(
                ((X - self.theta_[i, :])**2) / (self.sigma_[i, :]), 1)
            joint_log_likelihood.append(jointi + n_ij)

        joint_log_likelihood = np.array(joint_log_likelihood).T
        return joint_log_likelihood
Example #31
    def fit_transform(self, X, y=None):
        """
        Fit the model to the data X and transform it.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        """
        X = array2d(X)
        self.fit(X, y)
        return self.transform(X)
Example #32
 def fit(self, X, y=None):
     X = array2d(X)
     n_samples, n_features = X.shape
     X = as_float_array(X, copy=self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     eigs, eigv = eigh(np.dot(X.T, X) / n_samples + \
                      self.bias * np.identity(n_features))
     components = np.dot(eigv * np.sqrt(1.0 / eigs), eigv.T)
     self.components_ = components
     #Order the explained variance from greatest to least
     self.explained_variance_ = eigs[::-1]
     return self
Example #33
    def fit(self, X, y=None):
        """
        Fit the model to the data X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self
        """
        X = array2d(X)
        dtype = np.float32 if X.dtype.itemsize == 4 else np.float64
        rng = check_random_state(self.random_state)

        self.components_ = np.asarray(
            rng.normal(0, 0.01, (self.n_components, X.shape[1])),
            dtype=dtype, order='F')
        self.intercept_hidden_ = np.zeros(self.n_components, dtype=dtype)
        self.intercept_visible_ = np.zeros(X.shape[1], dtype=dtype)
        self.h_samples_ = np.zeros((self.batch_size, self.n_components),
                                   dtype=dtype)

        inds = np.arange(X.shape[0])
        rng.shuffle(inds)

        n_batches = int(np.ceil(len(inds) / float(self.batch_size)))

        verbose = self.verbose
        for iteration in range(self.n_iter):
            pl = 0.
            if verbose:
                begin = time.time()
            for minibatch in range(n_batches):
                pl_batch = self._fit(X[inds[minibatch::n_batches]], rng)

                if verbose:
                    pl += pl_batch.sum()

            if verbose:
                pl /= X.shape[0]
                end = time.time()
                print("Iteration %d, pseudo-likelihood = %.2f, time = %.2fs"
                      % (iteration, pl, end - begin))

        return self
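The minibatch indexing above, X[inds[minibatch::n_batches]], takes every n_batches-th shuffled index, so the batches are disjoint and jointly cover the data. In isolation:

import numpy as np

rng = np.random.RandomState(0)
inds = np.arange(10)
rng.shuffle(inds)

n_batches = 3
batches = [inds[m::n_batches] for m in range(n_batches)]
print(batches)
print(np.array_equal(np.sort(np.concatenate(batches)), np.arange(10)))  # True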
Example #34
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """
        
        X = array2d(X)
        n_samples, n_features = X.shape 
        X = as_float_array(X, copy=self.copy)
        
        # init
        if self.iteration == 0:  
            self.mean_ = np.zeros([n_features], float)
            self.components_ = np.zeros([self.n_components, n_features], float)
        else:
            if n_features != self.components_.shape[1]:
                raise ValueError('The dimensionality of the new data and the existing components_ does not match')   
        
        # incrementally fit the model
        for i in range(0,X.shape[0]):
            self.partial_fit(X[i,:])
        
        # update explained_variance_ratio_
        self.explained_variance_ratio_ = np.sqrt(np.sum(self.components_**2,axis=1))
        
        # sort by explained_variance_ratio_
        idx = np.argsort(-self.explained_variance_ratio_)
        self.explained_variance_ratio_ = self.explained_variance_ratio_[idx]
        self.components_ = self.components_[idx,:]
        
        # re-normalize
        self.explained_variance_ratio_ = (self.explained_variance_ratio_ / self.explained_variance_ratio_.sum())
            
        for r in range(0,self.components_.shape[0]):
            self.components_[r,:] /= np.sqrt(np.dot(self.components_[r,:],self.components_[r,:]))
        
        return self
Example #35
    def transform(self, X):
        """
        Computes the probabilities ``P({\bf h}_j=1|{\bf v}={\bf X})``.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)

        Returns
        -------
        h: array-like, shape (n_samples, n_components)
        """
        X = array2d(X)
        return self._mean_hiddens(X)
Example #36
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy=self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     X = X.T
     examples = np.shape(X)[1]
     sigma = np.dot(X, X.T) / (examples - 1)
     U, S, V = linalg.svd(sigma)
     d = np.sqrt(1 / S[0:100])
     dd = np.append(d, np.zeros((np.shape(X)[0] - 100)))
     #tmp = np.dot(U, np.diag(1/np.sqrt(S +self.regularization)))
     tmp = np.dot(U, np.diag(dd))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #37
    def detect(self, X):
        X = array2d(X)

        n_samples, n_features = X.shape
        N_obs = self.N_obs if self.N_obs is not None else n_features
        if N_obs > self.N_ref:
            raise ValueError("N_obs must not exceed N_ref")

        i_pred = []
        for X_i in X:
            detection = detect_stream(X_i, N_obs,
                                      self.R_pos_, self.R_neg_,
                                      self.gamma, self.theta, self.D_req)
            i_pred.append(detection)
        return i_pred
Example #38
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy = self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     X = X.T
     examples = np.shape(X)[1]
     sigma = np.dot(X,X.T) / (examples - 1)
     U, S, V = linalg.svd(sigma)
     d = np.sqrt(1/S[0:100])
     dd = np.append(d, np.zeros((np.shape(X)[0] - 100)))
     #tmp = np.dot(U, np.diag(1/np.sqrt(S +self.regularization)))
     tmp = np.dot(U, np.diag(dd))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #39
    def _fit(self, X):
        X = array2d(X)
        self._initialize(X.shape[1])

        self.n_observations_ += X.shape[0]
        self.n_sequences_ += 1

        self._outer_0_to_T_lagged += np.dot(X[:-self.offset].T, X[self.offset:])
        self._sum_0_to_TminusTau += X[:-self.offset].sum(axis=0)
        self._sum_tau_to_T += X[self.offset:].sum(axis=0)
        self._sum_0_to_T += X.sum(axis=0)
        self._outer_0_to_TminusTau += np.dot(X[:-self.offset].T, X[:-self.offset])
        self._outer_offset_to_T += np.dot(X[self.offset:].T, X[self.offset:])

        self._is_dirty = True
Example #40
    def fit(self, X, y, sample_weight=None):
        """Fit Naive Bayes classifier according to X, y

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values.

        sample_weight : array-like, shape = [n_samples], optional
            Weights applied to individual samples (1. for unweighted).

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_arrays(X, y, sparse_format='csr')
        X = X.astype(np.float64)
        y = column_or_1d(y, warn=True)
        _, n_features = X.shape

        labelbin = LabelBinarizer()
        Y = labelbin.fit_transform(y)
        self.classes_ = labelbin.classes_
        if Y.shape[1] == 1:
            Y = np.concatenate((1 - Y, Y), axis=1)

        # convert to float to support sample weight consistently
        Y = Y.astype(np.float64)
        if sample_weight is not None:
            Y *= array2d(sample_weight).T

        class_prior = self.class_prior

        # Count raw events from data before updating the class log prior
        # and feature log probas
        n_effective_classes = Y.shape[1]
        self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)
        self.feature_count_ = np.zeros((n_effective_classes, n_features),
                                       dtype=np.float64)
        self._count(X, Y)
        self._update_feature_log_prob()
        self._update_class_log_prior(class_prior=class_prior)
        return self
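The binary-class fix-up above exists because LabelBinarizer emits a single indicator column when there are only two classes; concatenating (1 - Y, Y) restores one column per class. A quick demonstration:

import numpy as np
from sklearn.preprocessing import LabelBinarizer

y = np.array([0, 1, 1, 0, 1])
Y = LabelBinarizer().fit_transform(y)     # shape (5, 1) for two classes
Y = np.concatenate((1 - Y, Y), axis=1)    # shape (5, 2), one column per class
print(Y)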
Example #41
    def calc_kernel_matrix(self, X):
        """
        Perform only the calculation of the covariance matrix given the GP and a dataset

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, n_features) with the input at which
            observations were made.

        Returns
        -------
        K : array_like
            The covariance (kernel) matrix. Unless low_memory is set, the
            distance matrix and K are also stored as self.D and self.K.
        """
        
        # Force data to 2D numpy.array
        X = array2d(X)
        n_samples, n_features = X.shape

        # Normalise input data or not. Do if normalise is 1 (all normalise) or 2 (input normalise)
        if self.normalise > 0:
            X_mean = sp.mean(X, axis=0)
            X_std = sp.std(X, axis=0)
            X_std[X_std == 0.] = 1.
            # center and scale X if necessary
            X = (X - X_mean) / X_std
        else:
            X_mean = 0.0 
            X_std  = 1.0 

        # Calculate distance matrix in vector form. The matrix form of X is obtained by scipy.spatial.distance.squareform(X)
        D = sp.spatial.distance.pdist(X, metric = self.metric)
        D = sp.spatial.distance.squareform(D)
        
        # Divide each distance ij by sqrt(N_i * N_j)
        if self.normalise == -1:
            natoms = (X != 0.).sum(1)
            D = D / sp.sqrt(sp.outer(natoms, natoms))
            
        # Covariance matrix K
        # sklearn correlation doesn't work. Probably correlation_models needs some different inputs 
        K = kernel(D, self.theta0, correlation=self.corr) 
        self.X = X
        if not self.low_memory:
            self.D = D
            self.K = K
        self.X_mean, self.X_std = X_mean, X_std
        return K
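The distance step above in isolation: pdist returns the condensed upper triangle of pairwise distances and squareform expands it into the full symmetric matrix.

import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.array([[0., 0.], [3., 4.], [6., 8.]])
d = pdist(X, metric='euclidean')     # condensed form: [ 5. 10.  5.]
D = squareform(d)                    # full (3, 3) matrix, zero diagonal
print(np.allclose(D, D.T), D.shape)  # True (3, 3)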
Example #43
    def calc_kernel_matrix(self, X):
        """
        Perform only the calculation of the covariance matrix given the GP and a dataset

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, n_features) with the input at which
            observations were made.

        Returns
        -------
        K : array_like
            The covariance (kernel) matrix. Unless low_memory is set, the
            distance matrix and K are also stored as self.D and self.K.
        """

        # Force data to 2D numpy.array
        X = array2d(X)
        n_samples, n_features = X.shape

        # Normalise input data or not. Do if normalise is 1 (all normalise) or 2 (input normalise)
        if self.normalise > 0:
            X_mean = sp.mean(X, axis=0)
            X_std = sp.std(X, axis=0)
            X_std[X_std == 0.] = 1.
            # center and scale X if necessary
            X = (X - X_mean) / X_std
        else:
            X_mean = 0.0
            X_std = 1.0

        # Calculate distance matrix in vector form. The matrix form of X is obtained by scipy.spatial.distance.squareform(X)
        D = sp.spatial.distance.pdist(X, metric=self.metric)
        D = sp.spatial.distance.squareform(D)

        # Divide each distance ij by sqrt(N_i * N_j)
        if self.normalise == -1:
            natoms = (X != 0.).sum(1)
            D = D / sp.sqrt(sp.outer(natoms, natoms))

        # Covariance matrix K
        # sklearn correlation doesn't work. Probably correlation_models needs some different inputs
        K = kernel(D, self.theta0, correlation=self.corr)
        self.X = X
        if not self.low_memory:
            self.D = D
            self.K = K
        self.X_mean, self.X_std = X_mean, X_std
        return K
Example #44
    def apply(self, X):
        """Apply trees in the forest to X, return leaf indices.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Input data.

        Returns
        -------
        X_leaves : array_like, shape = [n_samples, n_estimators]
            For each datapoint x in X and for each tree in the forest,
            return the index of the leaf x ends up in.
        """
        X = array2d(X, dtype=DTYPE)
        return np.array([est.tree_.apply(X) for est in self.estimators_]).T
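Modern scikit-learn ensembles expose the same operation as a public apply() method; a short usage sketch:

import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.array([[0.], [1.], [2.], [3.]])
y = np.array([0, 0, 1, 1])
forest = RandomForestClassifier(n_estimators=3, random_state=0).fit(X, y)

leaves = forest.apply(X)     # one leaf index per sample per tree
print(leaves.shape)          # (4, 3)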
Example #45
    def partial_fit(self, X, y=None, iter_offset=None):
        """Updates the model using the data in X as a mini-batch.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        iter_offset: integer, optional
            The number of iteration on data batches that has been
            performed before this call to partial_fit. This is optional:
            if no number is passed, the memory of the object is
            used.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if not hasattr(self, 'random_state_'):
            self.random_state_ = check_random_state(self.random_state)
        X = array2d(X)
        if hasattr(self, 'components_'):
            dict_init = self.components_
        else:
            dict_init = self.dict_init
        inner_stats = getattr(self, 'inner_stats_', None)
        if iter_offset is None:
            iter_offset = getattr(self, 'iter_offset_', 0)
        U, (A, B) = dict_learning_online(
            X, self.n_components, self.alpha,
            n_iter=self.n_iter, method=self.fit_algorithm,
            n_jobs=self.n_jobs, dict_init=dict_init,
            batch_size=len(X), shuffle=False,
            verbose=self.verbose, return_code=False,
            iter_offset=iter_offset, random_state=self.random_state_,
            return_inner_stats=True, inner_stats=inner_stats,
            )
        self.components_ = U

        # Keep track of the state of the algorithm to be able to do
        # some online fitting (partial_fit)
        self.inner_stats_ = (A, B)
        self.iter_offset_ = iter_offset + self.n_iter
        return self
Example #46
    def fit(self, features_train):

        X = array2d(features_train)
        n_samples, n_features = X.shape
        print('given train features dimensions before PCA :', features_train.shape)

        X = as_float_array(X)

        # Data preprocessing by Mean Normalization
        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        # Compute covariance matrix
        cov_matrix = np.dot(np.transpose(X), X) / n_samples
        print('cov_matrix dimensions :', cov_matrix.shape)
        # Compute SVD
        U, S, V = linalg.svd(cov_matrix, full_matrices=1, compute_uv=1)
        print('x dimensions :', X.shape)
        print('U dimensions :', U.shape)
        print('S dimensions :', S.shape)

        # Calculate optimal k - min number of principal components to maintain 99% of variance
        variance_retained = np.sum(S[:self.k_components]) / np.sum(S)

        while variance_retained < self.variance_percent_retained:
            self.k_components += 1
            variance_retained = np.sum(S[:self.k_components]) / np.sum(S)
            #print 'k_components : ', self.k_components, ' variance : ', variance_retained

        if self.k_components is None:
            self.k_components = n_features
        elif not 0 <= self.k_components <= n_features:
            raise ValueError("n_components=%r invalid for n_features=%d" %
                             (self.k_components, n_features))

        self.components = U

        self.U_reduce = U[:, :self.k_components]
        print('number of principal components :', self.k_components)

        self.U = U
        self.S = S
        self.V = V

        return (U, S, V)
Example #47
    def fit(self, X, y, check_input=True):
        ''' 
        Build a forest of trees from the available chunk of training set (X, y).

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The training input samples. Internally, it will be converted to
            `dtype=np.float32`.

        y : array, shape = (n_samples,)
            The target values.

        Returns
        -------
        self : object
            Returns self.
        '''
        if check_input:
            X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

            if y.ndim != 1:
                raise ValueError('y must be 1-d array')

            if len(y) != X.shape[0]:
                raise ValueError('Number of labels (%d) does not match '
                                 'number of samples (%d).' %
                                 (len(y), X.shape[0]))

            if y.dtype != DOUBLE or not y.flags.c_contiguous:
                y = np.ascontiguousarray(y, dtype=DOUBLE)

        # First call to fit(...)?
        if not self.estimators_:
            self._initialize_estimators()

        # Parallel loop: we use the threading backend as the Cython code
        # for fitting the trees is internally releasing the Python GIL
        # making threading always more efficient than multiprocessing in
        # that case.
        Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                 backend="threading")(
            delayed(_parallel_build_trees)(tree, X, y, tree_idx,
                                           len(self.estimators_), self.prob,
                                           verbose=self.verbose)
            for tree_idx, tree in enumerate(self.estimators_))

        return self
Example #48
    def fit(self, X, y, check_input=True):
        """ 
        Build a forest of trees from the available chunk of training set (X, y).

        Parameters
        ----------
        X : array, shape = (n_samples, n_features)
            The training input samples. Internally, it will be converted to
            `dtype=np.float32`.

        y : array, shape = (n_samples,)
            The target values.

        Returns
        -------
        self : object
            Returns self.
        """
        if check_input:
            X = array2d(X, dtype=DTYPE, copy=False, force_all_finite=False)

            if y.ndim != 1:
                raise ValueError("y must be 1-d array")

            if len(y) != X.shape[0]:
                raise ValueError(
                    "Number of labels (%d) does not match " "number of samples (%d)." % (len(y), X.shape[0])
                )

            if y.dtype != DOUBLE or not y.flags.c_contiguous:
                y = np.ascontiguousarray(y, dtype=DOUBLE)

        # First call to fit(...)?
        if not self.estimators_:
            self._initialize_estimators()

        # Parallel loop: we use the threading backend as the Cython code
        # for fitting the trees is internally releasing the Python GIL
        # making threading always more efficient than multiprocessing in
        # that case.
        Parallel(n_jobs=self.n_jobs, verbose=self.verbose, backend="threading")(
            delayed(_parallel_build_trees)(tree, X, y, tree_idx, len(self.estimators_), self.prob, verbose=self.verbose)
            for tree_idx, tree in enumerate(self.estimators_)
        )

        return self
Example #49
    def fit(self, features_train):

        X = array2d(features_train)
        n_samples, n_features = X.shape
        print('given train features dimensions before PCA :', features_train.shape)

        X = as_float_array(X)

        # Data preprocessing by Mean Normalization
        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        # Compute covariance matrix
        cov_matrix = np.dot(np.transpose(X), X)/n_samples
        print('cov_matrix dimensions :', cov_matrix.shape)
        # Compute SVD
        U, S, V = linalg.svd(cov_matrix, full_matrices=1, compute_uv=1)
        print('x dimensions :', X.shape)
        print('U dimensions :', U.shape)
        print('S dimensions :', S.shape)

        # Calculate optimal k - min number of principal components to maintain 99% of variance
        variance_retained = np.sum(S[:self.k_components]) / np.sum(S)

        while variance_retained < self.variance_percent_retained:
            self.k_components += 1
            variance_retained = np.sum(S[:self.k_components]) / np.sum(S)
            #print 'k_components : ', self.k_components, ' variance : ', variance_retained

        if self.k_components is None:
            self.k_components = n_features
        elif not 0 <= self.k_components <= n_features:
            raise ValueError("n_components=%r invalid for n_features=%d" % (self.k_components, n_features))

        self.components = U

        self.U_reduce = U[:, :self.k_components]
        print('number of principal components :', self.k_components)

        self.U = U
        self.S = S
        self.V = V

        return (U, S, V)
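The while loop above searches for the smallest k whose leading singular values retain the target variance fraction; a vectorized equivalent on an illustrative spectrum:

import numpy as np

S = np.array([5.0, 3.0, 1.5, 0.4, 0.1])    # singular values
target = 0.9

k = int(np.searchsorted(np.cumsum(S) / S.sum(), target) + 1)
print(k, np.sum(S[:k]) / np.sum(S))        # 3 0.95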
Example #50
    def fit(self, X, y, mask=None):
        """Fit Gaussian Naive Bayes according to X, y

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.
        mask : array-like, shape = [n_samples, n_features]
            Binary, 1 at unobserved features.

        Returns
        -------
        self : object
            Returns self.
        """
        X, y = check_arrays(X, y, sparse_format='dense')

        n_samples, n_features = X.shape

        if n_samples != y.shape[0]:
            raise ValueError("X and y have incompatible shapes")

        if mask is not None:
            mask = array2d(mask)
            X = X.copy()
            X[mask] = np.nan

        self.classes_ = unique_y = np.unique(y)
        n_classes = unique_y.shape[0]

        self.theta_ = np.zeros((n_classes, n_features))
        self.sigma_ = np.zeros((n_classes, n_features))
        self.class_prior_ = np.zeros(n_classes)
        self._n_ij = []
        epsilon = 1e-9
        for i, y_i in enumerate(unique_y):
            self.theta_[i, :] = bn.nanmean(X[y == y_i, :], axis=0)
            self.sigma_[i, :] = bn.nanvar(X[y == y_i, :], axis=0) + epsilon
            self.class_prior_[i] = float(np.sum(y == y_i)) / n_samples
            self._n_ij.append(-0.5 * np.sum(np.log(np.pi * self.sigma_[i, :])))
        self._logprior = np.log(self.class_prior_)
        return self
Example #52
0
    def transform(self, X):
        """Apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            The projected data.
        """
        X = array2d(X)
        X_transformed = X - self.mean_
        X_transformed = np.dot(X_transformed, self.components_.T)
        return np.asarray(X_transformed)
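The transform is just centering followed by a matrix product. A minimal sketch with explicit arrays, assuming mean_ and components_ came from a prior fit (the values below are invented):

    import numpy as np

    mean_ = np.array([0.5, 0.5])
    components_ = np.array([[0.7071, 0.7071]])  # a single principal axis
    X_new = np.array([[1.0, 2.0], [2.0, 3.0]])

    X_transformed = np.dot(X_new - mean_, components_.T)
    print X_transformed  # shape (2, 1)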
Example #54
0
    def _fit(self, X):
        X = np.asarray(array2d(X), dtype=np.float64)
        self._initialize(X.shape[1])
        if not len(X) > self.lag_time:
            raise ValueError('First dimension must be longer than '
                'lag_time=%d. X has shape (%d, %d)' % ((self.lag_time,) + X.shape))

        self.n_observations_ += X.shape[0]
        self.n_sequences_ += 1

        # accumulate sufficient statistics for the time-lagged correlation:
        # sums and outer products over X[t] and X[t + lag_time]
        self._outer_0_to_T_lagged += np.dot(X[:-self.lag_time].T, X[self.lag_time:])
        self._sum_0_to_TminusTau += X[:-self.lag_time].sum(axis=0)
        self._sum_tau_to_T += X[self.lag_time:].sum(axis=0)
        self._sum_0_to_T += X.sum(axis=0)
        self._outer_0_to_TminusTau += np.dot(X[:-self.lag_time].T, X[:-self.lag_time])
        self._outer_offset_to_T += np.dot(X[self.lag_time:].T, X[self.lag_time:])

        # the statistics changed; any cached solution must be recomputed
        self._is_dirty = True
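The running sums above are the sufficient statistics for a time-lagged covariance estimate. A hedged sketch of how such an estimate is typically assembled from a single sequence; the exact centering and symmetrization conventions of this project may differ:

    import numpy as np

    def lagged_covariance(X, lag):
        A, B = X[:-lag], X[lag:]
        n = float(len(A))
        mu = 0.5 * (A.mean(axis=0) + B.mean(axis=0))  # shared mean estimate
        C = np.dot((A - mu).T, B - mu) / n
        return 0.5 * (C + C.T)  # symmetrize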
Example #55
0
    def predict_proba(self, X):
        """Predict class probabilities of the input samples X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes], or a list of n_outputs
            such arrays if n_outputs > 1.
            The class probabilities of the input samples. Classes are ordered
            in ascending numerical order.
        """
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = array2d(X, dtype=DTYPE, order="F")

        n_samples, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first.")

        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             " match the input. Model n_features is %s and "
                             " input n_features is %s "
                             % (self.n_features_, n_features))

        proba = []
        P = self.tree_.predict(X)

        for k in xrange(self.n_outputs_):
            P_k = P[:, k, :self.n_classes_[k]]
            normalizer = P_k.sum(axis=1)[:, np.newaxis]
            normalizer[normalizer == 0.0] = 1.0
            P_k /= normalizer
            proba.append(P_k)

        if self.n_outputs_ == 1:
            return proba[0]

        else:
            return proba
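The per-output normalization above turns raw leaf counts into class probabilities row by row; a standalone sketch with invented counts:

    import numpy as np

    P_k = np.array([[3.0, 1.0], [0.0, 0.0], [2.0, 2.0]])
    normalizer = P_k.sum(axis=1)[:, np.newaxis]
    normalizer[normalizer == 0.0] = 1.0  # all-zero rows stay all-zero
    proba = P_k / normalizer
    print proba  # [[0.75, 0.25], [0., 0.], [0.5, 0.5]]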
Example #57
0
    def _decision_function(self, X):
        X = array2d(X)
        norm2 = []
        for i in range(len(self.classes_)):
            R = self.rotations_[i]
            S = self.scalings_[i]
            Xm = X - self.means_[i]
            # squared Mahalanobis distance to class i in its whitened space
            X2 = np.dot(Xm, R * (S ** (-0.5)))
            norm2.append(np.sum(X2 ** 2, 1))
        norm2 = np.array(norm2).T  # shape = [len(X), n_classes]

        # log|Sigma_i| per class: scalings_[i] holds the eigenvalues of the
        # class covariance, so summing the logs over that vector is the right
        # axis (equivalent to np.sum(np.log(self.scalings_), 1))
        sum_log_scalings = np.array([np.sum(np.log(self.scalings_[i]))
                                     for i in range(len(self.scalings_))])

        # the log prior enters additively and must not be scaled by -0.5
        return -0.5 * (norm2 + sum_log_scalings) + np.log(self.priors_)
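For reference, the value returned matches the standard quadratic discriminant (up to an additive constant shared by all classes), with \mu_i, \Sigma_i and \pi_i the class mean, covariance and prior:

    \delta_i(x) = -\tfrac{1}{2} (x - \mu_i)^\top \Sigma_i^{-1} (x - \mu_i) - \tfrac{1}{2} \log|\Sigma_i| + \log \pi_i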
Example #58
0
File: zca.py Project: dudevil/NDSB
    def fit(self, X, y=None):
        # X should be 2-dimensional
        X = array2d(X)
        X = as_float_array(X, copy=self.copy)
        # center the data column-wise (per feature)
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        # sample covariance; normalize by the number of samples, not features
        sigma = np.dot(X.T, X) / X.shape[0]
        U, S, V = linalg.svd(sigma)
        # the eigenvalues of the covariance are the per-component variances
        self.explained_variance_ = S
        self.explained_variance_ratio_ = (self.explained_variance_ /
                                          self.explained_variance_.sum())
        # perform dimensionality reduction if needed; the eigenvectors are the
        # columns of U, so slice columns (not rows) and the matching eigenvalues
        if self.n_components:
            U = U[:, :self.n_components]
            S = S[:self.n_components]

        # ZCA whitening matrix: U diag(1 / sqrt(S + reg)) U^T
        tmp = np.dot(U, np.diag(1.0 / np.sqrt(S + self.regularization)))
        self.components_ = np.dot(tmp, U.T)
        return self
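A hedged usage sketch; the class name ZCA and its regularization argument are inferred from the fit method above rather than confirmed by the source:

    import numpy as np

    # ZCA is assumed to be the class whose fit method is shown above
    rng = np.random.RandomState(0)
    X = rng.randn(200, 4) * np.array([1.0, 3.0, 0.5, 2.0])

    zca = ZCA(regularization=1e-5).fit(X)
    X_white = np.dot(X - zca.mean_, zca.components_.T)
    # after whitening, the sample covariance should be close to the identity
    print np.round(np.cov(X_white, rowvar=False), 2)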