Example #1
def test_np_matrix():
    # Confirm that input validation code does not return np.matrix
    X = np.arange(12).reshape(3, 4)

    assert_false(isinstance(as_float_array(X), np.matrix))
    assert_false(isinstance(as_float_array(np.matrix(X)), np.matrix))
    assert_false(isinstance(as_float_array(sp.csc_matrix(X)), np.matrix))
Example #2
def test_np_matrix():
    """
    Confirm that input validation code does not return np.matrix
    """
    X = np.arange(12).reshape(3, 4)

    assert_false(isinstance(as_float_array(X), np.matrix))
    assert_false(isinstance(as_float_array(np.matrix(X)), np.matrix))
    assert_false(isinstance(as_float_array(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csr(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csc(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(sp.csr_matrix(X)), np.matrix))

    assert_false(isinstance(safe_asarray(X), np.matrix))
    assert_false(isinstance(safe_asarray(np.matrix(X)), np.matrix))
    assert_false(isinstance(safe_asarray(sp.lil_matrix(X)), np.matrix))

    assert_true(atleast2d_or_csr(X, copy=False) is X)
    assert_false(atleast2d_or_csr(X, copy=True) is X)
    assert_true(atleast2d_or_csc(X, copy=False) is X)
    assert_false(atleast2d_or_csc(X, copy=True) is X)
Example #3
def mean_absolute_error(y_true, y_pred):
    """
    Mean absolute error and its standard deviation.
    
    If you need only mean absolute error, use 
    :func:`sklearn.metrics.mean_absolute_error`
    
    Parameters
    ----------
    y_true : array, shape(n_samples,)
        Ground truth scores
    y_pred : array, shape(n_samples,)
        Predicted scores

    Returns
    -------
    mean : float
        mean of absolute errors
    stdev : float
        standard deviation of absolute errors
    """

    # check inputs
    assert_all_finite(y_true)
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calculate errors
    errs = np.abs(y_true - y_pred)
    mean = np.nanmean(errs)
    stdev = np.nanstd(errs)

    return mean, stdev
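A quick sanity check of the computation above, assuming only NumPy and scikit-learn; nanmean/nanstd reduce to plain mean/std here because assert_all_finite has already rejected NaNs:

import numpy as np
from sklearn.utils import as_float_array

y_true = as_float_array(np.array([3, 1, 4, 1, 5]))
y_pred = as_float_array(np.array([2, 2, 4, 0, 5]))
errs = np.abs(y_true - y_pred)
print(np.nanmean(errs), np.nanstd(errs))  # mean and stdev of absolute errors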
Example #4
def transform_with_scaler(Y, scaler=None, wrt_X=None):
    Y = as_float_array(Y)
    if wrt_X is not None and len(wrt_X) and scaler is None:
        wrt_X = as_float_array(wrt_X)
        scaler = get_scaler(wrt_X)
    Z = scaler.transform(Y)
    return Z
Example #5
def item_finder_report(y_true, y_pred, disp=True):
    """
    Report brief summary of prediction performance

    * AUC
    * number of data
    * mean and standard dev. of true scores
    * mean and standard dev. of predicted scores

    Parameters
    ----------
    y_true : array, shape(n_samples,)
        Ground truth scores
    y_pred : array, shape(n_samples,)
        Predicted scores
    disp : bool, optional, default=True
        if True, print report

    Returns
    -------
    stats : dict
        brief summary of prediction performance
    """

    # check inputs
    assert_all_finite(y_true)
    if not is_binary_score(y_true):
        raise ValueError('True scores must be binary')
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calc statistics
    stats = {
        'n_samples': y_true.size,
        'true': {'mean': np.mean(y_true), 'stdev': np.std(y_true)},
        'predicted': {'mean': np.mean(y_pred), 'stdev': np.std(y_pred)}}

    # statistics at least 0 and 1 must be contained in a score array
    if is_binary_score(y_true, allow_uniform=False):
        stats['area under the curve'] = skm.roc_auc_score(y_true, y_pred)

    # display statistics
    if disp:
        print(
            json.dumps(
                stats, sort_keys=True, indent=4, separators=(',', ': '),
                ensure_ascii=False), file=sys.stderr)

    return stats
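For reference, a minimal sketch of the AUC computation this report delegates to scikit-learn (values chosen arbitrarily for illustration):

import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
y_pred = np.array([0.1, 0.4, 0.35, 0.8])
print(roc_auc_score(y_true, y_pred))  # 0.75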
Example #6
def item_finder_statistics(y_true, y_pred):
    """
    Full Statistics of prediction performance

    * n_samples
    * mean_absolute_error: mean, stdev
    * mean_squared_error: mean, rmse, stdev
    * predicted: mean, stdev
    * true: mean, stdev

    Parameters
    ----------
    y_true : array, shape=(n_samples,)
        Ground truth scores
    y_pred : array, shape=(n_samples,)
        Predicted scores

    Returns
    -------
    stats : dict
        Full statistics of prediction performance
    """

    # check inputs
    assert_all_finite(y_true)
    if not is_binary_score(y_true):
        raise ValueError('True scores must be binary')
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calc statistics
    stats = {}

    # dataset size
    stats['n_samples'] = y_true.size

    # descriptive statistics of ground truth scores
    stats['true'] = {'mean': np.mean(y_true), 'stdev': np.std(y_true)}

    # descriptive statistics of ground predicted scores
    stats['predicted'] = {'mean': np.mean(y_pred), 'stdev': np.std(y_pred)}

    # statistics at least 0 and 1 must be contained in a score array
    if is_binary_score(y_true, allow_uniform=False):

        # AUC (area under the curve)
        stats['area under the curve'] = skm.roc_auc_score(y_true, y_pred)

    return stats
Example #7
    def fit(self, X, y):

        n_samples, self.n_features = X.shape
        self.n_outputs = y.shape[1]
        self._init_fit(X)

        self.hidden_activations_ = self._get_hidden_activations(X)

        if self.regularized:
            self._solve_regularized(as_float_array(y, copy=True))
        else:
            self._solve(as_float_array(y, copy=True))

        return self
Example #8
def score_predictor_report(y_true, y_pred, disp=True):
    """
    Report brief summary of prediction performance
    
    * mean absolute error
    * root mean squared error
    * number of data
    * mean and standard dev. of true scores
    * mean and standard dev. of predicted scores

    Parameters
    ----------
    y_true : array, shape(n_samples,)
        Ground truth scores
    y_pred : array, shape(n_samples,)
        Predicted scores
    disp : bool, optional, default=True
        if True, print report

    Returns
    -------
    stats : dict
        brief summary of prediction performance
    """

    # check inputs
    assert_all_finite(y_true)
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calc statistics
    stats = {
        'mean absolute error': skm.mean_absolute_error(y_true, y_pred),
        'root mean squared error':
            np.sqrt(np.maximum(skm.mean_squared_error(y_true, y_pred), 0.)),
        'n_samples': y_true.size,
        'true': {'mean': np.mean(y_true), 'stdev': np.std(y_true)},
        'predicted': {'mean': np.mean(y_pred), 'stdev': np.std(y_pred)}}

    # display statistics
    if disp:
        print(json.dumps(
            stats, sort_keys=True, indent=4, separators=(',', ': '),
            ensure_ascii=False),
            file=sys.stderr)

    return stats
Example #9
    def fit(self, X, y):
        """
        Fit the model using X, y as training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like of shape [n_samples, n_outputs]
            Target values (class labels in classification, real numbers in
            regression)

        Returns
        -------
        self : object

            Returns an instance of self.
        """
        # fit random hidden layer and compute the hidden layer activations
        self.hidden_activations_ = self.hidden_layer.fit_transform(X)

        # solve the regression from hidden activations to outputs
        self._fit_regression(as_float_array(y, copy=True))

        return self
Example #10
    def _fit(self, X):
        """Fit the model to the data X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        ndz : ndarray
            The fitted document-topic count matrix.
        """
        random_state = self._random_state
        X = np.atleast_2d(as_float_array(X))
        self._initialize(X, random_state)
        for it in range(self.n_iter):
            if it % 10 == 0:
                self._print_status(it)
            else:
                logger.info("<{}>".format(it))
            self._sample_topics(random_state)
        self._print_status(self.n_iter)
        self.components_ = self.nzw + self.eta
        self.components_ /= np.sum(self.components_, axis=1, keepdims=True)
        return self.ndz
Example #11
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """

        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)

        if self.iteration != 0 and n_features != self.components_.shape[1]:
            raise ValueError("The dimensionality of the new data and the existing components_ does not match")

        # incrementally fit the model
        for i in range(0, X.shape[0]):
            self.partial_fit(X[i, :])

        return self
Example #12
def k_modes(X, n_clusters, n_init=1, max_iter=5,
            verbose=False, tol=1e-4, random_state=None, copy_x=True, n_jobs=1):
    """K-modes clustering algorithm."""
    if n_init <= 0:
        raise ValueError("Invalid number of initializations."
                         " n_init=%d must be bigger than zero." % n_init)

    X = as_float_array(X, copy=copy_x)
    matrix_all_irm = _compute_all_irm(X, n_clusters)
    best_labels, best_modes, best_mirm = None, None, -np.inf

    if n_jobs == 1:

        for j in range(2, n_clusters + 1):
            # For a single thread, less memory is needed if we just store one set
            # of the best results (as opposed to one set per run per thread).
            for it in range(n_init):
                # run a k-modes once
                labels, modes, mirm_sum = _kmodes_single(
                    X, j, matrix_all_irm, max_iter=max_iter,
                    verbose=verbose, tol=tol, random_state=random_state)
                # determine if these results are the best so far
                if mirm_sum >= best_mirm:
                    best_labels = labels.copy()
                    best_modes = modes.copy()
                    best_mirm = mirm_sum
    else:
        # TODO:
        pass

    return best_modes, best_labels, best_mirm
Example #13
 def transform(self, X):
     X = as_float_array(X, copy=self.copy)
     if self.mean_ is not None and self.std_ is not None:
         X -= self.mean_
         X /= self.std_
     X_whitened = np.dot(X, self.components_)
     return X_whitened
Example #14
 def fit(self, X):
     n_samples, self.n_features = X.shape
     self.n_outputs = X.shape[1]
     self._init_fit(X)
     self.hidden_activations_ = self._get_hidden_activations(X)
     self._regularized(as_float_array(X, copy=True))
     #self.coef_output_ = safe_sparse_dot(pinv2(self.hidden_activations_), X)
     return self
Example #15
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy=self.copy)
     print(X.shape)
     sigma = np.dot(X.T, X) / X.shape[1]
     U, S, V = linalg.svd(sigma)
     tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization)))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #16
 def fit(self, X):
     X = as_float_array(X, copy=self.copy)
     self._mean = np.mean(X, axis=0)
     X -= self._mean
     sigma = np.dot(X.T,X) / X.shape[1]
     U, S, V = linalg.svd(sigma)
     tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization)))
     self._components = np.dot(tmp, U.T)
     return self
Example #17
def test_as_float_array():
    """
    Test function for as_float_array
    """
    X = np.ones((3, 10), dtype=np.int32)
    X = X + np.arange(10, dtype=np.int32)
    # Checks that the return type is ok
    X2 = as_float_array(X, copy=False)
    np.testing.assert_equal(X2.dtype, np.float32)
    # Another test
    X = X.astype(np.int64)
    X2 = as_float_array(X, copy=True)
    # Checking that the array wasn't overwritten
    assert_true(as_float_array(X, False) is not X)
    # Checking that the new type is ok
    np.testing.assert_equal(X2.dtype, np.float64)
    # Here, X is of the right type, it shouldn't be modified
    X = np.ones((3, 2), dtype=np.float32)
    assert_true(as_float_array(X, copy=False) is X)
Example #18
def import_lda(filename):
    lines = filename.read().splitlines()
    nr_lines = len(lines)
    labels = [None] * nr_lines
    features = [None] * nr_lines
    for i in range(nr_lines):
        labels[i], data = lines[i].split(',')
        features[i] = [float(v) for v in data.split()]
    X = as_float_array(features)
    return labels, X
Example #19
 def transform(self, X, y=None, copy=None):
     """Perform ZCA whitening
     Parameters
     ----------
     X : array-like with shape [n_samples, n_features]
         The data to whiten along the features axis.
     """
     check_is_fitted(self, 'mean_')
     X = as_float_array(X, copy=self.copy)
     return np.dot(X - self.mean_, self.whiten_.T)
Example #20
    def fit(self, X, y):
        """Fit the model using X, y as training data.
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data.
        y : array-like, shape = [n_samples]
            Target values. Will be cast to X's dtype if necessary
        Returns
        -------
        self : object
               Returns an instance of self.
        """
        X, y = check_X_y(X,
                         y, ['csr', 'csc'],
                         y_numeric=True,
                         ensure_min_samples=2,
                         estimator=self)
        X = as_float_array(X, copy=False)
        n_samples, n_features = X.shape

        X, y, X_offset, y_offset, X_scale = \
            self._preprocess_data(X, y, self.fit_intercept, self.normalize)

        estimator_func, params = self._make_estimator_and_params(X, y)
        memory = self.memory
        if memory is None:
            memory = Memory(cachedir=None, verbose=0)
        elif isinstance(memory, six.string_types):
            memory = Memory(cachedir=memory, verbose=0)
        elif not isinstance(memory, Memory):
            raise ValueError("'memory' should either be a string or"
                             " a sklearn.utils.Memory"
                             " instance, got 'memory={!r}' instead.".format(
                                 type(memory)))

        scores_ = memory.cache(_resample_model,
                               ignore=['verbose', 'n_jobs', 'pre_dispatch'
                                       ])(estimator_func,
                                          X,
                                          y,
                                          scaling=self.scaling,
                                          n_resampling=self.n_resampling,
                                          n_jobs=self.n_jobs,
                                          verbose=self.verbose,
                                          pre_dispatch=self.pre_dispatch,
                                          random_state=self.random_state,
                                          sample_fraction=self.sample_fraction,
                                          **params)

        if scores_.ndim == 1:
            scores_ = scores_[:, np.newaxis]
        self.all_scores_ = scores_
        self.scores_ = np.max(self.all_scores_, axis=1)
        return self
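The memory handling above follows joblib's caching API; a minimal sketch of that pattern (recent joblib versions take location= where older ones, as in the code above, took cachedir=):

import numpy as np
from joblib import Memory

memory = Memory(location=None, verbose=0)  # location=None disables caching

@memory.cache
def expensive_sum(X):
    return X.sum()

print(expensive_sum(np.arange(5)))  # 10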
Example #21
def k_means(X,
            n_clusters,
            init='similar_cut',
            sparsity=None,
            max_iter=10,
            verbose=False,
            tol=1e-4,
            random_state=None,
            debug_directory=None,
            n_jobs=1,
            algorithm=None,
            **kargs):

    random_state = check_random_state(random_state)

    if max_iter <= 0:
        raise ValueError('Number of iterations should be a positive number,'
                         ' got %d instead' % max_iter)

    X = as_float_array(X)
    tol = _tolerance(X, tol)

    labels, inertia, centers, debug_header = None, None, None, None

    if debug_directory:
        # Create debug header
        strf_now = datetime.datetime.now()
        debug_header = str(strf_now).replace(':',
                                             '-').replace(' ',
                                                          '_').split('.')[0]

        # Check debug_directory
        if not os.path.exists(debug_directory):
            os.makedirs(debug_directory)
            s = datetime.datetime.now()

    # For a single thread, run a k-means once
    centers, labels, inertia, n_iter_ = kmeans_single(
        X,
        n_clusters,
        max_iter=max_iter,
        init=init,
        sparsity=sparsity,
        verbose=verbose,
        tol=tol,
        random_state=random_state,
        debug_directory=debug_directory,
        debug_header=debug_header,
        algorithm=algorithm,
        **kargs)

    # parallelisation of k-means runs
    # TODO

    return centers, labels, inertia
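A short sketch of the check_random_state contract relied on above (standard scikit-learn utility):

from sklearn.utils import check_random_state

rs = check_random_state(42)   # an int seed yields a new RandomState
print(rs.rand())
rs2 = check_random_state(rs)  # an existing RandomState passes through
print(rs2 is rs)              # True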
Example #22
    def fit_transform(self, X, y=None):
        """ Fit LSI model to X and perform dimensionality reduction on X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training data.

        Returns
        -------

        X_new : array, shape (n_samples, n_components)
            Reduced version of X. This will always be a dense array.
        """
        X = as_float_array(X, copy=False)
        random_state = check_random_state(self.random_state)

        # If sparse and not csr or csc, convert to csr
        if sp.issparse(X) and X.getformat() not in ["csr", "csc"]:
            X = X.tocsr()

        if self.algorithm == "arpack":
            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            Sigma = Sigma[::-1]
            U, VT = svd_flip(U[:, ::-1], VT[::-1])

        elif self.algorithm == "randomized":
            k = self.n_components
            n_features = X.shape[1]
            if k >= n_features:
                raise ValueError("n_components must be < n_features;"
                                 " got %d >= %d" % (k, n_features))
            U, Sigma, VT = randomized_svd(X,
                                          self.n_components,
                                          n_iter=self.n_iter,
                                          random_state=random_state)
        else:
            raise ValueError("unknown algorithm %r" % self.algorithm)

        self.components_ = VT
        self.Sigma = Sigma[:self.n_components]

        # Calculate explained variance & explained variance ratio
        X_transformed = np.dot(U, np.diag(Sigma))
        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)
        if sp.issparse(X):
            _, full_var = mean_variance_axis(X, axis=0)
            full_var = full_var.sum()
        else:
            full_var = np.var(X, axis=0).sum()
        self.explained_variance_ratio_ = exp_var / full_var
        return X_transformed
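A minimal sketch of the svds ordering quirk the arpack branch compensates for; SciPy returns singular values in ascending order, unlike scipy.linalg.svd:

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds

X = sp.random(20, 10, density=0.3, random_state=0, format='csr')
U, Sigma, VT = svds(X, k=3)
Sigma = Sigma[::-1]                 # reverse to descending order
U, VT = U[:, ::-1], VT[::-1]
X_reduced = U * Sigma               # shape (20, 3)
print(X_reduced.shape, np.all(np.diff(Sigma) <= 0))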
Example #23
def test_as_float_array():
    """Test function for as_float_array"""
    X = np.ones((3, 10), dtype=np.int32)
    X = X + np.arange(10, dtype=np.int32)
    # Checks that the return type is ok
    X2 = as_float_array(X, copy=False)
    np.testing.assert_equal(X2.dtype, np.float32)
    # Another test
    X = X.astype(np.int64)
    X2 = as_float_array(X, copy=True)
    # Checking that the array wasn't overwritten
    assert_true(as_float_array(X, False) is not X)
    # Checking that the new type is ok
    np.testing.assert_equal(X2.dtype, np.float64)
    # Here, X is of the right type, it shouldn't be modified
    X = np.ones((3, 2), dtype=np.float32)
    assert_true(as_float_array(X, copy=False) is X)
    # Test that if X is fortran ordered it stays
    X = np.asfortranarray(X)
    assert_true(np.isfortran(as_float_array(X, copy=True)))
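The copy semantics tested here can be demonstrated directly; a minimal sketch assuming only NumPy and scikit-learn:

import numpy as np
from sklearn.utils import as_float_array

X = np.ones((3, 2), dtype=np.float32)
print(as_float_array(X, copy=False) is X)  # True: already float, no copy
print(as_float_array(X, copy=True) is X)   # False: an explicit copy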
Example #24
    def inverse_transform(self, X, copy=None):
        """Undo the ZCA transform and rotate back to the original
        representation

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data to rotate back.
        """
        check_is_fitted(self, 'mean_')
        X = as_float_array(X, copy=self.copy)
        return np.dot(X, self.dewhiten_) + self.mean_
Example #25
File: zca.py Project: mwv/zca
    def transform(self, X, y=None, copy=None):
        """Perform ZCA whitening

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data to whiten along the features axis.
        """
        check_is_fitted(self, 'mean_')
        X = as_float_array(X, copy=self.copy)
        return np.dot(X - self.mean_, self.whiten_.T)
Example #26
def get_distance_metric(X, dtype='euclidean', normalize=True, nrows=10):
    X = as_float_array(X)
    # `dtype` names the distance metric here (e.g. 'euclidean')
    dist = nn.DistanceMetric.get_metric(dtype)
    d = dist.pairwise(X)
    print('-' * 80)
    print(introspection(dist))
    nrows = min(nrows, len(d))
    for row in d[:nrows]:
        print(["%.2f" % x for x in row[:nrows]])
    print('-' * 80)
    return dist
Example #27
    def fit(self, X, y=None):
        """Estimate the precision using an adaptive maximum likelihood estimator.
        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Data from which to compute the proportion matrix.
        """
        X = check_array(X, ensure_min_features=2, estimator=self)
        X = as_float_array(X, copy=False, force_all_finite=False)

        n_samples, n_features = X.shape

        # perform first estimate
        new_estimator = clone(self.estimator)
        new_estimator.fit(X)

        if self.method == 'binary':
            # generate weights
            self.lam_ = self._binary_weights(new_estimator)

            # perform second step adaptive estimate
            self.estimator_ = QuicGraphLasso(lam=self.lam_ *
                                             new_estimator.lam_,
                                             mode='default',
                                             init_method='cov',
                                             auto_scale=False)
            self.estimator_.fit(X)

        elif self.method == 'inverse_squared':
            self.lam_ = self._inverse_squared_weights(new_estimator)

            # perform second step adaptive estimate
            self.estimator_ = QuicGraphLassoCV(lam=self.lam_ *
                                               new_estimator.lam_,
                                               auto_scale=False)
            self.estimator_.fit(X)

        elif self.method == 'inverse':
            self.lam_ = self._inverse_weights(new_estimator)

            # perform second step adaptive estimate
            self.estimator_ = QuicGraphLassoCV(lam=self.lam_ *
                                               new_estimator.lam_,
                                               auto_scale=False)
            self.estimator_.fit(X)

        else:
            raise NotImplementedError(
                "Only method='binary', 'inverse_squared', or"
                " 'inverse' have been implemented.")

        self.is_fitted = True
        return self
Example #28
    def _check_X(self, X):
        if not hasattr(X, 'dtype'):
            _X = check_array(as_float_array(X))
        else:
            _X = check_array(X)

        if self.n_features:
            if _X.shape[1] != self.n_features:
                raise Exception(
                    'X has {} columns while {} are expected'.format(
                        _X.shape[1], self.n_features))
        return _X
Example #29
    def fit(self, X, y=None, **fit_params):
        """Fits the inverse covariance model according to the given training
        data and parameters.

        Parameters
        ----------
        X : 2D ndarray, shape (n_features, n_features)
            Input data.

        Returns
        -------
        self
        """
        # quic-specific outputs
        self.opt_ = None
        self.cputime_ = None
        self.iters_ = None
        self.duality_gap_ = None

        # these must be updated upon self.fit()
        self.sample_covariance_ = None
        self.lam_scale_ = None
        self.is_fitted_ = False

        self.path_ = _validate_path(self.path)
        X = check_array(X, ensure_min_features=2, estimator=self)
        X = as_float_array(X, copy=False, force_all_finite=False)
        self.init_coefs(X)
        if self.method == "quic":
            (
                self.precision_,
                self.covariance_,
                self.opt_,
                self.cputime_,
                self.iters_,
                self.duality_gap_,
            ) = quic(
                self.sample_covariance_,
                self.lam * self.lam_scale_,
                mode=self.mode,
                tol=self.tol,
                max_iter=self.max_iter,
                Theta0=self.Theta0,
                Sigma0=self.Sigma0,
                path=self.path_,
                msg=self.verbose,
            )
        else:
            raise NotImplementedError(
                "Only method='quic' has been implemented.")

        self.is_fitted_ = True
        return self
Example #30
File: zca.py Project: mwv/zca
    def inverse_transform(self, X, copy=None):
        """Undo the ZCA transform and rotate back to the original
        representation

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            The data to rotate back.
        """
        check_is_fitted(self, 'mean_')
        X = as_float_array(X, copy=self.copy)
        return np.dot(X, self.dewhiten_) + self.mean_
Example #31
    def fit(self, X, y=None):
        from scipy import linalg
        from sklearn.utils import as_float_array

        X = as_float_array(X, copy=self.copy)
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        sigma = np.dot(X.T, X) / X.shape[1]
        U, S, V = linalg.svd(sigma)
        tmp = np.dot(U, np.diag(1 / np.sqrt(S + self.regularization)))
        self.components_ = np.dot(tmp, U.T)
        return self
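Putting the recurring ZCA pattern together, a self-contained sketch; note it normalizes the covariance by n_samples, where several examples above divide by X.shape[1] instead, and the 1e-5 regularization is an arbitrary choice:

import numpy as np
from scipy import linalg
from sklearn.utils import as_float_array

rng = np.random.RandomState(0)
X = as_float_array(rng.rand(200, 5), copy=True)
mean_ = X.mean(axis=0)
Xc = X - mean_
sigma = np.dot(Xc.T, Xc) / Xc.shape[0]
U, S, _ = linalg.svd(sigma)
components_ = np.dot(np.dot(U, np.diag(1.0 / np.sqrt(S + 1e-5))), U.T)
Xw = np.dot(Xc, components_)
print(np.round(np.cov(Xw, rowvar=False), 2))  # approximately the identity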
Example #32
def test_as_float_array():
    # Test function for as_float_array
    X = np.ones((3, 10), dtype=np.int32)
    X = X + np.arange(10, dtype=np.int32)
    # Checks that the return type is ok
    X2 = as_float_array(X, copy=False)
    np.testing.assert_equal(X2.dtype, np.float32)
    # Another test
    X = X.astype(np.int64)
    X2 = as_float_array(X, copy=True)
    # Checking that the array wasn't overwritten
    assert_true(as_float_array(X, False) is not X)
    # Checking that the new type is ok
    np.testing.assert_equal(X2.dtype, np.float64)
    # Here, X is of the right type, it shouldn't be modified
    X = np.ones((3, 2), dtype=np.float32)
    assert_true(as_float_array(X, copy=False) is X)
    # Test that if X is fortran ordered it stays
    X = np.asfortranarray(X)
    assert_true(np.isfortran(as_float_array(X, copy=True)))

    # Test the copy parameter with some matrices
    matrices = [
        np.matrix(np.arange(5)),
        sp.csc_matrix(np.arange(5)).toarray(),
        sparse_random_matrix(10, 10, density=0.10).toarray()
    ]
    for M in matrices:
        N = as_float_array(M, copy=True)
        N[0, 0] = np.nan
        assert_false(np.isnan(M).any())
Example #33
 def fit(self, X, y=None):
     X = array2d(X)
     n_samples, n_features = X.shape
     X = as_float_array(X, copy=self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     eigs, eigv = eigh(np.dot(X.T, X) / n_samples + \
                      self.bias * np.identity(n_features))
     components = np.dot(eigv * np.sqrt(1.0 / eigs), eigv.T)
     self.components_ = components
     #Order the explained variance from greatest to least
     self.explained_variance_ = eigs[::-1]
     return self
Example #34
    def predict(self, X):
        """
        compute the correlation coefficient with irpas signature
        """
        signature = self.get_signature()

        X = as_float_array(X)
        X_transformed = self.transform(X) - signature[1]
        corrcoef = np.array(
            [np.corrcoef(signature[0], e)[0][1] for e in X_transformed])
        corrcoef[np.isnan(corrcoef)] = np.finfo(np.float32).min

        return corrcoef
Example #35
def mean_squared_error(y_true, y_pred):
    """
    Root mean squared error, mean squared error, and their standard deviation.

    If you need only RMSE, use
    :func:`sklearn.metrics.mean_squared_error`

    Parameters
    ----------
    y_true : array, shape(n_samples,)
        Ground truth scores
    y_pred : array, shape(n_samples,)
        Predicted scores

    Returns
    -------
    rmse : float
        root mean squared error
    mean : float
        mean of squared errors
    stdev : float
        standard deviation of squared errors
    """

    # check inputs
    assert_all_finite(y_true)
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calculate errors
    errs = (y_true - y_pred) ** 2
    mean = np.nanmean(errs)
    stdev = np.nanstd(errs)
    rmse = np.sqrt(np.maximum(mean, 0.))

    return rmse, mean, stdev
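A quick check of the three return values, assuming NumPy only:

import numpy as np

y_true = np.array([3.0, 1.0, 4.0, 1.0])
y_pred = np.array([2.5, 1.5, 4.0, 0.0])
errs = (y_true - y_pred) ** 2
print(np.sqrt(errs.mean()), errs.mean(), errs.std())  # rmse, mean, stdev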
Example #36
def mean_squared_error(y_true, y_pred):
    """
    Root mean squared error, mean squared error, and their standard deviation.

    If you need only RMSE, use
    :func:`sklearn.metrics.mean_squared_error`

    Parameters
    ----------
    y_true : array, shape(n_samples,)
        Ground truth scores
    y_pred : array, shape(n_samples,)
        Predicted scores

    Returns
    -------
    rmse : float
        root mean squared error
    mean : float
        mean of squared errors
    stdev : float
        standard deviation of squared errors
    """

    # check inputs
    assert_all_finite(y_true)
    y_true = as_float_array(y_true)
    assert_all_finite(y_pred)
    y_pred = as_float_array(y_pred)
    check_consistent_length(y_true, y_pred)

    # calculate errors
    errs = (y_true - y_pred)**2
    mean = np.nanmean(errs)
    stdev = np.nanstd(errs)
    rmse = np.sqrt(np.maximum(mean, 0.))

    return rmse, mean, stdev
Example #37
 def _validate_X(self, X):
     if len(X.shape) == 1:
         raise ValueError('X should be a 2-dimensional array.')
     #    if one feature:
     #        X = X.reshape(1,-1)
     #    else:  # one sample
     #        X = X.reshape(-1,1)
     if X.shape[0] == 0:
         raise ValueError('Empty samples.')
     if X.shape[1] == 0:
         raise ValueError(
             '0 feature(s) (shape=(3, 0)) while a minimum of %d is required.'
             % (1, ))
     return as_float_array(check_array(X))
Example #38
    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
            
        Notes
        -----
        Calling multiple times will update the components
        """
        
        X = array2d(X)
        n_samples, n_features = X.shape
        X = as_float_array(X, copy=self.copy)

        # init
        if self.iteration == 0:
            self.mean_ = np.zeros([n_features], np.float64)
            self.components_ = np.zeros([self.n_components, n_features],
                                        np.float64)
        else:
            if n_features != self.components_.shape[1]:
                raise ValueError('The dimensionality of the new data and'
                                 ' the existing components_ does not match')

        # incrementally fit the model
        for i in range(X.shape[0]):
            self.partial_fit(X[i, :])

        # update explained_variance_ratio_
        self.explained_variance_ratio_ = np.sqrt(
            np.sum(self.components_ ** 2, axis=1))

        # sort by explained_variance_ratio_
        idx = np.argsort(-self.explained_variance_ratio_)
        self.explained_variance_ratio_ = self.explained_variance_ratio_[idx]
        self.components_ = self.components_[idx, :]

        # re-normalize
        self.explained_variance_ratio_ /= self.explained_variance_ratio_.sum()

        for r in range(self.components_.shape[0]):
            self.components_[r, :] /= np.sqrt(
                np.dot(self.components_[r, :], self.components_[r, :]))

        return self
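The sample-by-sample loop above mirrors scikit-learn's own incremental API; for comparison, a sketch using sklearn.decomposition.IncrementalPCA (a related but not identical algorithm):

import numpy as np
from sklearn.decomposition import IncrementalPCA

rng = np.random.RandomState(0)
X = rng.rand(100, 6)
ipca = IncrementalPCA(n_components=3)
for batch in np.array_split(X, 5):
    ipca.partial_fit(batch)  # feed the data in chunks
print(ipca.explained_variance_ratio_)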
Example #39
 def _center_data(self, X, y):
     ''' Centers data'''
     X = as_float_array(X, self.copy_X)
     # normalisation should be done in preprocessing!
     X_std = np.ones(X.shape[1], dtype=X.dtype)
     if self.fit_intercept:
         X_mean = np.average(X, axis=0)
         y_mean = np.average(y, axis=0)
         X -= X_mean
         y = y - y_mean
     else:
         X_mean = np.zeros(X.shape[1], dtype=X.dtype)
         y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
     return X, y, X_mean, y_mean, X_std
Example #40
 def _center_data(self,X,y):
     ''' Centers data'''
     X     = as_float_array(X,self.copy_X)
     # normalisation should be done in preprocessing!
     X_std = np.ones(X.shape[1], dtype = X.dtype)
     if self.fit_intercept:
         X_mean = np.average(X,axis = 0)
         y_mean = np.average(y,axis = 0)
         X     -= X_mean
         y      = y - y_mean
     else:
         X_mean = np.zeros(X.shape[1],dtype = X.dtype)
         y_mean = 0. if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
     return X,y, X_mean, y_mean, X_std
Example #41
def test_memmap():
    # Confirm that input validation code doesn't copy memory mapped arrays

    asflt = lambda x: as_float_array(x, copy=False)

    with NamedTemporaryFile(prefix='sklearn-test') as tmp:
        M = np.memmap(tmp, shape=100, dtype=np.float32)
        M[:] = 0

        for f in (check_array, np.asarray, asflt):
            X = f(M)
            X[:] = 1
            assert_array_equal(X.ravel(), M)
            X[:] = 0
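A minimal sketch of the no-copy behavior this test relies on, using NumPy only; mode='w+' creates the backing file:

import numpy as np
from tempfile import NamedTemporaryFile

with NamedTemporaryFile(prefix='sklearn-test') as tmp:
    M = np.memmap(tmp, shape=10, dtype=np.float32, mode='w+')
    X = np.asarray(M)  # a view, not a copy, since the dtype already matches
    X[:] = 1
    print(M[:3])       # writes through X are visible in the memmap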
Example #42
    def _fit(self, X):
        """Fit the model to the data X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        X : ndarray, shape (n_samples, n_features)
            The input data, copied, centered and whitened when requested.
        """
        random_state = check_random_state(self.random_state)
        if hasattr(X, 'todense'):
            warnings.warn(
                "Sparse matrix support is deprecated"
                " and will be dropped in 0.16."
                " Use TruncatedSVD instead.", DeprecationWarning)
        else:
            # not a sparse matrix, ensure this is a 2D array
            X = np.atleast_2d(as_float_array(X, copy=self.copy))

        n_samples = X.shape[0]

        if not hasattr(X, 'todense'):
            # Center data
            self.mean_ = np.mean(X, axis=0)
            X -= self.mean_
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        U, S, V = randomized_svd(X,
                                 n_components,
                                 n_iter=self.iterated_power,
                                 random_state=random_state)

        self.explained_variance_ = exp_var = (S**2) / n_samples
        self.explained_variance_ratio_ = exp_var / exp_var.sum()

        if self.whiten:
            self.components_ = V / S[:, np.newaxis] * sqrt(n_samples)
        else:
            self.components_ = V

        return X
Example #43
    def fit(self, X, y=None):
        print("Fitting ... ", end="")

        X = as_float_array(X, copy=self.copy)
        self.mean_ = cp.mean(X, axis=0)
        X = X - self.mean_
        sigma = cp.dot(X.T, X) / (X.shape[0] - 1)

        U, S, V = linalg.svd(sigma)
        tmp = cp.dot(cp.array(U),
                     cp.diag(1 / cp.sqrt(cp.array(S) + self.regularization)))
        self.components_ = cp.dot(tmp, cp.array(U).T)

        print("done")
        return self
Example #44
def test_np_matrix():
    """Confirm that input validation code does not return np.matrix"""
    X = np.arange(12).reshape(3, 4)

    assert_false(isinstance(as_float_array(X), np.matrix))
    assert_false(isinstance(as_float_array(np.matrix(X)), np.matrix))
    assert_false(isinstance(as_float_array(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csr(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csc(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(sp.csr_matrix(X)), np.matrix))

    assert_false(isinstance(safe_asarray(X), np.matrix))
    assert_false(isinstance(safe_asarray(np.matrix(X)), np.matrix))
    assert_false(isinstance(safe_asarray(sp.lil_matrix(X)), np.matrix))

    assert_true(atleast2d_or_csr(X, copy=False) is X)
    assert_false(atleast2d_or_csr(X, copy=True) is X)
    assert_true(atleast2d_or_csc(X, copy=False) is X)
    assert_false(atleast2d_or_csc(X, copy=True) is X)
Example #45
def calculate_purity(labels_true, labels_pred):
    labels_true = np.array(labels_true)
    labels_true = labels_true.reshape(labels_true.size)
    labels_pred = np.array(labels_pred)
    labels_pred = labels_pred.reshape(labels_pred.size)

    k = np.size(np.unique(labels_pred))
    purityVector = np.zeros(k)
    purity = 0
    matrix = as_float_array(contingency_matrix(labels_true, labels_pred))
    for i in range(k):
        moda = float(np.max(matrix[:, i]))
        purityVector[i] = moda / np.sum(matrix[:, i])
        purity += purityVector[i] * np.sum(matrix[:, i]) / np.size(labels_pred)
    return purity, purityVector
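A compact equivalent of the purity computation, assuming scikit-learn's contingency_matrix and made-up labels:

import numpy as np
from sklearn.metrics.cluster import contingency_matrix

labels_true = [0, 0, 1, 1, 2, 2]
labels_pred = [0, 0, 1, 2, 2, 2]
C = contingency_matrix(labels_true, labels_pred)
# purity: fraction of samples that fall in their cluster's majority class
print(C.max(axis=0).sum() / C.sum())  # 5/6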
Example #46
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy=self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     X = X.T
     examples = np.shape(X)[1]
     sigma = np.dot(X, X.T) / (examples - 1)
     U, S, V = linalg.svd(sigma)
     d = np.sqrt(1 / S[0:100])
     dd = np.append(d, np.zeros((np.shape(X)[0] - 100)))
     #tmp = np.dot(U, np.diag(1/np.sqrt(S +self.regularization)))
     tmp = np.dot(U, np.diag(dd))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #47
 def fit(self, X, y=None):
     X = array2d(X)
     X = as_float_array(X, copy = self.copy)
     self.mean_ = np.mean(X, axis=0)
     X -= self.mean_
     X = X.T
     examples = np.shape(X)[1]
     sigma = np.dot(X,X.T) / (examples - 1)
     U, S, V = linalg.svd(sigma)
     d = np.sqrt(1/S[0:100])
     dd = np.append(d, np.zeros((np.shape(X)[0] - 100)))
     #tmp = np.dot(U, np.diag(1/np.sqrt(S +self.regularization)))
     tmp = np.dot(U, np.diag(dd))
     self.components_ = np.dot(tmp, U.T)
     return self
Example #48
    def _fit(self, X):
        """Fit the model to the data X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        X : ndarray, shape (n_samples, n_features)
            The input data, copied, centered and whitened when requested.
        """
        random_state = check_random_state(self.random_state)
        if hasattr(X, 'todense'):
            warnings.warn("Sparse matrix support is deprecated"
                          " and will be dropped in 0.16."
                          " Use TruncatedSVD instead.",
                          DeprecationWarning)
        else:
            # not a sparse matrix, ensure this is a 2D array
            X = np.atleast_2d(as_float_array(X, copy=self.copy))

        n_samples = X.shape[0]

        if not hasattr(X, 'todense'):
            # Center data
            self.mean_ = np.mean(X, axis=0)
            X -= self.mean_
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        U, S, V = randomized_svd(X, n_components,
                                 n_iter=self.iterated_power,
                                 random_state=random_state)

        self.explained_variance_ = exp_var = (S ** 2) / n_samples
        self.explained_variance_ratio_ = exp_var / exp_var.sum()

        if self.whiten:
            self.components_ = V / S[:, np.newaxis] * sqrt(n_samples)
        else:
            self.components_ = V

        return X
Example #49
    def fit(self, features_train):

        X = array2d(features_train)
        n_samples, n_features = X.shape
        print('given train features dimensions before PCA : ', features_train.shape)

        X = as_float_array(X)

        # Data preprocessing by Mean Normalization
        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        # Compute covariance matrix
        cov_matrix = np.dot(np.transpose(X), X) / n_samples
        print('cov_matrix dimensions : ', cov_matrix.shape)
        # Compute SVD
        U, S, V = linalg.svd(cov_matrix, full_matrices=1, compute_uv=1)
        print('x dimensions : ', X.shape)
        print('U dimensions : ', U.shape)
        print('S dimensions : ', S.shape)

        # Calculate optimal k - min number of principal components to maintain 99% of variance
        variance_retained = np.sum(S[:self.k_components]) / np.sum(S)

        while variance_retained < self.variance_percent_retained:
            self.k_components += 1
            variance_retained = np.sum(S[:self.k_components]) / np.sum(S)
            #print 'k_components : ', self.k_components, ' variance : ', variance_retained

        if self.k_components is None:
            self.k_components = n_features
        elif not 0 <= self.k_components <= n_features:
            raise ValueError("n_components=%r invalid for n_features=%d" %
                             (self.k_components, n_features))

        self.components = U

        self.U_reduce = U[:, :self.k_components]
        print('number of principal components : ', self.k_components)

        self.U = U
        self.S = S
        self.V = V

        return (U, S, V)
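The variance-retained search above can be done without a loop; a sketch of the same idea on the eigenvalue spectrum of the covariance matrix (random data for illustration):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 10)
Xc = X - X.mean(axis=0)
_, S, _ = np.linalg.svd(np.dot(Xc.T, Xc) / len(Xc))
ratio = np.cumsum(S) / S.sum()
k = int(np.searchsorted(ratio, 0.99)) + 1  # smallest k retaining 99% variance
print(k, ratio[k - 1])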
Example #50
def get_kMeans(fileData, normalized_axis=1, norm='l1'):
    data = np.load(fileData)
    #print data
    features = data['data']
    n_clusters = np.size(np.unique(data['labels']))
    features = as_float_array(features)
    #print features
    if normalized_axis is not None:
        features = normalize(features, norm=norm, axis=normalized_axis)
    print(features)
    model = KMeans(n_clusters=n_clusters, tol=1e-2)
    print(model)
    model.fit(features)
    print(model.labels_)
    labels_pred = model.labels_
    labels_true = data['labels']
    return labels_true, labels_pred, features
Example #51
def test_as_float_array():
    # Test function for as_float_array
    X = np.ones((3, 10), dtype=np.int32)
    X = X + np.arange(10, dtype=np.int32)
    X2 = as_float_array(X, copy=False)
    assert X2.dtype == np.float32
    # Another test
    X = X.astype(np.int64)
    X2 = as_float_array(X, copy=True)
    # Checking that the array wasn't overwritten
    assert as_float_array(X, False) is not X
    assert X2.dtype == np.float64
    # Test int dtypes <= 32bit
    tested_dtypes = [
        bool, np.int8, np.int16, np.int32, np.uint8, np.uint16, np.uint32
    ]
    for dtype in tested_dtypes:
        X = X.astype(dtype)
        X2 = as_float_array(X)
        assert X2.dtype == np.float32

    # Test object dtype
    X = X.astype(object)
    X2 = as_float_array(X, copy=True)
    assert X2.dtype == np.float64

    # Here, X is of the right type, it shouldn't be modified
    X = np.ones((3, 2), dtype=np.float32)
    assert as_float_array(X, copy=False) is X
    # Test that if X is fortran ordered it stays
    X = np.asfortranarray(X)
    assert np.isfortran(as_float_array(X, copy=True))

    # Test the copy parameter with some matrices
    matrices = [
        np.matrix(np.arange(5)),
        sp.csc_matrix(np.arange(5)).toarray(),
        _sparse_random_matrix(10, 10, density=0.10).toarray()
    ]
    for M in matrices:
        N = as_float_array(M, copy=True)
        N[0, 0] = np.nan
        assert not np.isnan(M).any()
Example #52
def test_as_float_array():
    # Test function for as_float_array
    X = np.ones((3, 10), dtype=np.int32)
    X = X + np.arange(10, dtype=np.int32)
    X2 = as_float_array(X, copy=False)
    assert_equal(X2.dtype, np.float32)
    # Another test
    X = X.astype(np.int64)
    X2 = as_float_array(X, copy=True)
    # Checking that the array wasn't overwritten
    assert as_float_array(X, False) is not X
    assert_equal(X2.dtype, np.float64)
    # Test int dtypes <= 32bit
    tested_dtypes = [bool,
                     np.int8, np.int16, np.int32,
                     np.uint8, np.uint16, np.uint32]
    for dtype in tested_dtypes:
        X = X.astype(dtype)
        X2 = as_float_array(X)
        assert_equal(X2.dtype, np.float32)

    # Test object dtype
    X = X.astype(object)
    X2 = as_float_array(X, copy=True)
    assert_equal(X2.dtype, np.float64)

    # Here, X is of the right type, it shouldn't be modified
    X = np.ones((3, 2), dtype=np.float32)
    assert as_float_array(X, copy=False) is X
    # Test that if X is fortran ordered it stays
    X = np.asfortranarray(X)
    assert np.isfortran(as_float_array(X, copy=True))

    # Test the copy parameter with some matrices
    matrices = [
        np.matrix(np.arange(5)),
        sp.csc_matrix(np.arange(5)).toarray(),
        sparse_random_matrix(10, 10, density=0.10).toarray()
    ]
    for M in matrices:
        N = as_float_array(M, copy=True)
        N[0, 0] = np.nan
        assert not np.isnan(M).any()
Example #53
 def fit(self, X, y=None):
     # X = array2d(X)
     n_samples, n_features = X.shape
     X = as_float_array(X, copy=self.copy)  # np.require(X, dtype=np.float32)
     self.mean_ = np.mean(X, axis=0)
     self.std_ = np.std(X, axis=0)
     X -= self.mean_
     X /= self.std_
     sigma = np.dot(X.T, X)/n_samples
     d, V = np.linalg.eigh(sigma)
     # u,s,v = np.linalg.svd(sigma)
     
     #eigs, eigv = eigh(np.dot(X.T, X) / n_samples + \
     #                 self.bias * np.identity(n_features))
     D = np.diag(1./np.sqrt(d + self.epsilon))
     components = np.dot(np.dot(V, D), V.T)
     self.components_ = components
     return self
Example #54
    def fit(self, X, y):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data.

        y : array-like, shape = [n_samples]
            Target values.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True,
                         ensure_min_samples=2, estimator=self)
        X = as_float_array(X, copy=False)
        n_samples, n_features = X.shape

        X, y, X_offset, y_offset, X_scale = \
            self._preprocess_data(X, y, self.fit_intercept, self.normalize)

        estimator_func, params = self._make_estimator_and_params(X, y)
        memory = self.memory
        if isinstance(memory, six.string_types):
            memory = Memory(cachedir=memory)

        scores_ = memory.cache(
            _resample_model, ignore=['verbose', 'n_jobs', 'pre_dispatch']
        )(
            estimator_func, X, y,
            scaling=self.scaling, n_resampling=self.n_resampling,
            n_jobs=self.n_jobs, verbose=self.verbose,
            pre_dispatch=self.pre_dispatch, random_state=self.random_state,
            sample_fraction=self.sample_fraction, **params)

        if scores_.ndim == 1:
            scores_ = scores_[:, np.newaxis]
        self.all_scores_ = scores_
        self.scores_ = np.max(self.all_scores_, axis=1)
        return self
Example #55
    def fit2(self, X, y):
        """
        Fit the model using X, y as training data.

        Using Woodbury formula

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like of shape [n_samples, n_outputs]
            Target values (class labels in classification, real numbers in
            regression)

        Returns
        -------
        self : object

            Returns an instance of self.
        """
        # fit random hidden layer and compute the hidden layer activations
        #self.H = self.hidden_layer.fit_transform(X)
        H = self._create_random_layer().fit_transform(X)
        y = as_float_array(y, copy=True)

        if self.beta is None:
            # Then, this is the first time the model is fitted
            if len(X) < self.n_hidden:
                raise ValueError(
                    "The first time the model is fitted, X must have "
                    "at least as many samples as the n_hidden value!")
            # TODO: handle cases of singular matrices (maybe with a try clause)
            self.P = pinv2(safe_sparse_dot(H.T, H))
            self.beta = multiple_safe_sparse_dot(self.P, H.T, y)
        else:
            M = np.eye(len(H)) + multiple_safe_sparse_dot(H, self.P, H.T)
            self.P -= multiple_safe_sparse_dot(self.P, H.T, pinv2(M), H,
                                               self.P)
            e = y - safe_sparse_dot(H, self.beta)
            self.beta += multiple_safe_sparse_dot(self.P, H.T, e)

        return self
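The Woodbury recursion in fit2 can be checked against a full refit; a self-contained NumPy sketch of the same update:

import numpy as np

rng = np.random.RandomState(0)
H1, y1 = rng.rand(20, 5), rng.rand(20, 1)
H2, y2 = rng.rand(8, 5), rng.rand(8, 1)

P = np.linalg.pinv(H1.T @ H1)        # first batch, as in fit2
beta = P @ H1.T @ y1

M = np.eye(len(H2)) + H2 @ P @ H2.T  # Woodbury update with the second batch
P = P - P @ H2.T @ np.linalg.inv(M) @ H2 @ P
beta = beta + P @ H2.T @ (y2 - H2 @ beta)

H, y = np.vstack([H1, H2]), np.vstack([y1, y2])
print(np.allclose(beta, np.linalg.pinv(H.T @ H) @ H.T @ y))  # True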
Example #56
    def _fit(self, X):
        """Fit the model to the data X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        X : ndarray, shape (n_samples, n_features)
            The input data, copied, centered and whitened when requested.
        """
        random_state = check_random_state(self.random_state)
        X = np.atleast_2d(as_float_array(X, copy=self.copy))

        n_samples = X.shape[0]

        # Center data
        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        U, S, V = randomized_svd(X,
                                 n_components,
                                 n_iter=self.iterated_power,
                                 random_state=random_state)

        self.explained_variance_ = exp_var = (S**2) / (n_samples - 1)
        full_var = np.var(X, ddof=1, axis=0).sum()
        self.explained_variance_ratio_ = exp_var / full_var
        self.singular_values_ = S  # Store the singular values.

        if self.whiten:
            self.components_ = V / S[:, np.newaxis] * math.sqrt(n_samples)
        else:
            self.components_ = V

        return X
Example #57
                def on_next(obj):
                    X = obj[["p_log", "q_log"]]
                    check_is_fitted(self, ["is_fitted"])
                    utils.assert_all_finite(X)
                    X = utils.as_float_array(X)
                    self._update_clustering(X)

                    obj_2 = {
                        "i_min": np.min(obj[["i"]]),
                        "i_max": np.max(obj[["i"]]),
                        "cluster": self.clustering,
                        "X": X
                    }

                    if "start_time" in obj.keys():
                        obj_2["start_time"] = obj.iloc[-1]["start_time"]

                    observer.on_next(obj_2)