Esempio n. 1
0
    def _pre_fit(self, X, y):
        """Prepare the feature matrix and survival outcome for fitting.

        The inputs are first passed through ``check_arrays_survival`` (with
        ``copy=self.copy_X``), which yields the feature matrix together with
        the event indicator and the observed time. The columns of the feature
        matrix are then centered in place and, when ``self.normalize`` is set,
        rescaled by ``f_normalize`` along axis 0. Finally all three arrays are
        reordered so that samples appear by decreasing time.

        Parameters
        ----------
        X : array-like
            Feature matrix, forwarded to ``check_arrays_survival``.
        y : object
            Survival outcome, forwarded to ``check_arrays_survival``.

        Returns
        -------
        X : ndarray
            Centered (optionally normalized) features in Fortran order, rows
            sorted by descending time.
        event_num : ndarray of uint8
            Event indicator in the same row order.
        time : ndarray of float64
            Observed times in descending order.
        """
        X, event, time = check_arrays_survival(X, y, copy=self.copy_X)

        # Remove the per-column mean; this modifies the validated array in
        # place rather than allocating a new one.
        X -= numpy.average(X, axis=0)
        if self.normalize:
            X = f_normalize(X, copy=False, axis=0)

        # Mergesort is stable, so samples with tied times keep their
        # original relative order after sorting by descending time.
        order = numpy.argsort(-time, kind="mergesort")
        return (
            numpy.asfortranarray(X[order, :]),
            event[order].astype(numpy.uint8),
            time[order].astype(numpy.float64),
        )
Esempio n. 2
0
    def _pre_fit(self, X, y):
        """Prepare features and survival outcome, keeping the scaling statistics.

        The inputs go through ``check_arrays_survival`` (with
        ``copy=self.copy_X``) and the resulting feature matrix through
        ``self._validate_data``. The columns are centered in place and, when
        ``self.normalize`` is set, rescaled by ``f_normalize`` along axis 0.
        All arrays are then reordered so that samples appear by decreasing
        time.

        Parameters
        ----------
        X : array-like
            Feature matrix, forwarded to ``check_arrays_survival``.
        y : object
            Survival outcome, forwarded to ``check_arrays_survival``.

        Returns
        -------
        X : ndarray
            Centered (optionally normalized) features in Fortran order, rows
            sorted by descending time.
        event_num : ndarray of uint8
            Event indicator in the same row order.
        time : ndarray of float64
            Observed times in descending order.
        X_offset : ndarray
            Per-column mean that was subtracted from the features.
        X_scale : ndarray
            Per-column norm reported by ``f_normalize``; all ones when
            ``self.normalize`` is false.
        """
        X, event, time = check_arrays_survival(X, y, copy=self.copy_X)
        X = self._validate_data(X)

        # Center every column in place and remember the subtracted mean.
        X_offset = numpy.average(X, axis=0)
        X -= X_offset

        if not self.normalize:
            # No rescaling requested: report a neutral scale per feature.
            X_scale = numpy.ones(X.shape[1], dtype=X.dtype)
        else:
            X, X_scale = f_normalize(X, copy=False, axis=0, return_norm=True)

        # Mergesort is stable, so samples with tied times keep their
        # original relative order after sorting by descending time.
        order = numpy.argsort(-time, kind="mergesort")
        X_sorted = numpy.asfortranarray(X[order, :])
        event_sorted = event[order].astype(numpy.uint8)
        time_sorted = time[order].astype(numpy.float64)
        return X_sorted, event_sorted, time_sorted, X_offset, X_scale
Esempio n. 3
0
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
                     sample_weight=None, return_mean=False, check_input=True):
    """Center and scale X and y before fitting a linear model.

    The returned statistics allow the input to be reconstructed, since the
    output satisfies ``X_out = (X - X_offset) / X_scale``.

    Behaviour by case:

    * ``fit_intercept=True``, dense X: X is centered in place on its
      (optionally weighted) column mean, then optionally normalized with
      ``f_normalize`` along axis 0.
    * ``fit_intercept=True``, sparse X: X is never centered. The column
      means are still computed and are returned as ``X_offset`` only when
      ``return_mean=True`` (otherwise ``X_offset`` is zeroed). Normalization
      divides each column by ``sqrt(variance * n_rows)``.
      NOTE(review): ``sample_weight`` is not passed to ``mean_variance_axis``
      in this branch, so the sparse statistics are unweighted.
    * ``fit_intercept=False``: nothing is centered, ``X_offset`` and
      ``y_offset`` are zeros, and normalization is still applied on request.

    y is always converted to an array of ``X.dtype``; it is centered on its
    (optionally weighted) mean only when ``fit_intercept=True``.

    Parameters
    ----------
    X : array or sparse matrix
        Feature matrix. Modified in place unless a copy is made.
    y : array-like
        Target values, 1-D or 2-D.
    fit_intercept : bool
        Whether to center the data.
    normalize : bool, default False
        Whether to rescale the columns of X.
    copy : bool, default True
        Whether to work on a copy of X.
    sample_weight : number, array-like or None, default None
        Weights for the means; a plain number is treated as no weighting.
    return_mean : bool, default False
        For sparse X, return the column means even though X is not centered.
    check_input : bool, default True
        Whether to validate X with ``check_array`` (csr/csc accepted,
        float dtypes); when False X is used as given.

    Returns
    -------
    X, y, X_offset, y_offset, X_scale
    """
    def _scale_sparse_columns(matrix, col_var):
        # Turn the per-column variance into a norm, reusing the same buffer,
        # then divide every column of the sparse matrix by it in place.
        col_var *= matrix.shape[0]
        norms = np.sqrt(col_var, col_var)
        # Constant columns would otherwise cause a division by zero.
        norms[norms == 0] = 1
        inplace_column_scale(matrix, 1. / norms)
        return norms

    # A plain number carries no per-sample information: treat as unweighted.
    if isinstance(sample_weight, numbers.Number):
        sample_weight = None
    elif sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
                        dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy() if issparse(X) else X.copy(order='K')

    # Keep y consistent with the dtype of X.
    y = np.asarray(y, dtype=X.dtype)

    if not fit_intercept:
        # No centering: only the optional column scaling is applied.
        if not normalize:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)
        elif issparse(X):
            _, col_var = mean_variance_axis(X, axis=0)
            X_scale = _scale_sparse_columns(X, col_var)
        else:
            X, X_scale = f_normalize(X, axis=0, copy=False,
                                     return_norm=True)

        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
        if y.ndim == 1:
            y_offset = X.dtype.type(0)
        else:
            y_offset = np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    if issparse(X):
        # Sparse data is left uncentered; the mean is only reported when
        # the caller explicitly asks for it.
        X_offset, col_var = mean_variance_axis(X, axis=0)
        if not return_mean:
            X_offset[:] = X.dtype.type(0)

        # TODO: f_normalize could be used here as well but the function
        # inplace_csr_row_normalize_l2 must be changed such that it
        # can return also the norms computed internally
        if normalize:
            X_scale = _scale_sparse_columns(X, col_var)
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)
    else:
        X_offset = np.average(X, axis=0, weights=sample_weight)
        X -= X_offset
        if normalize:
            X, X_scale = f_normalize(X, axis=0, copy=False,
                                     return_norm=True)
        else:
            X_scale = np.ones(X.shape[1], dtype=X.dtype)

    y_offset = np.average(y, axis=0, weights=sample_weight)
    return X, y - y_offset, X_offset, y_offset, X_scale