Exemple #1
0
def _dimReduce(df, method='pca', n_components=2, labels=None, standardize=False, smatFunc=None, ldaShrinkage='auto'):
    if method == 'kpca':
        """By using KernelPCA for dimensionality reduction we don't need to impute missing values"""
        if smatFunc is None:
            smatFunc = corrTSmatFunc
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        smat = smatFunc(df).values
        xy = pca.fit_transform(smat)
        pca.components_ = pca.alphas_
        pca.explained_variance_ratio_ = pca.lambdas_ / pca.lambdas_.sum()
        return xy, pca
    elif method == 'pca':
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pca = PCA(n_components=n_components)
        xy = pca.fit_transform(normed)
        return xy, pca
    elif method == 'lda':
        if labels is None:
            raise ValueError('labels needed to perform LDA')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        if df.shape[1] > df.shape[0]:
            """Pre-PCA step"""
            ppca = PCA(n_components=int(df.shape[0]/1.5))
            normed = ppca.fit_transform(df)

        lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage=ldaShrinkage, n_components=n_components)
        lda.fit(normed, labels.values)
        lda.explained_variance_ratio_ = np.abs(lda.explained_variance_ratio_) / np.abs(lda.explained_variance_ratio_).sum()
        xy = lda.transform(normed)
        return xy, lda
    elif method == 'pls':
        if labels is None:
            raise ValueError('labels needed to perform PLS')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        pls = PLSRegression(n_components=n_components)
        pls.fit(normed, labels)
        
        pls.explained_variance_ratio_ = np.zeros(n_components)
        xy = pls.x_scores_
        return xy, pls
Exemple #2
0
def _dimReduce(df, method='pca', n_components=2, labels=None, standardize=False, smatFunc=None, ldaShrinkage='auto'):
    if method == 'kpca':
        """By using KernelPCA for dimensionality reduction we don't need to impute missing values"""
        if smatFunc is None:
            smatFunc = corrTSmatFunc
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        smat = smatFunc(df).values
        xy = pca.fit_transform(smat)
        pca.components_ = pca.alphas_
        pca.explained_variance_ratio_ = pca.lambdas_ / pca.lambdas_.sum()
        return xy, pca
    elif method == 'pca':
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pca = PCA(n_components=n_components)
        xy = pca.fit_transform(normed)
        return xy, pca
    elif method == 'lda':
        if labels is None:
            raise ValueError('labels needed to perform LDA')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        if df.shape[1] > df.shape[0]:
            """Pre-PCA step"""
            ppca = PCA(n_components=int(df.shape[0]/1.5))
            normed = ppca.fit_transform(df)

        lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage=ldaShrinkage, n_components=n_components)
        lda.fit(normed, labels.values)
        lda.explained_variance_ratio_ = np.abs(lda.explained_variance_ratio_) / np.abs(lda.explained_variance_ratio_).sum()
        xy = lda.transform(normed)
    elif method == 'pls':
        if labels is None:
            raise ValueError('labels needed to perform PLS')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        pls = PLSRegression(n_components=n_components)
        pls.fit(normed, labels)
        
        pls.explained_variance_ratio_ = np.zeros(n_components)
        xy = pls.x_scores_
        return xy, pls