コード例 #1
0
def plot_single_projection(holder,
                           labels,
                           class_name='Antioxidants',
                           fp_name='fps_e3fp_1024bit',
                           standardize=True,
                           preprocess_lda='PCA'):
    '''
    Plot a 2-D LDA projection of one fingerprint set: `class_name` vs. the rest.

    Rows of ``holder[fp_name]`` whose index appears in ``labels`` are kept
    (duplicated index entries keep their last occurrence), every row that is
    not ``class_name`` is relabelled ``'not <class_name>'``, the features are
    optionally standardized and reduced, and an LDA with two discriminants is
    fitted on the resulting two-class problem. The two groups are scattered
    in grey (rest) and orange (``class_name``), their centroids are marked,
    and the Euclidean distance between the centroids is put in the title.

    Parameters
    ----------
    holder : dict
        Dictionary with DataFrames as values and fp-filenames as keys.
    labels : dict
        Mapping of DrugCombID: ATC_class.
    class_name : str
        The class to contrast against all other rows.
    fp_name : str
        Key of the fingerprint DataFrame inside ``holder``.
    standardize : bool
        If true, the features are passed through mlxtend's ``standardize``
        (after the index has been reset to a positional one).
    preprocess_lda : str
        Reduction applied before LDA: one of ``'PLS'``, ``'PCA'``,
        ``'kernelPCA'``, ``'NCA'`` or ``'NONE'`` (no reduction).

    Returns
    -------
    The matplotlib figure containing the scatter plot.

    Raises
    ------
    ValueError
        If ``preprocess_lda`` is not one of the supported names.
    '''
    # Fail fast: an unknown name used to fall through the if/elif chain and
    # only surface later as an unbound local variable.
    known_preprocessors = ('PLS', 'PCA', 'kernelPCA', 'NONE', 'NCA')
    if preprocess_lda not in known_preprocessors:
        raise ValueError(
            'preprocess_lda must be one of {}, got {!r}'.format(
                known_preprocessors, preprocess_lda))

    from mlxtend.preprocessing import standardize as st
    from sklearn.preprocessing import LabelEncoder
    from sklearn.cluster import KMeans
    # mlxtend's LDA is used on purpose: with sklearn's LDA a dummy class would
    # have to be added to get 2 components after transformation.
    from mlxtend.feature_extraction import LinearDiscriminantAnalysis
    from scipy.spatial.distance import pdist

    df_cluster = holder[fp_name].copy()
    df_cluster = df_cluster.loc[df_cluster.index.isin(labels.keys())]
    df_cluster = df_cluster[~df_cluster.index.duplicated(keep='last')]

    # The index holds the IDs that `labels` is keyed by; remember it before
    # standardization replaces it with a positional index.
    ids = df_cluster.index.copy()
    if standardize:
        df_cluster.reset_index(inplace=True, drop=True)
        df_cluster = st(df_cluster)
    df_cluster['classes'] = ids
    df_cluster['classes'] = df_cluster['classes'].map(labels)

    # Collapse everything that is not the class of interest into one label.
    df_cluster.loc[df_cluster.classes != class_name,
                   'classes'] = 'not ' + class_name

    # Detach the string labels from the feature frame and encode them to ints
    # separately, so the encoded values can never overwrite the strings that
    # are needed again for plotting.
    real_classes = df_cluster.pop('classes')
    encoded = LabelEncoder().fit_transform(real_classes)
    features = df_cluster.values

    if preprocess_lda == 'PLS':
        from sklearn.cross_decomposition import PLSRegression
        pls = PLSRegression(n_components=10, scale=False)
        # fit_transform returns (x_scores, y_scores); only x_scores are used.
        temp = pls.fit_transform(features, encoded)[0]
    elif preprocess_lda == 'PCA':
        from sklearn.decomposition import PCA
        pca = PCA(n_components=0.95, svd_solver='full', whiten=False)
        temp = pca.fit_transform(features)
    elif preprocess_lda == 'kernelPCA':
        from sklearn.decomposition import KernelPCA
        pca = KernelPCA(kernel="rbf", gamma=5)
        temp = pca.fit_transform(features)
    elif preprocess_lda == 'NCA':
        from sklearn.neighbors import NeighborhoodComponentsAnalysis
        nca = NeighborhoodComponentsAnalysis()
        temp = nca.fit_transform(features, encoded)
    else:  # 'NONE'
        temp = features

    lda = LinearDiscriminantAnalysis(n_discriminants=2)
    lda.fit(temp, encoded)
    temp = lda.transform(temp)
    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore',
            'Casting complex values to real discards the imaginary part')
        # The projection may come back complex; keep the real part only.
        # The builtin `float` replaces the removed `np.float` alias.
        temp = temp.astype(float)

    df = pd.DataFrame(index=df_cluster.index, columns=[0, 1], data=temp)
    df['classes'] = real_classes

    is_target = df.classes == class_name
    rest_points = df.loc[~is_target, [0, 1]]
    target_points = df.loc[is_target, [0, 1]]

    # KMeans with a single cluster is used to obtain each group's centre.
    km = KMeans(init='k-means++', n_clusters=1, n_init=10)
    km.fit(rest_points)

    km1 = KMeans(init='k-means++', n_clusters=1, n_init=10)
    km1.fit(target_points)

    # Distance between the two group centres, shown in the figure title.
    d = pdist([km.cluster_centers_[0], km1.cluster_centers_[0]])
    d = str(round(d[0], 3))

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(rest_points[0],
               rest_points[1],
               marker=',',
               color='grey')
    ax.scatter(target_points[0],
               target_points[1],
               marker=',',
               color='orange')

    ax.scatter(km.cluster_centers_[:, 0],
               km.cluster_centers_[:, 1],
               marker='X',
               color='green',
               linewidths=30)

    ax.scatter(km1.cluster_centers_[:, 0],
               km1.cluster_centers_[:, 1],
               marker='X',
               color='red',
               linewidths=30)

    fig.suptitle(class_name + ' ' + d)
    return fig
コード例 #2
0
ファイル: ML_models.py プロジェクト: gitUmaru/prosthetic_limb
def LDA():
    """Fit a two-discriminant LDA on the training data and project ``X``.

    NOTE(review): ``LinearDiscriminantAnalysis``, ``X_train``, ``y_train`` and
    ``X`` are not defined in this block; they are presumably module-level
    names -- confirm against the rest of the file. The projection is bound to
    the local ``X_lda``; the visible code does not return it.
    """
    lda = LinearDiscriminantAnalysis(n_discriminants=2)
    lda.fit(X_train, y_train)
    X_lda = lda.transform(X)