Esempio n. 1
0
def loo_regressions(xs, ys, ft, dt, mt):
    """Run leave-one-out ElasticNet regressions of each ``ys`` row on ``xs``.

    ``xs`` is restricted to the columns of ``ys``, columns containing any
    missing value are dropped, and both frames are transposed so that rows
    are the shared column labels, the columns of ``x`` are the rows of ``xs``
    and the columns of ``y`` are the rows of ``ys``. For every column of
    ``y`` one ``ElasticNet(alpha=0.01)`` model is fitted per left-out row and
    used to predict that row.

    :param xs: DataFrame holding the predictor values.
    :param ys: DataFrame holding the values to predict.
    :param ft: label copied unchanged into the log lines and every output
        tuple (presumably the feature type - confirm with the caller).
    :param dt: label copied unchanged into the log lines and every output
        tuple (presumably the dataset type - confirm with the caller).
    :param mt: label copied unchanged into every output tuple (presumably
        the method type - confirm with the caller).
    :return: a pair ``(correlations, (ft, dt, mt, y_betas))``.
        ``correlations`` is a list of
        ``(ft, dt, name, mt, 'metabolites' | 'conditions', value)`` tuples
        where ``value`` is the first element returned by ``pearson`` for the
        observed versus predicted values of one column (``'metabolites'``)
        or one row (``'conditions'``) of ``y``. ``y_betas`` maps each column
        of ``y`` to a dict with the median coefficient of every predictor
        across the leave-one-out fits.
    """
    print('[INFO] %s %s' % (ft, dt))

    # Align matrices: keep the shared columns and put them on the rows
    x = xs.loc[:, ys.columns].dropna(axis=1).T
    y = ys[x.index].T

    # Define cross-validation
    cv = LeaveOneOut(len(y))

    # Run regressions
    y_pred, y_betas = {}, {}
    for m in y:
        y_m = y[m]
        y_pred[m] = {}

        betas = []
        for train, test in cv:
            # The folds are row positions, not labels: index with .iloc so an
            # integer-labelled index cannot be mistaken for positions.
            lm = ElasticNet(alpha=0.01).fit(x.iloc[train], y_m.iloc[train])
            y_pred[m][x.index[test][0]] = lm.predict(x.iloc[test])[0]

            betas.append(dict(zip(x.columns, lm.coef_)))

        # Summarise the per-fold coefficients by their median
        y_betas[m] = DataFrame(betas).median().to_dict()

    # Restore the row/column order of the observed matrix
    y_pred = DataFrame(y_pred).loc[y.index, y.columns]
    print('[INFO] Regression done:  %s %s' % (ft, dt))

    # Perform correlation with predicted values
    metabolites_corr = [(ft, dt, f, mt, 'metabolites',
                         pearson(y[f], y_pred[f])[0]) for f in y_pred]
    conditions_corr = [(ft, dt, s, mt, 'conditions',
                        pearson(y.loc[s], y_pred.loc[s])[0])
                       for s in y_pred.index]

    return (metabolites_corr + conditions_corr), (ft, dt, mt, y_betas)
Esempio n. 2
0
    if df_type == 'Kinases/Phosphatases':
        # Keep only the rows with values in more than 75% of the columns
        df = df[(df.count(1) / df.shape[1]) > .75]

    # Conditions overlap
    conditions = list(set(growth.index).intersection(df))

    # PCA analysis; missing values are replaced by 0 once and the same matrix
    # is used for both fitting and projecting
    df_filled = df.T.replace(np.nan, 0)
    pc_names = ['PC%d' % i for i in range(1, n_components + 1)]
    pca = PCA(n_components=n_components).fit(df_filled)
    pca_pc = DataFrame(
        pca.transform(df_filled),
        columns=pc_names,
        index=df.columns)

    # Plot correlation with PCA
    ax = plt.subplot(gs[pos])
    cor, pvalue, nmeas = pearson(growth[pca_pc.index], pca_pc[pc])
    sns.regplot(growth[pca_pc.index], pca_pc[pc], ax=ax, color='#4c4c4c')
    ax.axhline(0, ls='-', lw=0.1, c='black', alpha=.3)
    ax.axvline(0, ls='-', lw=0.1, c='black', alpha=.3)
    ax.set_title('%s - %s\n(Pearson: %.2f, p-value: %.1e)' %
                 (dataset_type, df_type, cor, pvalue))
    ax.set_xlabel('Relative growth (centered)')

    # pc is one of the 'PC<k>' column labels: parse everything after the
    # 'PC' prefix so components 10 and above are not truncated to one digit
    pc_number = int(pc[2:])
    ax.set_ylabel(
        'PC%d (%.1f%%)' %
        (pc_number, pca.explained_variance_ratio_[pc_number - 1] * 100))
    sns.despine(trim=True, ax=ax)

    # Second panel: explained variance ratio of every component
    ax = plt.subplot(gs[pos + 1])
    plot_df = DataFrame(list(zip(pc_names, pca.explained_variance_ratio_)),
                        columns=['PC', 'var'])
    # For every ion and every condition: hold out the columns of y whose
    # label starts with the condition name, fit an ElasticNet on the other
    # columns and record the coefficients and the held-out correlation.
    for m in metabolomics_dyn_ng.index:
        for c in conditions:
            # Training set: columns that do not belong to condition c
            train_cols = [i for i in y if not i.startswith(c)]
            ys = y.ix[m, train_cols]
            xs = x[ys.index].T

            # Test set: columns that belong to condition c
            test_cols = [i for i in y if i.startswith(c)]
            yss = y.ix[m, test_cols]
            xss = x[yss.index].T

            lm = ElasticNet(alpha=0.01)
            lm.fit(xs, ys)
            pred = Series(lm.predict(xss), index=xss.index)

            # One record per predictor with its fitted coefficient
            features = dict(zip(xs.columns, lm.coef_))
            for f, coefficient in features.items():
                lm_feat.append(
                    (feature_type, method_type, m, c, f, coefficient))

            # Agreement between held-out observations and predictions
            lm_res.append(
                (feature_type, method_type, m, c, pearson(yss, pred)[0]))

# Table of held-out correlations, one row per (ion, condition)
lm_res = DataFrame(
    lm_res,
    columns=['feature', 'method', 'ion', 'condition', 'pearson'])
lm_res['metabolite'] = [met_name[i] for i in lm_res['ion']]
print(lm_res.head())

# Table of fitted coefficients, one row per (ion, condition, feature)
lm_feat = DataFrame(
    lm_feat,
    columns=[
        'feature_type', 'method', 'ion', 'condition', 'feature',
        'coefficient'])
lm_feat['m_name'] = [met_name[i] for i in lm_feat['ion']]
lm_feat['f_name'] = [acc_name[i] for i in lm_feat['feature']]
print(lm_feat.head())


# -- Plot
# Colour assigned to each feature type
palette = {
    'TFs': '#34495e',
    'Kinases': '#3498db',
}

# Correlation boxplots
Esempio n. 4
0
    index_col=0)[k_activity_dyn_comb_gsea.columns]
# Keep only the rows whose standard deviation across columns is above 0.4
metabolomics_dyn_comb = metabolomics_dyn_comb.loc[
    metabolomics_dyn_comb.std(axis=1) > .4]
# Relabel the rows with their index value formatted to 4 decimal places
metabolomics_dyn_comb.index = ['%.4f' % i for i in metabolomics_dyn_comb.index]

kinases = set(k_activity_dyn_comb_lm.index)
ions = set(metabolomics_dyn_comb.index)
conditions = set(metabolomics_dyn_comb)

# -- Define the ion (row label of metabolomics_dyn_comb) to analyse
m = '606.0736'
k_activities = [
    ('gsea', k_activity_dyn_comb_gsea),
    ('lm', k_activity_dyn_comb_lm),
]

# -- Kinase activities correlation
# For every kinase, correlate its activity profile from each method with the
# profile of the selected ion over the same columns
m_cor = []
for k in kinases:
    m_cor.append((
        k,
        pearson(k_activity_dyn_comb_lm.ix[k, conditions],
                metabolomics_dyn_comb.ix[m, conditions])[0],
        pearson(k_activity_dyn_comb_gsea.ix[k, conditions],
                metabolomics_dyn_comb.ix[m, conditions])[0],
    ))
m_cor = DataFrame(m_cor, columns=['kinase', 'lm_cor', 'gsea_cor'])
m_cor = m_cor.set_index('kinase')

# Five kinases with the largest absolute correlation for each method
lm_top_features = list(
    m_cor['lm_cor'].abs().sort(inplace=False, ascending=False).head(5).index)
gsea_top_features = list(
    m_cor['gsea_cor'].abs().sort(inplace=False, ascending=False).head(5).index)
top_features = list(set(lm_top_features).union(gsea_top_features))

# One record per (top kinase, column, method) pairing the ion value with the
# kinase activity estimated by that method
plot_df = []
for k in top_features:
    for c in conditions:
        for method, df in k_activities:
            plot_df.append(
                (m, k, c, method,
                 metabolomics_dyn_comb.ix[m, c], df.ix[k, c]))