Ejemplo n.º 1
0
def batch_coenrichment(fea,
                       graph,
                       safe_scores=None,
                       n_iter=5000,
                       p_value=0.05,
                       _pre_cal_enriched=None):
    """
    Run the co-enrichment analysis for one feature or a batch of features.

    At least one of ``safe_scores`` and ``_pre_cal_enriched`` must be given.
    If ``_pre_cal_enriched`` is given, ``n_iter`` and ``p_value`` are ignored;
    otherwise they are forwarded to ``get_significant_nodes`` and should match
    the values that were passed to the ``SAFE`` algorithm.

    :param str/list fea: A single feature or an iterable of features which already went through the SAFE algorithm.
    :param tmap.tda.Graph.Graph graph:
    :param pd.DataFrame safe_scores: A SAFE score output from ``SAFE_batch`` which must contain all values occurring in ``fea``.
    :param int n_iter: Permutation times used at ``SAFE_batch``.
    :param float p_value: The p-value used to determine the enriched nodes.
    :param dict _pre_cal_enriched: A pre-calculated enriched_centroid containing all necessary features; passing it saves time.
    :return: ``(global_correlative_feas, sub_correlative_feas, metainfo)``, three dicts merged over every processed feature.
    :raises SyntaxError: if ``fea`` is neither a ``str`` nor an iterable.
    :raises Exception: if both ``safe_scores`` and ``_pre_cal_enriched`` are ``None``.
    """
    global_correlative_feas = {}
    sub_correlative_feas = {}
    metainfo = {}

    print('building network...')

    # A str is itself iterable, so it has to be recognised *before* the generic
    # iterable check; otherwise a feature name would be split into characters.
    if isinstance(fea, str):
        fea_batch = [fea]
    elif hasattr(fea, '__iter__'):
        fea_batch = list(fea)
    else:
        raise SyntaxError(
            'fea must be a str or an iterable of features, got %r' % type(fea))

    if _pre_cal_enriched is not None:
        enriched_centroid = _pre_cal_enriched
    elif safe_scores is not None:
        enriched_centroid = get_significant_nodes(
            graph=graph,
            safe_scores=safe_scores,
            pvalue=p_value,
            n_iter=n_iter,
        )
    else:
        raise Exception(
            '_pre_cal_enriched and safe_scores must pass one of them')

    # safe_scores is optional when _pre_cal_enriched is supplied; in that case
    # fall back to the keys of the pre-calculated result for the lookup.
    if safe_scores is not None:
        known_feas = safe_scores.columns
    else:
        known_feas = enriched_centroid

    # dict.fromkeys drops duplicates while keeping the caller's order.
    for cur_fea in dict.fromkeys(fea_batch):
        if cur_fea not in known_feas:
            print("%s doesn't exist at the columns of provided safe_scores."
                  % cur_fea)
            continue
        _global, _local, _meta = coenrichment_for_nodes(
            graph,
            enriched_centroid[cur_fea],
            cur_fea,
            enriched_centroid,
            mode='both')
        global_correlative_feas.update(_global)
        sub_correlative_feas.update(_local)
        metainfo.update(_meta)
    return global_correlative_feas, sub_correlative_feas, metainfo
Ejemplo n.º 2
0
def pairwise_coenrichment(graph, safe_scores, n_iter=5000, p_value=0.05, _pre_cal_enriched=None, verbose=1):
    """
    Build a feature-by-feature matrix of co-enrichment p-values.

    Every column of ``safe_scores`` is tested against every other column.
    If ``_pre_cal_enriched`` is given (and non-empty), ``n_iter`` and ``p_value`` are not used;
    otherwise they are forwarded to ``get_significant_nodes`` and should match the
    params passed to the ``SAFE`` algorithm.

    :param tmap.tda.Graph.Graph graph:
    :param pd.DataFrame safe_scores: A SAFE score output from ``SAFE_batch``; its columns are the features compared.
    :param int n_iter: Permutation times used at ``SAFE_batch``.
    :param float p_value: The p-value to determine the enriched nodes.
    :param dict _pre_cal_enriched: A pre-calculated enriched_centroid containing all necessary features; passing it saves time.
    :param verbose: When truthy, print a status line and wrap the iteration in ``tqdm``.
    :return: A ``pd.DataFrame`` indexed and labelled by the features, holding the ``fdr_bh``-corrected values.
    """
    features = safe_scores.columns
    dist_matrix = pd.DataFrame(data=np.nan, index=features, columns=features)

    if verbose:
        print('building network...')
        iter_obj = tqdm(features)
    else:
        iter_obj = features

    # Reuse the caller's pre-computed result when there is one.
    enriched_centroid = _pre_cal_enriched
    if not enriched_centroid:
        enriched_centroid = get_significant_nodes(graph=graph,
                                                  safe_scores=safe_scores,
                                                  pvalue=p_value,
                                                  n_iter=n_iter)

    for fea in iter_obj:
        # _filter=False fetches the raw fisher-exact test result without any cut-off values.
        _global, _meta = coenrichment_for_nodes(graph,
                                                enriched_centroid[fea],
                                                enriched_centroid,
                                                name=fea,
                                                safe_scores=safe_scores,
                                                mode='global',
                                                _filter=False)
        for other in features:
            if fea != other:
                c1, c2, c3, c4 = _meta[other]
                _odds, raw_p = _global[other]
                # Pairs that are not enriched get the neutral value 1.
                dist_matrix.loc[fea, other] = raw_p if is_enriched(c1, c2, c3, c4) else 1
        dist_matrix.loc[fea, fea] = 0

    # Correct for multiple testing over the flattened matrix, then restore its shape.
    flat_values = dist_matrix.values.reshape(-1, )
    adjusted = multipletests(flat_values, method='fdr_bh')[1]
    corrected_dist_matrix = pd.DataFrame(adjusted.reshape(dist_matrix.shape),
                                         index=dist_matrix.index,
                                         columns=dist_matrix.columns)
    return corrected_dist_matrix
Ejemplo n.º 3
0
from tmap.netx.SAFE import get_SAFE_summary
from tmap.netx.SAFE import get_significant_nodes

# Default shuffling, both directions: the result holds an 'enrich' and a
# 'decline' table.
safe_scores = SAFE_batch(graph, metadata, n_iter=50, _mode='both')
enriched_scores = safe_scores['enrich']
declined_scores = safe_scores['decline']

# Each score table is expected to have one row per graph node and one column
# per metadata column.
num_n = len(graph.nodes)
num_f = metadata.shape[1]
assert enriched_scores.shape == declined_scores.shape == (num_n, num_f)

# Enrichment only: the call returns the score table directly.
enriched_scores = SAFE_batch(graph, metadata, n_iter=50, _mode='enrich')
assert enriched_scores.shape == (num_n, num_f)

# Same check as the first run, this time with shuffle_by='sample'.
safe_scores = SAFE_batch(
    graph,
    metadata,
    n_iter=50,
    shuffle_by='sample',
    _mode='both',
)
enriched_scores = safe_scores['enrich']
declined_scores = safe_scores['decline']
assert enriched_scores.shape == declined_scores.shape == (num_n, num_f)

# With r_neighbor=True the call yields a pair that is unpacked here.
significant_centroids, significant_nodes = get_significant_nodes(
    graph, enriched_scores, r_neighbor=True)

safe_summary = get_SAFE_summary(graph, metadata, enriched_scores)
Ejemplo n.º 4
0
def compare_draw(data,
                 graph,
                 fit_result,
                 safe_scores,
                 fea1,
                 fea2=None,
                 nr_threshold=0.5,
                 mode='obj',
                 **kwargs):
    """
    Draw, for one or two features, the ordination scatter next to the Tmap graph.

    With two features the figure is a 2x2 grid (one row per feature: ordination
    on the left, Tmap on the right). With a single feature the grid is 2x1
    (ordination on top, Tmap below), matching the order of the subplot titles.

    :param data: Table with one column per feature (accessed via ``.columns`` and ``.loc``); values are cast to float for colouring.
    :param graph: Graph whose ``data`` attribute provides the projected coordinates (columns 0 and 1 are plotted).
    :param fit_result: Table indexed by feature with columns ``adj_Source``, ``adj_End`` and ``r2``; features missing from its index get no fit line.
    :param safe_scores: SAFE scores, indexed as ``safe_scores[fea][node]``.
    :param fea1: First feature to draw.
    :param fea2: Optional second feature to draw.
    :param nr_threshold: Forwarded to ``get_significant_nodes``.
    :param mode: Forwarded to ``write_figure``.
    :param kwargs: Forwarded to ``write_figure``.
    :return: Whatever ``write_figure`` returns for the assembled figure.
    :raises Exception: if a requested feature is not a column of ``data``.
    """
    feas = [fea1] if fea2 is None else [fea1, fea2]
    col = len(feas)
    subtitles = []
    for fea in feas:
        subtitles += ['%s ordination' % fea, '%s Tmap' % fea]

    # (row, column) cell of each feature's ordination and Tmap subplot.
    if col == 1:
        # 2x1 grid: there is no second column, so the Tmap goes on row 2.
        ord_cells = [(1, 1)]
        tmap_cells = [(2, 1)]
    else:
        ord_cells = [(idx + 1, 1) for idx in range(col)]
        tmap_cells = [(idx + 1, 2) for idx in range(col)]

    fig = tools.make_subplots(2,
                              col,
                              subplot_titles=subtitles,
                              horizontal_spacing=0,
                              vertical_spacing=0)
    projected_X = graph.data

    def draw_ord_and_tmap(fig, fit_result, fea, row, col, data):
        # Add the ordination scatter (coloured by the feature) and, when
        # available, the fitted line of the feature to cell (row, col).
        if fea in data.columns:
            color = Color(data.loc[:, fea].astype(float), target_by='sample')
        else:
            raise Exception(
                'Error occur, %s seem like a new feature for given data' % fea)

        fig.append_trace(
            go.Scatter(
                x=projected_X[:, 0],
                y=projected_X[:, 1],
                hoverinfo='text',
                mode='markers',
                marker=dict(color=color.get_sample_colors()),
                showlegend=False),
            row,
            col)
        if fea in fit_result.index:
            fig.append_trace(
                go.Scatter(x=[0, fit_result.loc[fea, 'adj_Source']],
                           y=[0, fit_result.loc[fea, 'adj_End']],
                           mode='lines+text',
                           showlegend=False,
                           text=['', round(fit_result.loc[fea, 'r2'], 4)]),
                row, col)

    enriched_nodes = get_significant_nodes(graph=graph,
                                           safe_scores=safe_scores,
                                           nr_threshold=nr_threshold)  # todo

    fig_container = []
    for fea, (row, column) in zip(feas, ord_cells):
        # projected_X is read from the enclosing scope, so it is not passed.
        draw_ord_and_tmap(fig, fit_result, fea, row, column, data)
        # Keep the SAFE score only on enriched nodes; all other nodes get 0.
        cache = {
            node: safe_scores[fea][node] if node in enriched_nodes[fea] else 0
            for node in safe_scores[fea].keys()
        }
        f = vis_progressX(graph,
                          color=Color(cache, target_by='node'),
                          mode='obj',
                          simple=True)
        fig_container.append(f)

    for f, (row, column) in zip(fig_container, tmap_cells):
        for trace in f.data:
            fig.append_trace(trace, row, column)

    fig.layout.width = 2000
    fig.layout.height = 2000
    # Hide the zero line on the ordination subplots only.
    # NOTE(review): assumes make_subplots numbers axes row by row starting at
    # the top-left cell (xaxis1, xaxis2, ...) -- confirm for the plotly version in use.
    for row, column in ord_cells:
        axis_id = (row - 1) * col + column
        getattr(fig.layout, 'xaxis%d' % axis_id).zeroline = False
        getattr(fig.layout, 'yaxis%d' % axis_id).zeroline = False
    fig.layout.hovermode = 'closest'
    # showticklabels
    for attr in dir(fig.layout):
        if attr.startswith('xaxis') or attr.startswith('yaxis'):
            fig.layout[attr]['showticklabels'] = False
    fig.layout.font.update(dict(size=20))
    for annotation in fig.layout.annotations:
        annotation['font'].update(dict(size=25))

    return write_figure(fig, mode, **kwargs)