Esempio n. 1
0
def draw_ranking(data,
                 cols_dict,
                 output,
                 mode='html',
                 width=1600,
                 height=1400,
                 sort_col='SAFE enriched score'):
    """
    Draw one horizontal bar chart per entry of ``cols_dict`` and export it.

    Rows of ``data`` are ordered (descending) by every column whose name
    starts with ``sort_col``; when no such column exists the original row
    order is kept and a message is logged.

    :param data: DataFrame whose index labels the bars and whose columns
        hold the values to plot.
    :param cols_dict: mapping of subplot title -> iterable of column names.
        For each entry the first column starting with ``sort_col`` is
        plotted, falling back to the first column starting with ``'r2'``.
    :param output: destination forwarded to ``output_fig``.
    :param mode: output mode forwarded to ``output_fig``.
    :param width: figure width; a quarter of it is used as the left margin.
    :param height: figure height.
    :param sort_col: column-name prefix used for sorting and for choosing
        the column plotted in each subplot.
    :return: None; the figure is handed to ``output_fig``.
    """
    col_names = list(cols_dict.keys())
    if len(col_names) == 1:
        fig = tools.make_subplots(1, 1)
    else:
        fig = tools.make_subplots(1,
                                  len(col_names),
                                  shared_yaxes=True,
                                  horizontal_spacing=0,
                                  subplot_titles=col_names)

    sorted_cols = [_ for _ in data.columns if _.startswith(sort_col)]
    if not sorted_cols:
        logger(
            "data you provide doesn't contain columns like %s, Maybe you provide a metadata directly? instead of SAFE summary table."
            % sort_col,
            verbose=1)
        sorted_df = data
    else:
        sorted_df = data.sort_values(sorted_cols, ascending=False)

    for col, each in enumerate(col_names, start=1):
        candidates = cols_dict[each]
        # Prefer a column matching ``sort_col``; otherwise fall back to 'r2'.
        name = next((_ for _ in candidates if _.startswith(sort_col)), None)
        if name is None:
            name = next((_ for _ in candidates if _.startswith('r2')), None)
        if name is None:
            # Nothing plottable for this entry: leave its subplot empty
            # instead of passing an empty column selection to go.Bar.
            logger("Unkown input file.", verbose=1)
            continue

        fig.append_trace(
            go.Bar(x=sorted_df.loc[:, name],
                   y=sorted_df.index,
                   marker=dict(line=dict(width=1)),
                   orientation='h',
                   showlegend=False), 1, col)

    # Highest-ranked rows go on top.
    fig.layout.yaxis.autorange = 'reversed'

    fig.layout.margin.l = width / 4
    fig.layout.width = width
    fig.layout.height = height

    output_fig(fig, output, mode)
    logger("Ranking graph has been output to", output, verbose=1)
Esempio n. 2
0
def draw_stratification(graph,
                        SAFE_dict,
                        cols,
                        output,
                        mode='html',
                        n_iter=1000,
                        p_val=0.05,
                        width=1000,
                        height=1000,
                        allnodes=False):
    """
    Enterotyping-like stratification map based on SAFE scores.

    The graph edges are drawn as a line trace, then one marker trace is
    added per feature showing the nodes associated with it.

    :param graph: object exposing ``nodePos`` (2-D array of coordinates),
        ``size``, ``nodes`` and ``edges`` (pairs of node indices).
        NOTE(review): ``size`` is indexed with 0..len(nodes)-1 -- confirm
        that node ids are consecutive integers.
    :param SAFE_dict: data turned into a DataFrame with
        ``pd.DataFrame.from_dict`` (rows: nodes, columns: features).
    :param cols: features to draw. When empty/None, every feature that is
        the top-scoring significant feature of at least 10 nodes is drawn.
    :param output: destination forwarded to ``output_fig``.
    :param mode: output mode forwarded to ``output_fig``.
    :param n_iter: used to derive the minimum p-value ``1 / (n_iter + 1)``.
    :param p_val: p-value cut-off converted into a SAFE score threshold.
    :param width: figure width.
    :param height: figure height.
    :param allnodes: with ``cols`` given, draw all nodes coloured by the
        feature (via ``vis_progressX``) instead of only the nodes whose
        score reaches the threshold.
    :return: None; the figure is handed to ``output_fig``.
    """
    node_pos = graph.nodePos
    nodes = graph.nodes
    sizes = graph.size
    sizes = np.array([sizes[_] for _ in range(len(nodes))]).reshape(-1, 1)

    # Marker sizes rescaled into a readable range.
    transformed_sizes = MinMaxScaler(
        feature_range=(10, 40)).fit_transform(sizes).ravel()

    # ``None`` between segments stops plotly from joining separate edges.
    xs = []
    ys = []
    for edge in graph.edges:
        xs += [node_pos[edge[0], 0], node_pos[edge[1], 0], None]
        ys += [node_pos[edge[0], 1], node_pos[edge[1], 1], None]
    fig = plotly.tools.make_subplots(1, 1)

    node_line = go.Scatter(
        # ordination line
        visible=True,
        x=xs,
        y=ys,
        marker=dict(color="#8E9DA2", opacity=0.7),
        line=dict(width=1),
        showlegend=False,
        hoverinfo='skip',
        mode="lines")
    fig.append_trace(node_line, 1, 1)

    safe_score_df = pd.DataFrame.from_dict(
        SAFE_dict)  # row: nodes, columns: features
    min_p_value = 1.0 / (n_iter + 1.0)
    SAFE_pvalue = np.log10(p_val) / np.log10(min_p_value)

    # For every node: the feature with the biggest SAFE score, or None when
    # even that score does not reach the threshold.
    best_cols = np.argmax(safe_score_df.values, axis=1)
    tmp = [
        safe_score_df.columns[col_idx]
        if safe_score_df.iloc[row_idx, col_idx] >= SAFE_pvalue else None
        for row_idx, col_idx in enumerate(best_cols)
    ]
    # Number of nodes per top feature; nodes without one are not counted so
    # they can never show up as a bogus "nan" feature.
    t = Counter(fea for fea in tmp if fea is not None)
    # Object dtype keeps ``==`` element-wise whatever the label type is.
    top_feature = np.array(tmp, dtype=object)

    if cols:
        missing = [_ for _ in cols if _ not in safe_score_df.columns]
        if missing:
            logger(
                "There are provided cols \" %s\"doesn't at SAFE summary table."
                % ';'.join(map(str, missing)),
                verbose=1)
        for fea in cols:
            if fea in missing:
                # Already reported above; looking it up would raise KeyError.
                continue
            if allnodes:
                color = Color(SAFE_dict[fea],
                              target_by='node',
                              dtype='numerical')
                subfig = vis_progressX(graph,
                                       simple=True,
                                       mode='obj',
                                       color=color)
                subfig.data[1]['name'] = fea
                fig.append_trace(subfig.data[1], 1, 1)
                continue

            # Per-node mask of nodes enriched for this feature.
            enriched = (safe_score_df.loc[:, fea] >= SAFE_pvalue).values
            if not enriched.any():
                # if all False....
                logger(
                    "fea: %s get all False bool indicated there are not enriched nodes showed at the graph"
                    % fea,
                    verbose=1)
                continue

            node_position = go.Scatter(
                # node position
                visible=True,
                x=node_pos[enriched, 0],
                y=node_pos[enriched, 1],
                hoverinfo="text",
                marker=dict(
                    # Same rescaled sizes as the automatic branch below.
                    size=list(transformed_sizes[enriched]),
                    opacity=0.9),
                showlegend=True,
                name=str(fea) + ' (%s)' % str(t.get(fea, 0)),
                mode="markers")
            fig.append_trace(node_position, 1, 1)
    else:
        for fea, count in sorted(t.items(), key=lambda x: x[1]):
            if count < 10:
                continue
            # safe higher than threshold, just centroides
            is_fea = top_feature == fea
            node_position = go.Scatter(
                # node position
                visible=True,
                x=node_pos[is_fea, 0],
                y=node_pos[is_fea, 1],
                hoverinfo="text",
                marker=dict(size=list(transformed_sizes[is_fea]),
                            opacity=0.9),
                showlegend=True,
                name=str(fea) + ' (%s)' % str(count),
                mode="markers")
            fig.append_trace(node_position, 1, 1)

    fig.layout.width = width
    fig.layout.height = height
    fig.layout.font.size = 15
    fig.layout.hovermode = 'closest'

    output_fig(fig, output, mode)
    logger("Stratification graph has been output to", output, verbose=1)
Esempio n. 3
0
def draw_PCOA(rawdatas,
              summary_datas,
              output,
              mode='html',
              width=1500,
              height=1000,
              sort_col='SAFE enriched score'):
    """
    Ordination plot of features based on their per-node SAFE scores.

    Despite the function name, the ordination is computed with ``PCA``.
    Every feature is one point; marker size follows ``sort_col`` and the
    ten features with the highest ``sort_col`` are labelled.

    :param rawdatas: iterable of dict-likes, each turned into a DataFrame
        with ``pd.DataFrame.from_dict`` (rows: nodes, columns: features).
    :param summary_datas: list of summary DataFrames indexed by feature and
        containing the ``sort_col`` column; one marker trace is drawn per
        DataFrame.
    :param output: destination forwarded to ``output_fig``.
    :param mode: output mode forwarded to ``output_fig``.
    :param width: figure width.
    :param height: figure height.
    :param sort_col: summary column used to scale markers and to pick the
        labelled features.
    :return: None; the figure is handed to ``output_fig``.
    """
    fig = go.Figure()
    summary_data = pd.concat(summary_datas, axis=0)
    # it won't raise error even it only contains one df.
    safe_dfs = [pd.DataFrame.from_dict(r_dict) for r_dict in rawdatas
                ]  # row represents nodes, columns represents features.
    safe_df = pd.concat(safe_dfs, axis=1)
    # Column order now follows the summary index; rows of ``pca_result``
    # below are therefore in ``safe_df.columns`` order.
    safe_df = safe_df.reindex(columns=summary_data.index)

    pca = PCA()
    pca_result = pca.fit_transform(safe_df.T)

    mx_scale = MinMaxScaler(feature_range=(10, 40)).fit(
        summary_data.loc[:, [sort_col]])
    top10_feas = list(
        summary_data.sort_values(sort_col, ascending=False).index[:10])

    show_legend = len(summary_datas) > 1
    for each in summary_datas:
        in_dataset = safe_df.columns.isin(each.index)
        vals = each.loc[:, [sort_col]]
        fig.add_trace(
            go.Scatter(
                x=pca_result[in_dataset, 0],
                y=pca_result[in_dataset, 1],
                mode="markers",
                marker=dict(
                    # transform() returns an (n, 1) array; marker sizes
                    # must be one value per point.
                    size=mx_scale.transform(vals).ravel(),
                    opacity=0.5),
                showlegend=show_legend,
                text=safe_df.columns[in_dataset]))

    # Labels are taken in the same (column) order as the coordinates so that
    # every name is attached to its own point.
    is_top = safe_df.columns.isin(top10_feas)
    fig.add_trace(
        go.Scatter(
            x=pca_result[is_top, 0],
            y=pca_result[is_top, 1],
            mode="text",
            hoverinfo='none',
            textposition="middle center",
            name='name for searching',
            showlegend=False,
            textfont=dict(size=13),
            text=list(safe_df.columns[is_top])))

    fig.layout.update(
        dict(
            xaxis=dict(
                title="PC1({:.2f}%)".format(pca.explained_variance_ratio_[0] *
                                            100)),
            yaxis=dict(
                title="PC2({:.2f}%)".format(pca.explained_variance_ratio_[1] *
                                            100)),
            width=width,
            height=height,
            font=dict(size=15),
            hovermode='closest',
        ))

    output_fig(fig, output, mode)
    logger("Ordination graph has been output to", output, verbose=1)