Esempio n. 1
0
def tsne(df, out_csv: Path):
    """
    Compute a 2-D t-SNE embedding of the columns of `df`, cache it, and plot it.

    If `out_csv` already exists, the embedding is loaded from it instead of
    being recomputed. Otherwise `TSNE(random_state=seed)` is fitted on `df.T`
    and the result is written to `out_csv` as a tab-separated table.

    A scatter plot of the embedding is saved next to `out_csv`, with the
    last suffix of `out_csv` replaced by ".png".

    :param df: Table indexed by "gene_name" with one column per sample.
        Only used when the cache file does not exist.
    :param out_csv: Path of the cache file. Compression is inferred from
        its suffix both on write and on read.
    """
    log.info(relpath(out_csv))

    if out_csv.is_file():
        # Reuse the cached embedding
        X = pd.read_csv(out_csv, sep='\t', compression="infer", index_col=0)
        assert (X.index.name == "sample_name")
        assert (list(X.columns) == ['x', 'y'])
    else:
        assert (df.index.name == "gene_name")
        X = pd.DataFrame(
            index=pd.Series(df.columns, name="sample_name"),
            columns=['x', 'y'],
            data=TSNE(random_state=seed).fit_transform(df.T),
        )
        # Use the same suffix-based compression rule as the read above;
        # forcing gzip here made a cache file without a ".gz" suffix
        # unreadable on the next call.
        X.to_csv(out_csv, sep='\t', compression="infer")

    # https://matplotlib.org/tutorials/introductory/customizing.html
    style = {
        'legend.fontsize': "xx-small",
        'legend.framealpha': 0.5,
    }

    # Marker size shrinks with the number of points;
    # log10 is zero for one point and -inf for none, so use a fixed size then
    n_points = len(X)
    marker_size = (10 / np.log10(n_points)) if (n_points > 1) else 10

    with Plox(style) as px:
        px.a.plot(X.x, X.y, '.', ms=marker_size)
        # px.a.legend()
        px.a.axis('off')
        px.f.savefig(out_csv.with_suffix(".png"))
Esempio n. 2
0
def view_graph(g):
    """
    Lay out the graph `g`, draw it, and save the figure to PARAM['out_fig'].

    Every edge of `g` must carry a 'type' attribute equal to 'bp' or 'bb';
    any other value raises KeyError. As a side effect, each edge of `g`
    receives a 'weight' attribute (1 for 'bp', 10 for 'bb') that is used
    by the spring layout.

    Edges of type 'bp' are drawn in blue and edges of type 'bb' in black.
    The smallest node (by `min`) is marked green and the largest red.
    """
    weight_by_type = {'bp': 1, 'bb': 10}
    params = dict(node_size=2)

    with Plox() as px:
        # One pass over the edges: set the layout weight and split by type
        edges_by_type = {'bp': [], 'bb': []}
        for (a, b, d) in g.edges.data(data='type'):
            g.edges[(a, b)]['weight'] = weight_by_type[d]
            edges_by_type[d].append((a, b))

        bp = edges_by_type['bp']
        bb = edges_by_type['bb']

        # Initial positions from graphviz, then refined by the spring layout
        pos = graphviz_layout(g, prog="sfdp")
        pos = nx.spring_layout(g, k=10, pos=pos, iterations=1000, threshold=1e-8, weight='weight')

        # Draw everything explicitly on the axes of this figure
        nx.draw_networkx_nodes(g, pos=pos, ax=px.a, **params)
        nx.draw_networkx_edges(g, pos=pos, ax=px.a, edgelist=bp, edge_color='b', **params)
        nx.draw_networkx_edges(g, pos=pos, ax=px.a, edgelist=bb, edge_color='k', **params)

        # Highlight the extreme nodes
        nx.draw_networkx_nodes(g, pos=pos, ax=px.a, nodelist=[min(g.nodes)], node_color='g', **params)
        nx.draw_networkx_nodes(g, pos=pos, ax=px.a, nodelist=[max(g.nodes)], node_color='r', **params)

        PARAM['out_fig'].parent.mkdir(parents=True, exist_ok=True)
        px.f.savefig(PARAM['out_fig'])
Esempio n. 3
0
def visualize_graph(g: nx.DiGraph) -> Plox:
    """
    Draw the graph `g` in a radial ("twopi") layout rooted at the node '___'
    and yield the open Plox so that the caller can save or show the figure.

    Nodes whose 'kind' attribute is "aa" are drawn in red and labelled with
    their own name; nodes of kind 'origin' are labelled "-"; nodes without
    a kind are labelled with the last character of their name after
    stripping underscores. Any other kind raises KeyError.

    NOTE(review): this function is a generator; presumably it is wrapped
    by a context-manager decorator that is outside this view -- confirm.
    """
    with Plox() as px:
        root = "___"

        # Split the nodes: "aa" nodes on one side, and the remaining ones
        # grouped by the number of underscores in their name (2, 1 or 0)
        by_underscores = {2: [], 1: [], 0: []}
        aa_nodes = []
        for (node, kind) in g.nodes(data='kind'):
            if kind == "aa":
                aa_nodes.append(node)
                continue
            n_underscores = node.count('_')
            if n_underscores in by_underscores:
                by_underscores[n_underscores].append(node)

        inner_nodes = [root] + by_underscores[2] + by_underscores[1] + by_underscores[0]

        # Other layouts tried here: spring, shell, planar, kamada_kawai
        pos = graphviz_layout(g, prog="twopi", root=root)

        nx.draw_networkx_edges(g, pos=pos)
        nx.draw_networkx_nodes(g, pos=pos, nodelist=inner_nodes)
        nx.draw_networkx_nodes(g, pos=pos, nodelist=aa_nodes, node_color='r')

        # Node label, chosen by the kind of the node
        labels = {}
        for (node, kind) in g.nodes(data='kind'):
            label_by_kind = {
                'aa': node,
                'origin': "-",
                None: last(node.strip('_'), None),
            }
            labels[node] = label_by_kind[kind]

        nx.draw_networkx_labels(g, pos=pos, labels=labels)

        yield px
Esempio n. 4
0
File: ex2.py Progetto: numpde/cbb
    a = -1
    b = 0.0
    x = np.arange(0, n)
    s = 1
    # Generate observations
    y = a + b * x + norm(loc=0, scale=s).rvs(n)
    # Linear regression
    (b_hat, a_hat, _, _, _) = linregress(x, y)
    # The log-likelihood statistic
    lam = (s**(-2)) * (np.sum(np.power(y - np.mean(y), 2)) -
                       np.sum(np.power(y - (a_hat + b_hat * x), 2)))
    return lam


# Sample size of one experiment and the number of repetitions
n = 10
r = 1000

# One value of the statistic per experiment
T = np.array([experiment(n=n) for _ in range(r)])

with Plox() as px:
    # Keep only the values between the 1% and the 90% quantiles
    (q_lo, q_hi) = (np.quantile(T, 0.01), np.quantile(T, 0.9))
    T = T[(q_lo <= T) & (T <= q_hi)]

    px.a.hist(T, bins='stone', density=True, label="Observed")

    # Overlay the chi-squared density on the sorted observed values
    df = 1
    T_sorted = sorted(T)
    density = chi2(df=df).pdf(T_sorted)
    px.a.plot(T_sorted, density, '-', label=F"chi-squared (df={df})")

    px.a.set_title(F"Wilks' theorem for {r} experiments of sample size {n}")
    px.a.set_xlabel("lambda")
    px.a.legend()

    # The figure goes next to this script
    px.f.savefig(Path(__file__).with_suffix(".png"))
Esempio n. 5
0
def main():
    """
    Compare the true and the Benjamini-Hochberg estimated FDR, and plot p-values.

    Observations and their ground truth come from `get_obs()`, p-values
    from `pvalues(obs)`. All figures are written to the "figs" folder next
    to this file:

    - fdr.png: true FDR and B-H estimate against the p-value (log-log)
    - h0_yay.png, h0_nay.png, h0_all.png: histograms of the p-values
    - p.png: sorted p-values against their rank, with the B-H line

    NOTE(review): `h0` is used as a boolean mask (`ps[h0]`, `~h0`), so it
    presumably flags the tests where the null hypothesis holds -- confirm
    against `get_obs`.
    """
    (obs, h0) = get_obs()
    ps = pvalues(obs)
    # Ranks 1..m, one per p-value (used after sorting below)
    ranks = np.asarray(range(1, len(ps) + 1))

    # Sort everything jointly by p-value
    (ps, h0, obs) = map(np.asarray, zip(*sorted(zip(ps, h0, obs))))

    alpha = 0.05
    print(F"There are {sum(ps <= alpha)} p-values less or equal {alpha}.")

    # The true False Discovery Rate for the corresponding p-values `ps`
    fdr_true = np.cumsum(h0) / ranks

    # Benjamini-Hochberg estimate: p-value -> FDR
    fdr_est = np.asarray(to_fdr(ps))

    fig_path = Path(__file__).parent / "figs"
    fig_path.mkdir(parents=True, exist_ok=True)

    with Plox() as px:
        ii = (fdr_true > 0)  # because of log-log plot
        px.a.plot(ps[ii], fdr_true[ii], label="True FDR")
        px.a.plot(ps[ii], fdr_est[ii], label="B-H FDR")
        px.a.plot([min(ps[ii]), max(ps[ii])], [alpha, alpha],
                  "r--",
                  label=(F"FDR = {alpha}"))
        px.a.grid()
        px.a.set_xscale('log')
        px.a.set_yscale('log')
        px.a.set_xlabel("p-value")
        px.a.set_ylabel("FDR")
        px.a.legend()
        px.f.savefig(fig_path / "fdr.png")

    # Histograms of the p-values: under H0, under not-H0, and all together
    histograms = [
        ("h0_yay.png", "H0 is true. Observed p-value.", ps[h0]),
        ("h0_nay.png", "H0 is false. Observed p-value.", ps[~h0]),
        ("h0_all.png", "Observed p-value.", ps),
    ]

    for (filename, title, values) in histograms:
        with Plox() as px:
            px.a.hist(values, bins='scott')
            px.a.set_title(title)
            px.f.savefig(fig_path / filename)

    # Benjamini-Hochberg line: alpha * rank / m
    bh = alpha / len(ranks) * ranks

    # Largest p-value under the B-H line, if there is any
    below_bh = ps[ps <= bh]
    p_fdr = max(below_bh) if len(below_bh) else None

    with Plox() as px:
        px.a.scatter(ranks,
                     ps,
                     s=1,
                     c=h0,
                     cmap="copper",
                     label="Tests")
        px.a.plot([min(ranks), max(ranks)], [alpha, alpha],
                  'b-',
                  label=F"p-value = {alpha}")
        if p_fdr is not None:
            # Omitted when no p-value lies under the B-H line
            px.a.plot([min(ranks), max(ranks)], [p_fdr, p_fdr],
                      'g-',
                      label=F"FDR = {alpha}")
        px.a.plot(ranks, bh, 'r--', label="BH line")
        px.a.legend()
        px.a.grid()
        px.a.set_xscale('log')
        px.a.set_yscale('log')
        px.a.set_xlabel("rank")
        px.a.set_ylabel("p-value")
        px.f.savefig(fig_path / "p.png")
Esempio n. 6
0
# RA, 2020-12-20

from plox import Plox, rcParam
from pathlib import Path

# Use a "large" font for the legend and for the tick labels of both axes.
# For the meaning of the keys see
# https://matplotlib.org/tutorials/introductory/customizing.html
style = {
    key: "large"
    for key in (
        rcParam.Legend.fontsize,
        rcParam.Xtick.labelsize,
        rcParam.Ytick.labelsize,
    )
}

with Plox(style) as px:
    px.a.plot([1, 2, 3], [4, 3, 5], 'o--')

    # Save the figure next to this script, then display it
    figure_file = Path(__file__).with_suffix('.png')
    px.f.savefig(figure_file)
    px.show()
Esempio n. 7
0
def show(g: nx.MultiDiGraph, state: pd.Series):
    """
    Draw the reaction network `g` annotated with the values from `state`.

    Nodes and edges of `g` carry a 'matlab' attribute that is looked up in
    `state`. Node size is proportional to the value of the species. The
    edges between each pair of nodes are combined into one net flux: its
    direction gives the arrow, and the log10 of its magnitude gives the
    width and the opacity. Species or fluxes without a value in `state`
    are reported with a warning.

    The figure is saved as "onion.png" and "onion.pdf" in the folder
    returned by `mkdir` for the path of this file without its suffix.

    :param g: Graph whose nodes are named as the keys of the hard-coded
        layout below.
    :param state: Values indexed by the 'matlab' identifiers.
    """
    # Hand-placed node coordinates
    # pos = nx.shell_layout(g)
    # pos = nx.planar_layout(g)
    # pos = nx.spring_layout(g)
    pos = {
        'Free cargo (c)': [-6, +8],
        'Cargo·Impβ (c)': [+6, +8],
        'Impβ·Ran·GTP (c)': [-4, +6],
        'Free Impβ (c)': [+4, +6],
        'Ran·GTP (c)': [-2, +4],
        'Ran·GDP (c)': [+2, +4],
        'RanBP1·Ran·GTP (c)': [0, 3],
        # 'RanBP1 (c)': [0, 1],
        'Ran·GTP (n)': [-2, -2],
        'Ran·GDP (n)': [+2, -2],
        'Impβ·Ran·GTP (n)': [-4, -4],
        'Free Impβ (n)': [+4, -4],
        'Free cargo (n)': [-6, -6],
        'Cargo·Impβ (n)': [+6, -6],
    }
    # pos = nx.get_node_attributes(g, name='pos')

    # Node -> value of the species in `state` (NaN if missing)
    species = pd.Series(nx.get_node_attributes(g, name='matlab'))
    species = species.map(state)

    if species.isna().any():
        log.warning(
            f"Species have no data: \n{list(species[species.isna()].index)}")

    # Directed multifluxes
    fluxes = pd.Series(nx.get_edge_attributes(g, name='matlab'))
    fluxes = pd.DataFrame({'matlab': fluxes, 'value': fluxes.map(state)})

    # Check for missing values here, on the individual edges:
    # the sum below skips NaN, so a check on the combined fluxes never fires
    flux_is_missing = fluxes['value'].isna()
    if flux_is_missing.any():
        log.warning(
            f"Fluxes have no data: \n{list(fluxes[flux_is_missing].index)}")

    # Directed fluxes
    f = fluxes.reset_index().groupby(
        by=['level_0', 'level_1']).value.sum().to_dict()
    # One directed edge per pair - version of the graph
    undirected = nx.Graph(g)
    ug = nx.DiGraph(undirected).edge_subgraph(undirected.edges)
    # Combined fluxes: flux along the edge minus flux against it
    fluxes = pd.Series(index=list(ug.edges), data=list(
        ug.edges)).transform(lambda e: (f.get(e, 0) - f.get((e[1], e[0]), 0)))

    # log.info(f"Species: \n{species}")
    # log.info(f"Fluxes: \n{fluxes}")

    node_size = species.fillna(0)
    # node_size = node_size[node_size > 0]
    # node_size = node_size / node_size.sum()
    node_size = 300 * node_size

    # Node name without the compartment marker
    node_labels = pd.Series(data=species.index, index=species.index)
    node_labels = node_labels.transform(
        lambda s: s.replace("(c)", "").replace("(n)", "").strip())

    # Nonzero net fluxes, split by sign
    edge_width = fluxes
    edge_width = edge_width[edge_width != 0]
    (fwd, bwd) = (edge_width[edge_width >= 0].index,
                  edge_width[edge_width < 0].index)
    # Width from the order of magnitude, rescaled to the range [0.4, 1.4]
    edge_width = edge_width.transform(lambda x: np.log10(np.abs(x)))
    edge_width = 0.4 + (edge_width - edge_width.min()) / (
        (edge_width.max() - edge_width.min()) or 1)
    edge_alpha = 0.7 * (edge_width / edge_width.max())

    edge_labels = fluxes.abs().transform(lambda x: f"{x:0.02g}")

    style = {
        rc.Figure.frameon: False,
    }

    with Plox(style) as px:
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.4)
        nx.draw_networkx_nodes(**kw,
                               nodelist=node_size.index,
                               node_size=node_size,
                               node_color="C0",
                               linewidths=0)

        kw = dict(G=ug, pos=pos, ax=px.a, edge_color='g')
        nx.draw_networkx_edges(**kw,
                               width=edge_width[fwd],
                               alpha=edge_alpha[fwd],
                               edgelist=fwd)
        # Negative net flux: draw the edge in the opposite direction
        nx.draw_networkx_edges(**kw,
                               width=edge_width[bwd],
                               alpha=edge_alpha[bwd],
                               edgelist=[(v, u) for (u, v) in bwd])

        kw = dict(G=ug, pos=pos, ax=px.a, alpha=0.7)
        nx.draw_networkx_edge_labels(**kw,
                                     edge_labels=edge_labels.to_dict(),
                                     font_size=5,
                                     font_color='g')

        # Name above the node, value below it
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.8, font_color='k')
        nx.draw_networkx_labels(**kw,
                                font_size=7,
                                labels=node_labels,
                                verticalalignment="bottom")
        nx.draw_networkx_labels(**kw,
                                font_size=6,
                                labels=species.transform(
                                    lambda x: f"{x:0.02g}"),
                                verticalalignment="top")

        px.a.axis('off')

        # Dashed horizontal line through the middle of the plot
        y = np.mean(px.a.get_ylim())
        px.a.plot(px.a.get_xlim(), [y, y], '--', color='k', lw=1, zorder=-100)

        # Compartment names just above and just below the line
        kw = dict(x=(min(px.a.get_xlim()) + 0.8),
                  ha="center",
                  zorder=100,
                  alpha=0.5,
                  fontdict=dict(fontsize=6))
        px.a.text(**kw, y=(y + 0.1), s="Cytoplasm", va="bottom")
        px.a.text(**kw, y=(y - 0.1), s="Nucleus", va="top")

        out_dir = mkdir(Path(__file__).with_suffix(''))
        px.f.savefig(out_dir / "onion.png")
        px.f.savefig(out_dir / "onion.pdf")