def tsne(df, out_csv: Path):
    """
    Compute (or load from cache) a 2D t-SNE embedding of the samples of `df`
    and save a scatter plot of it next to the cache file.

    :param df: Table whose index is named "gene_name" and whose columns are
        the samples; its transpose is passed to TSNE. Only used if the
        cache file does not exist yet.
    :param out_csv: Tab-separated cache file of the embedding, with the
        index "sample_name" and the columns 'x' and 'y'. It is read if it
        exists; otherwise it is written (gzip-compressed). The figure is
        saved to `out_csv.with_suffix(".png")`.
    """
    log.info(relpath(out_csv))

    if out_csv.is_file():
        # The cache is always written gzip-compressed below, whatever the
        # suffix of `out_csv` is, while "infer" goes by the suffix only.
        # Recognize gzip by its magic number so that reading the cache back
        # does not depend on the file name.
        with out_csv.open(mode='rb') as fd:
            is_gzip = (fd.read(2) == b'\x1f\x8b')

        X = pd.read_csv(
            out_csv, sep='\t', index_col=0,
            compression=("gzip" if is_gzip else "infer"),
        )

        assert (X.index.name == "sample_name")
        assert (list(X.columns) == ['x', 'y'])
    else:
        assert (df.index.name == "gene_name")

        # One row per sample (= column of `df`)
        X = pd.DataFrame(
            index=pd.Series(df.columns, name="sample_name"),
            columns=['x', 'y'],
            data=TSNE(random_state=seed).fit_transform(df.T),
        )

        X.to_csv(out_csv, sep='\t', compression="gzip")

    # https://matplotlib.org/tutorials/introductory/customizing.html
    style = {
        'legend.fontsize': "xx-small",
        'legend.framealpha': 0.5,
    }

    with Plox(style) as px:
        # Smaller markers for larger datasets
        px.a.plot(X.x, X.y, '.', ms=(10 / np.log10(len(X))))
        px.a.axis('off')
        px.f.savefig(out_csv.with_suffix(".png"))
def view_graph(g):
    """
    Draw the graph `g` and save the figure to PARAM['out_fig'].

    Each edge of `g` must have a 'type' attribute equal to 'bp' or 'bb'
    (a KeyError is raised otherwise). As a side effect, each edge of `g`
    gets a 'weight' attribute (1 for 'bp', 10 for 'bb') that is used
    by the spring layout.

    'bp' edges are drawn in blue and 'bb' edges in black.
    The smallest node is highlighted in green and the largest in red.
    """
    params = dict(node_size=2)

    # Layout weight of an edge by its type
    weight_by_type = {'bp': 1, 'bb': 10}

    # Assign the weights and collect the edges by type in a single pass
    edges_by_type = {t: [] for t in weight_by_type}

    for (a, b, t) in g.edges.data(data='type'):
        g.edges[(a, b)]['weight'] = weight_by_type[t]
        edges_by_type[t].append((a, b))

    # Initial layout from graphviz, then refined by the weighted spring layout
    pos = graphviz_layout(g, prog="sfdp")
    pos = nx.spring_layout(g, k=10, pos=pos, iterations=1000, threshold=1e-8, weight='weight')

    with Plox() as px:
        # Draw everything explicitly on the axes of this figure
        # (without `ax`, networkx draws on pyplot's current axes)
        kw = dict(pos=pos, ax=px.a, **params)

        nx.draw_networkx_nodes(g, **kw)

        nx.draw_networkx_edges(g, edgelist=edges_by_type['bp'], edge_color='b', **kw)
        nx.draw_networkx_edges(g, edgelist=edges_by_type['bb'], edge_color='k', **kw)

        nx.draw_networkx_nodes(g, nodelist=[min(g.nodes)], node_color='g', **kw)
        nx.draw_networkx_nodes(g, nodelist=[max(g.nodes)], node_color='r', **kw)

        PARAM['out_fig'].parent.mkdir(parents=True, exist_ok=True)
        px.f.savefig(PARAM['out_fig'])
def visualize_graph(g: nx.DiGraph) -> Plox:
    """
    Draw the graph `g` in a radial ("twopi") layout rooted at the node '___'
    and yield the Plox handle of the figure.

    Nodes of kind "aa" are drawn in red and labelled by their name;
    nodes of kind "origin" are labelled "-"; nodes without a kind are
    labelled by the last character of their name stripped of underscores.

    NOTE(review): this is a generator function -- presumably it is wrapped
    as a context manager where it is defined; confirm against the full file.
    """

    def label(n, k):
        # All candidates are evaluated before the lookup by kind;
        # a kind other than these three raises a KeyError
        candidates = {
            'aa': n,
            'origin': "-",
            None: last(n.strip('_'), None),
        }
        return candidates[k]

    with Plox() as px:
        node_kinds = list(g.nodes(data='kind'))

        def non_aa(underscores):
            # Nodes other than "aa" with this many underscores in the name
            return [
                n
                for (n, k) in node_kinds
                if (k != "aa") and (n.count('_') == underscores)
            ]

        # The root, followed by the non-"aa" nodes with 2, 1, 0 underscores
        inner_nodes = ["___"] + non_aa(2) + non_aa(1) + non_aa(0)
        aa_nodes = [n for (n, k) in node_kinds if (k == "aa")]

        # Other layouts tried here: spring, shell, planar, kamada-kawai
        pos = graphviz_layout(g, prog="twopi", root='___')

        nx.draw_networkx_edges(g, pos=pos)
        nx.draw_networkx_nodes(g, pos=pos, nodelist=inner_nodes)
        nx.draw_networkx_nodes(g, pos=pos, nodelist=aa_nodes, node_color='r')

        labels = {n: label(n, k) for (n, k) in node_kinds}
        nx.draw_networkx_labels(g, pos=pos, labels=labels)

        yield px
a = -1 b = 0.0 x = np.arange(0, n) s = 1 # Generate observations y = a + b * x + norm(loc=0, scale=s).rvs(n) # Linear regression (b_hat, a_hat, _, _, _) = linregress(x, y) # The log-likelihood statistic lam = (s**(-2)) * (np.sum(np.power(y - np.mean(y), 2)) - np.sum(np.power(y - (a_hat + b_hat * x), 2))) return lam n = 10 r = 1000 T = np.array([experiment(n=n) for _ in range(r)]) with Plox() as px: T = T[(np.quantile(T, 0.01) <= T) & (T <= np.quantile(T, 0.9))] px.a.hist(T, bins='stone', density=True, label="Observed") df = 1 px.a.plot(sorted(T), chi2(df=df).pdf(sorted(T)), '-', label=F"chi-squared (df={df})") px.a.set_title(F"Wilks' theorem for {r} experiments of sample size {n}") px.a.set_xlabel("lambda") px.a.legend() px.f.savefig(Path(__file__).with_suffix(".png"))
def main():
    """
    Compare the true False Discovery Rate with its Benjamini-Hochberg
    estimate on the observations from `get_obs`, and save the figures
    to the folder "figs" next to this file:

      - fdr.png: true and estimated FDR against the p-value,
      - h0_yay.png, h0_nay.png, h0_all.png: histograms of the p-values
        where H0 is true / where H0 is false / overall,
      - p.png: the sorted p-values against their rank with the B-H line.
    """
    (obs, h0) = get_obs()
    ps = pvalues(obs)

    # Ranks 1, ..., len(ps) of the sorted p-values.
    # (Subscripting the builtin `range` is a TypeError.)
    ranks = np.arange(1, len(ps) + 1)

    # Sort the tests by p-value; `h0` flags the tests where H0 is true
    (ps, h0, obs) = map(np.asarray, zip(*sorted(zip(ps, h0, obs))))

    alpha = 0.05

    print(F"There are {np.count_nonzero(ps <= alpha)} p-values less or equal {alpha}.")

    # The true False Discovery Rate for the corresponding p-values `ps`
    fdr_true = np.cumsum(h0) / ranks

    # Benjamini-Hochberg estimate: p-value -> FDR
    fdr_est = np.asarray(to_fdr(ps))

    fig_path = Path(__file__).parent / "figs"
    fig_path.mkdir(parents=True, exist_ok=True)

    with Plox() as px:
        ii = (fdr_true > 0)  # because of log-log plot
        px.a.plot(ps[ii], fdr_true[ii], label="True FDR")
        px.a.plot(ps[ii], fdr_est[ii], label="B-H FDR")
        px.a.plot([min(ps[ii]), max(ps[ii])], [alpha, alpha], "r--", label=(F"FDR = {alpha}"))
        px.a.grid()
        px.a.set_xscale('log')
        px.a.set_yscale('log')
        px.a.set_xlabel("p-value")
        px.a.set_ylabel("FDR")
        px.a.legend()
        px.f.savefig(fig_path / "fdr.png")

    with Plox() as px:
        px.a.hist(ps[h0], bins='scott')
        px.a.set_title("H0 is true. Observed p-value.")
        px.f.savefig(fig_path / "h0_yay.png")

    with Plox() as px:
        px.a.hist(ps[~h0], bins='scott')
        px.a.set_title("H0 is false. Observed p-value.")
        px.f.savefig(fig_path / "h0_nay.png")

    with Plox() as px:
        px.a.hist(ps, bins='scott')
        px.a.set_title("Observed p-value.")
        px.f.savefig(fig_path / "h0_all.png")

    # The Benjamini-Hochberg line: rank -> p-value threshold
    bh = alpha / len(ranks) * ranks

    # The largest p-value on or below the B-H line, if there is any
    below_bh = ps[ps <= bh]
    p_fdr = max(below_bh) if len(below_bh) else None

    with Plox() as px:
        # The colormap is given by name:
        # `matplotlib.cm.get_cmap` is not available in recent Matplotlib
        px.a.scatter(ranks, ps, s=1, c=h0, cmap="copper", label="Tests")
        px.a.plot([min(ranks), max(ranks)], [alpha, alpha], 'b-', label=F"p-value = {alpha}")

        if p_fdr is not None:
            px.a.plot([min(ranks), max(ranks)], [p_fdr, p_fdr], 'g-', label=F"FDR = {alpha}")

        px.a.plot(ranks, bh, 'r--', label="BH line")
        px.a.legend()
        px.a.grid()
        px.a.set_xscale('log')
        px.a.set_yscale('log')
        px.a.set_xlabel("rank")
        px.a.set_ylabel("p-value")
        px.f.savefig(fig_path / "p.png")
# RA, 2020-12-20

# Minimal plox example: plot three points with enlarged legend and
# tick label fonts, save the figure next to this file, then show it.

from pathlib import Path

from plox import Plox, rcParam

# The figure goes next to this script, with the suffix replaced by .png
out_png = Path(__file__).with_suffix('.png')

# https://matplotlib.org/tutorials/introductory/customizing.html
style = dict.fromkeys(
    [
        rcParam.Legend.fontsize,
        rcParam.Xtick.labelsize,
        rcParam.Ytick.labelsize,
    ],
    "large",
)

with Plox(style) as px:
    (xx, yy) = ([1, 2, 3], [4, 3, 5])
    px.a.plot(xx, yy, 'o--')
    px.f.savefig(out_png)
    px.show()
def show(g: nx.MultiDiGraph, state: pd.Series):
    """
    Draw the network `g` at fixed node positions, annotated with the values
    from `state`, and save it as "onion.png" and "onion.pdf" in the folder
    named like this file without its suffix.

    :param g: Graph whose nodes and edges have a 'matlab' attribute;
        that attribute is looked up in `state` to get the value of
        the species (node) or of the flux (edge).
    :param state: Values indexed by those 'matlab' names.

    The node size is proportional to the species value. Parallel edges are
    summed by direction and the two directions are netted into a single
    edge per node pair, drawn in the direction of the net flux, with the
    width and the opacity increasing with the log-magnitude of the net flux.
    Species and fluxes that are missing from `state` are reported
    with a warning and count as zero.
    """
    # Fixed positions: the "(c)" nodes above and the "(n)" nodes below
    pos = {
        'Free cargo (c)': [-6, +8],
        'Cargo·Impβ (c)': [+6, +8],
        'Impβ·Ran·GTP (c)': [-4, +6],
        'Free Impβ (c)': [+4, +6],
        'Ran·GTP (c)': [-2, +4],
        'Ran·GDP (c)': [+2, +4],
        'RanBP1·Ran·GTP (c)': [0, 3],
        # 'RanBP1 (c)': [0, 1],

        'Ran·GTP (n)': [-2, -2],
        'Ran·GDP (n)': [+2, -2],
        'Impβ·Ran·GTP (n)': [-4, -4],
        'Free Impβ (n)': [+4, -4],
        'Free cargo (n)': [-6, -6],
        'Cargo·Impβ (n)': [+6, -6],
    }

    # Node -> value of the species
    species = pd.Series(nx.get_node_attributes(g, name='matlab')).map(state)

    if species.isna().any():
        log.warning(f"Species have no data: \n{list(species[species.isna()].index)}")

    # Directed multifluxes, one per edge of the multigraph
    multifluxes = pd.Series(nx.get_edge_attributes(g, name='matlab'))
    multifluxes = pd.DataFrame({'matlab': multifluxes, 'value': multifluxes.map(state)})

    # Report the missing fluxes here:
    # the sum below skips NaN, so they cannot be detected afterwards
    if multifluxes['value'].isna().any():
        log.warning(f"Fluxes have no data: \n{list(multifluxes[multifluxes['value'].isna()].index)}")

    # Directed fluxes: (node, node) -> sum over the parallel edges
    f = multifluxes.reset_index().groupby(by=['level_0', 'level_1']).value.sum().to_dict()

    # One directed edge per pair - version of the graph
    undirected = nx.Graph(g)
    ug = nx.DiGraph(undirected).edge_subgraph(undirected.edges)

    # Combined fluxes: forward minus backward along each edge of `ug`
    fluxes = pd.Series(index=list(ug.edges), data=list(ug.edges)).transform(
        lambda e: (f.get(e, 0) - f.get((e[1], e[0]), 0))
    )

    node_size = 300 * species.fillna(0)

    # Node -> name of the node without the compartment tag
    node_labels = {
        n: n.replace("(c)", "").replace("(n)", "").strip()
        for n in species.index
    }

    # Only draw the edges with a nonzero net flux
    edge_width = fluxes[fluxes != 0]

    # Edges with the net flux along / against the direction of the edge
    (fwd, bwd) = (edge_width[edge_width >= 0].index, edge_width[edge_width < 0].index)

    # Rescale the log-magnitude to the range [0.4, 1.4]
    edge_width = edge_width.transform(lambda x: np.log10(np.abs(x)))
    edge_width = 0.4 + (edge_width - edge_width.min()) / ((edge_width.max() - edge_width.min()) or 1)

    edge_alpha = 0.7 * (edge_width / edge_width.max())
    edge_labels = fluxes.abs().transform(lambda x: f"{x:0.02g}")

    style = {
        rc.Figure.frameon: False,
    }

    # Note: networkx is given plain lists and dicts rather than pandas
    # objects because it indexes sequences by integer position,
    # which is deprecated for a Series with a non-integer index

    with Plox(style) as px:
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.4)
        nx.draw_networkx_nodes(
            **kw,
            nodelist=list(node_size.index), node_size=node_size.tolist(),
            node_color="C0", linewidths=0,
        )

        kw = dict(G=ug, pos=pos, ax=px.a, edge_color='g')
        nx.draw_networkx_edges(
            **kw,
            width=edge_width[fwd].tolist(), alpha=edge_alpha[fwd].tolist(),
            edgelist=list(fwd),
        )
        # Negative net flux: draw the edge reversed
        nx.draw_networkx_edges(
            **kw,
            width=edge_width[bwd].tolist(), alpha=edge_alpha[bwd].tolist(),
            edgelist=[(v, u) for (u, v) in bwd],
        )

        kw = dict(G=ug, pos=pos, ax=px.a, alpha=0.7)
        nx.draw_networkx_edge_labels(**kw, edge_labels=edge_labels.to_dict(), font_size=5, font_color='g')

        # Name above and value below the center of each node
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.8, font_color='k')
        nx.draw_networkx_labels(**kw, font_size=7, labels=node_labels, verticalalignment="bottom")
        nx.draw_networkx_labels(
            **kw, font_size=6,
            labels=species.transform(lambda x: f"{x:0.02g}").to_dict(),
            verticalalignment="top",
        )

        px.a.axis('off')

        # Dashed horizontal line through the middle of the axes,
        # labelled "Cytoplasm" above and "Nucleus" below
        y = np.mean(px.a.get_ylim())
        px.a.plot(px.a.get_xlim(), [y, y], '--', color='k', lw=1, zorder=-100)

        kw = dict(x=(min(px.a.get_xlim()) + 0.8), ha="center", zorder=100, alpha=0.5, fontdict=dict(fontsize=6))
        px.a.text(**kw, y=(y + 0.1), s="Cytoplasm", va="bottom")
        px.a.text(**kw, y=(y - 0.1), s="Nucleus", va="top")

        out_dir = mkdir(Path(__file__).with_suffix(''))
        px.f.savefig(out_dir / "onion.png")
        px.f.savefig(out_dir / "onion.pdf")