Example #1
def get_graph():
    g = nx.MultiDiGraph()

    g.add_nodes_from(species.keys())
    nx.set_node_attributes(g, species, name="matlab")

    # Each edge is a flux; its `matlab` attribute names the quantity holding the value
    g.add_edge('Ran·GTP (n)', 'Ran·GTP (c)', matlab="FluxRanGTP")
    g.add_edge('Ran·GDP (n)', 'Ran·GDP (c)', matlab="FluxRanGDP")

    g.add_edge('Ran·GTP (c)', 'Ran·GDP (c)', matlab="GAP")

    g.add_edge('RanBP1·Ran·GTP (c)', 'Ran·GDP (c)', matlab="GAP_RanBP1")
    g.add_edge('Ran·GTP (c)', 'RanBP1·Ran·GTP (c)', matlab="F RanGTP--RanBP1")

    # g.add_edge('RanBP1·Ran·GTP (c)', 'RanBP1 (c)', matlab="GAP_RanBP1")
    # g.add_edge('RanBP1 (c)', 'RanBP1·Ran·GTP (c)', matlab="F RanGTP--RanBP1")

    g.add_edge('Impβ·Ran·GTP (c)', 'Ran·GDP (c)', matlab="ImpB GAP")
    g.add_edge('Impβ·Ran·GTP (c)', 'Free Impβ (c)', matlab="ImpB GAP")
    g.add_edge('Free Impβ (n)', 'Free Impβ (c)', matlab="F ImpB")
    g.add_edge('Ran·GDP (n)',
               'Ran·GTP (n)',
               matlab="Nuc RanGDP to RanGTP conversion")
    g.add_edge('Impβ·Ran·GTP (n)', 'Impβ·Ran·GTP (c)', matlab="F ImpB--RanGTP")
    g.add_edge('Impβ·Ran·GTP (n)', 'Free Impβ (n)', matlab="R nuc")
    g.add_edge('Impβ·Ran·GTP (n)', 'Ran·GTP (n)', matlab="R nuc")
    g.add_edge('Impβ·Ran·GTP (c)', 'Free Impβ (c)', matlab="R cyto")
    g.add_edge('Impβ·Ran·GTP (c)', 'Ran·GTP (c)', matlab="R cyto")

    g.add_edge('Free cargo (n)', 'Free cargo (c)', matlab="F Cargo")
    g.add_edge('Cargo·Impβ (n)', 'Cargo·Impβ (c)', matlab="F ImpB--Cargo")

    g.add_edge('Cargo·Impβ (n)', 'Free Impβ (n)', matlab="C nuc")
    g.add_edge('Cargo·Impβ (n)', 'Free cargo (n)', matlab="C nuc")
    g.add_edge('Cargo·Impβ (c)', 'Free Impβ (c)', matlab="C cyto")
    g.add_edge('Cargo·Impβ (c)', 'Free cargo (c)', matlab="C cyto")

    g.add_edge('Cargo·Impβ (n)',
               'Free cargo (n)',
               matlab="Cargo knock-off nuc")
    g.add_edge('Cargo·Impβ (n)',
               'Impβ·Ran·GTP (n)',
               matlab="Cargo knock-off nuc")
    g.add_edge('Ran·GTP (n)', 'Impβ·Ran·GTP (n)', matlab="Cargo knock-off nuc")

    g.add_edge('Cargo·Impβ (c)',
               'Free cargo (c)',
               matlab="Cargo knock-off cyto")
    g.add_edge('Cargo·Impβ (c)',
               'Impβ·Ran·GTP (c)',
               matlab="Cargo knock-off cyto")
    g.add_edge('Ran·GTP (c)',
               'Impβ·Ran·GTP (c)',
               matlab="Cargo knock-off cyto")

    if not set(g.nodes).issubset(species):
        log.warning(f"Unknown species: {set(g.nodes) - set(species)}.")

    return g
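A minimal usage sketch, assuming the module-level `species` mapping and `log` are in scope as above:

import networkx as nx

g = get_graph()
print(g.number_of_nodes(), "species,", g.number_of_edges(), "flux edges")

# Each edge carries the MATLAB name of its flux in the `matlab` attribute
print(nx.get_edge_attributes(g, name="matlab"))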
Example #2
def if_sheets_fails(retry_state: RetryCallState):
    assert isinstance(retry_state.outcome, Future)
    log.warning(
        f"Fetching sheet failed with exception `{retry_state.outcome.exception()}`."
    )
    log.warning("LOADING FROM DISK.")
    df = pd.read_table(unlist1(out_dir.glob("*.tsv")),
                       dtype=str,
                       na_filter=False)
    return df
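Presumably `if_sheets_fails` is meant as a tenacity `retry_error_callback`: it receives the final `RetryCallState` once retries are exhausted, and whatever it returns becomes the return value of the wrapped call. A hedged wiring sketch (`fetch_sheet` is hypothetical):

from tenacity import retry, stop_after_attempt, wait_fixed

@retry(stop=stop_after_attempt(3), wait=wait_fixed(2),
       retry_error_callback=if_sheets_fails)
def fetch_sheet() -> pd.DataFrame:
    # Hypothetical network fetch; on repeated failure, tenacity calls
    # if_sheets_fails and returns the DataFrame loaded from disk instead.
    ...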
Example #3
def print_scenario(df, c):
    log.info(f"Making scenario: {c}.")

    print(f'% Autogenerated by {relpath(__file__)} on {Now()}.')
    print()

    for (i, comp, n, p, u, v) in zip(df.Item, df.Compartment, df.Name,
                                     df.Parameter, df.Units, df[c]):
        if (v.lower() in ["", "default"]):
            continue

        log.info(f"Processing item: `{i}`.")

        if (i == ""):
            pass  # Rows without an item are ignored
        elif (i == "Reaction"):
            print(
                *[
                    f'r = m.Reactions({{m.Reactions.Name}} == "{n}");',
                    f'k = r.KineticLaw;',
                    f'p = k.Parameters({{k.Parameters.Name}} == "{p}");',
                    f'assert(1 == length(p));',
                    f'assert(p.Units == "{u}");',
                    f'p.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Compartment"):
            print(
                *[
                    f'c = m.Compartments({{m.Compartments.Name}} == "{n}");',
                    f'assert(1 == length(c));',
                    f'assert(c.Units == "{u}");',
                    f'c.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Species"):
            assert (p == "Value")
            assert (comp != "")  # Compartment
            print(
                *[
                    f'c = [m.Species.Parent];',
                    f's = m.Species(({{m.Species.Name}} == "{n}") & ({{c.Name}} == "{comp}"));',
                    f'assert(1 == length(s));',
                    f'assert(s.Units == "{u}");',
                    f's.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        else:
            log.warning(f"Unknown item: `{i}`.")
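A hedged sketch of the table layout that `print_scenario` expects, with column names taken from the `zip(...)` above (the scenario column `'scenario-A'` and all values are made up; the module's `log`, `relpath` and `Now` helpers are assumed in scope):

df = pd.DataFrame({
    'Item': ["Reaction"],
    'Compartment': [""],
    'Name': ["GAP"],
    'Parameter': ["k"],
    'Units': ["1/second"],
    'scenario-A': ["0.2"],
})

print_scenario(df, 'scenario-A')  # prints the MATLAB lines for the "Reaction" row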
Example #4
def plot_total_timecourse(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=3, alpha=0.5),
            '(n)': dict(ls="-.", lw=3, alpha=0.9),
            'NPC': dict(ls="-", lw=3, alpha=0.8),
        }

        # Dummy points so that each line style gets a black legend entry
        for s in fmt:
            px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = "C0"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in {"(c)", "(n)", "NPC"}
        }

        if len(spp) != sum(map(len, spp_by_suffix.values())):
            log.warning(
                f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}"
            )

        # time x species table of concentrations
        tx: pd.DataFrame = run.tx

        for (suffix, spp_group) in spp_by_suffix.items():
            if suffix == 'NPC':
                f = NPC_CONCENTRATION_FACTOR
            else:
                f = 1

            x = tx[spp_group].sum(axis=1) * f
            px.a.plot(tx.index / 3600, x, **fmt[suffix], color=color)

        px.a.set_yticks([y for y in px.a.get_yticks() if (y >= 0)])
        px.a.set_yticklabels([f"{y:.2g}" for y in px.a.get_yticks()])

        px.a.set_xlabel("Time, h")

        px.a.set_xscale('log')
        px.a.legend(loc="upper left")

        yield px
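`plot_total_timecourse` is a generator that yields the live `Plox` context, so the caller can still decorate and save the figure before the context closes. A hypothetical call:

for px in plot_total_timecourse(run, spp):
    px.a.set_title("Total concentration by compartment")
    px.f.savefig("total_timecourse.png")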
Example #5
def main():
    from data_source import runs

    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows(), key=(lambda ir: ir[0])):
        for (sp_display, sp_pattern) in sp_specs.items():
            # Species to include in the plot
            collect_spp = [
                candidate for candidate in run.tx.columns
                if re.match(sp_pattern, candidate)
            ]

            if collect_spp:
                log.info(f"Species for spec `{sp_display}`: {collect_spp}.")
            else:
                log.warning(f"No species selected for spec `{sp_display}`.")

            # File name and proto-ylabel
            name = sp_display

            for px in plot_total_steadystate(run, collect_spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ", r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label, fontdict={'fontsize': 20})

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview

    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            print(
                summary.applymap(
                    lambda p: os.path.relpath(p, out_dir)
                ).applymap(
                    lambda p:
                    f'<a href="{p}"><img style="width:{IMG_WIDTH}px" src="{p}"/></a>'
                ).to_html(escape=False))
Example #6
def plot_total_timecourse(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=2, alpha=0.5),
            '(n)': dict(ls="-.", lw=2, alpha=0.9),
            'NPC': dict(ls="-", lw=2, alpha=0.8),
        }

        # Dummy points so that each line style gets a black legend entry
        for s in fmt:
            px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = "C0"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in {"(c)", "(n)", "NPC"}
        }

        if (sum(map(len, spp_by_suffix.values())) != len(spp)):
            log.warning(
                f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}"
            )

        # time x species table of concentrations
        tx: pd.DataFrame = run.tx

        for (suffix, spp_group) in spp_by_suffix.items():
            x = tx[spp_group].sum(axis=1)
            px.a.plot(tx.index / 3600, x, **fmt[suffix], color=color)

        px.a.set_xlabel("Time, h")

        px.a.set_xscale('log')
        px.a.legend(fontsize=10)

        yield px
Example #7
def plot_total_steadystate(run, spp):
    with Plox(style) as px:
        fmt = {
            '(c)': dict(ls="--", lw=2, alpha=0.5),
            '(n)': dict(ls="-.", lw=2, alpha=0.9),
            'NPC': dict(ls="-", lw=2, alpha=0.8),
        }

        # for s in fmt:
        #     px.a.plot(1, 0, **fmt[s], color='k', label=f"...{s}")

        color = "C0"
        # px.a.plot(1, 0, "-", color=color, label=label)

        # Aggregate by suffix
        spp_by_suffix = {
            suffix: [sp for sp in spp if sp.endswith(suffix)]
            for suffix in ["(c)", "NPC", "(n)"]  # order for display
        }

        if (sum(map(len, spp_by_suffix.values())) != len(spp)):
            log.warning(f"Unknown suffix for species: {set(spp) - set(from_iterable(spp_by_suffix.values()))}")

        # `time` x `species` table of concentrations
        tx: pd.DataFrame = run.tx

        agg_by_suffix = pd.DataFrame(data={
            suffix: tx[spp].sum(axis=1)
            for (suffix, spp) in spp_by_suffix.items()
        })

        x01: pd.DataFrame = agg_by_suffix.iloc[[0, -1]]

        # Make heatmap

        cmap = mcolors.LinearSegmentedColormap.from_list('concentration', ["white", "darkblue"])

        # vmax = 10 ** np.ceil(np.log10(x01.max().max()))
        # vmax = x01.values.sum().sum()
        vmax = x01.loc[:, list(spp_by_suffix)].sum(axis=1).max()

        # sanity fix
        vmax = (vmax if not np.isclose(vmax, 0) else 1)

        im = px.a.imshow(x01, cmap=cmap, vmin=0, vmax=vmax, origin="upper", aspect="auto")

        assert (2 == len(x01.index)), "Expect initial and final state in rows."
        px.a.set_yticks(np.arange(0, len(x01.index)))
        px.a.set_yticklabels(["Initial", "Final"])

        px.a.set_xticks(np.arange(0, len(x01.columns)))
        px.a.set_xticklabels(x01.columns)

        for i in range(x01.shape[0]):
            for j in range(x01.shape[1]):
                alignment = dict(ha="center", va="center")
                im.axes.text(j, i, "{:.3g}".format(x01.iloc[i, j]), fontsize=17, color="red", **alignment)

        # (xlim, ylim) = (px.a.get_xlim(), px.a.get_ylim())

        yield px
Example #8
from sklearn.metrics.pairwise import cosine_similarity

out_dir = mkdir(Path(__file__).with_suffix(''))

try:
    dataset_name = "CasteloBranco-2018"

    cb_dir = next((p.resolve() for p in Path.cwd().parents
                   for p in p.glob(f"**/{dataset_name}")), None)
    assert cb_dir is not None, f"Could not locate the `{dataset_name}` folder."

    cb_dir = str(cb_dir)
    if cb_dir not in sys.path:
        sys.path.append(cb_dir)

    from z_sources import df_expr as df_expr_cb, df_meta as df_meta_cb
except ImportError:
    log.warning("Import from z_sources failed.")
    raise

ab_dir = next((p.resolve() for p in Path.cwd().parents
               for p in p.glob("**/Mouse-WCH-2020")), None)
assert ab_dir is not None, "Could not locate the `Mouse-WCH-2020` folder."

df_expr_ab = pd.read_table(unlist1(ab_dir.glob("*fewer_cells/*data*")),
                           index_col=0)
df_meta_ab = pd.read_table(unlist1(ab_dir.glob("*fewer_cells/*meta*")),
                           index_col=0)
df_meta_ab = df_meta_ab.reindex(df_expr_ab.columns)

# Only keep the genes in common and sort consistently
(df_expr_ab, df_expr_cb) = df_expr_ab.align(df_expr_cb.T, join="inner", axis=0)
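For reference, `DataFrame.align(..., join="inner", axis=0)` trims both frames to their common index and returns them in a consistent order; a self-contained toy example:

import pandas as pd

a = pd.DataFrame({'x': [1, 2, 3]}, index=["g1", "g2", "g3"])
b = pd.DataFrame({'y': [4, 5]}, index=["g3", "g1"])

(a2, b2) = a.align(b, join="inner", axis=0)
print(list(a2.index))  # ['g1', 'g3'] -- only the common genes, same order
print(list(b2.index))  # ['g1', 'g3']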
Example #9
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x):
        log.info(x)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")),
                        mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")),
                        mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Make a reduced metadata file
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(
                f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape[1] == 56)  # Expect the known number of metadata columns

        nsamples_total = len(df_meta)
        log.info(
            f"Based on the metadata, there are {nsamples_total} samples in total.")

        # Subset df_meta to samples of interest

        if PARAM.DUMMY_MODE:
            ix = df_meta.sample(12, random_state=5, replace=False).index
        else:
            ix = df_meta.index[df_meta.subclass_label.isin(
                PARAM.subclass_of_interest)]

        df_meta = df_meta[df_meta.index.isin(ix)]
        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

    # Make a reduced expression data file
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(
                f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 1024

        # Ceiling division: number of chunks needed to cover all samples
        nchunks_expected = (nsamples_total // chunksize) + bool(
            (nsamples_total % chunksize))

        log.info(
            f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks."
        )
        log.info("Downloading.")

        df_data = pd.concat(
            axis=0,
            objs=[
                chunk[chunk.index.isin(df_meta.index)]
                for chunk in progressbar(
                    pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0,
                                chunksize=chunksize),
                    max_value=nchunks_expected,
                )
                if any(chunk.index.isin(df_meta.index))
            ])

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(
            f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}."
        )
        log.info(f"Finished {relpath(data_file)}")
Example #10
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x, text=None):
        if text is None:
            log.info(x)
        else:
            log.info(text)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Metadata
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape[1] == 56)  # Expect the known number of metadata columns

        nsamples_total = len(df_meta)
        log.info(f"Based on metadata, there are {nsamples_total} samples.")

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

        del df_meta

    # Collect expression
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 128

        # Ceiling division: number of chunks needed to cover all samples
        nchunks_expected = (nsamples_total // chunksize) + bool((nsamples_total % chunksize))

        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")
        log.info("Downloading.")

        df_data = pd.concat(axis=0, objs=(
            chunk.astype(pd.SparseDtype('int', fill_value=0))
            for chunk in progressbar(
                pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0,
                            chunksize=chunksize),
                max_value=nchunks_expected,
            )
        ))

        log.info(f"Sparse density: {df_data.sparse.density}")

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Data has {len(df_data.columns)} samples.")
        log.info(f"Finished {relpath(data_file)}")
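A side note on the sparse cast above: `pd.SparseDtype('int', fill_value=0)` stores only the entries that differ from the fill value, which is what keeps the concatenated expression matrix manageable. A toy check:

import pandas as pd

df = pd.DataFrame({'g': [0, 0, 3, 0]}).astype(pd.SparseDtype('int', fill_value=0))
print(df.sparse.density)  # 0.25 -- one stored value out of four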
Example #11
def show(g: nx.MultiDiGraph, state: pd.Series):
    # pos = nx.shell_layout(g)
    # pos = nx.planar_layout(g)
    # pos = nx.spring_layout(g)
    pos = {
        'Free cargo (c)': [-6, +8],
        'Cargo·Impβ (c)': [+6, +8],
        'Impβ·Ran·GTP (c)': [-4, +6],
        'Free Impβ (c)': [+4, +6],
        'Ran·GTP (c)': [-2, +4],
        'Ran·GDP (c)': [+2, +4],
        'RanBP1·Ran·GTP (c)': [0, 3],
        # 'RanBP1 (c)': [0, 1],
        'Ran·GTP (n)': [-2, -2],
        'Ran·GDP (n)': [+2, -2],
        'Impβ·Ran·GTP (n)': [-4, -4],
        'Free Impβ (n)': [+4, -4],
        'Free cargo (n)': [-6, -6],
        'Cargo·Impβ (n)': [+6, -6],
    }
    # pos = nx.get_node_attributes(g, name='pos')

    species = pd.Series(nx.get_node_attributes(g, name='matlab'))
    # species = pd.DataFrame({'matlab': species, 'value': species.map(state)})
    species = species.map(state)

    if any(species.isna()):
        log.warning(
            f"Species have no data: \n{list(species[species.isna()].index)}")

    # Directed multifluxes
    fluxes = pd.Series(nx.get_edge_attributes(g, name='matlab'))
    fluxes = pd.DataFrame({'matlab': fluxes, 'value': fluxes.map(state)})
    # Directed fluxes
    f = fluxes.reset_index().groupby(
        by=['level_0', 'level_1']).value.sum().to_dict()
    # One directed edge per node pair: collapse the multigraph to an
    # undirected graph, then keep a single canonical direction per edge
    ug = (lambda ug: nx.DiGraph(ug).edge_subgraph(ug.edges))(nx.Graph(g))
    # Net fluxes (forward minus reverse)
    fluxes = pd.Series(index=list(ug.edges), data=list(
        ug.edges)).transform(lambda e: (f.get(e, 0) - f.get((e[1], e[0]), 0)))

    if any(fluxes.isna()):
        log.warning(
            f"Fluxes have no data: \n{list(fluxes[fluxes.isna()].index)}")

    # log.info(f"Species: \n{species}")
    # log.info(f"Fluxes: \n{fluxes}")

    node_size = species.fillna(0)
    # node_size = node_size[node_size > 0]
    # node_size = node_size / node_size.sum()
    node_size = 300 * node_size

    node_labels = pd.Series(data=species.index, index=species.index)
    node_labels = node_labels.transform(
        lambda s: s.replace("(c)", "").replace("(n)", "").strip())

    edge_width = fluxes
    edge_width = edge_width[edge_width != 0]
    (fwd, bwd) = (edge_width[edge_width >= 0].index,
                  edge_width[edge_width < 0].index)
    edge_width = edge_width.transform(lambda x: np.log10(np.abs(x)))
    edge_width = 0.4 + (edge_width - edge_width.min()) / (
        (edge_width.max() - edge_width.min()) or 1)
    edge_alpha = 0.7 * (edge_width / edge_width.max())

    edge_labels = fluxes.abs().transform(lambda x: f"{x:0.02g}")

    style = {
        rc.Figure.frameon: False,
    }

    with Plox(style) as px:
        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.4)
        nx.draw_networkx_nodes(**kw,
                               nodelist=node_size.index,
                               node_size=node_size,
                               node_color="C0",
                               linewidths=0)

        kw = dict(G=ug, pos=pos, ax=px.a, edge_color='g')
        nx.draw_networkx_edges(**kw,
                               width=edge_width[fwd],
                               alpha=edge_alpha[fwd],
                               edgelist=fwd)
        nx.draw_networkx_edges(**kw,
                               width=edge_width[bwd],
                               alpha=edge_alpha[bwd],
                               edgelist=[(v, u) for (u, v) in bwd])

        kw = dict(G=ug, pos=pos, ax=px.a, alpha=0.7)
        nx.draw_networkx_edge_labels(**kw,
                                     edge_labels=edge_labels.to_dict(),
                                     font_size=5,
                                     font_color='g')

        kw = dict(G=g, pos=pos, ax=px.a, alpha=0.8, font_color='k')
        nx.draw_networkx_labels(**kw,
                                font_size=7,
                                labels=node_labels,
                                verticalalignment="bottom")
        nx.draw_networkx_labels(**kw,
                                font_size=6,
                                labels=species.transform(
                                    lambda x: f"{x:0.02g}"),
                                verticalalignment="top")

        px.a.axis('off')

        y = np.mean(px.a.get_ylim())
        px.a.plot(px.a.get_xlim(), [y, y], '--', color='k', lw=1, zorder=-100)

        kw = dict(x=(min(px.a.get_xlim()) + 0.8),
                  ha="center",
                  zorder=100,
                  alpha=0.5,
                  fontdict=dict(fontsize=6))
        px.a.text(**kw, y=(y + 0.1), s="Cytoplasm", va="bottom")
        px.a.text(**kw, y=(y - 0.1), s="Nucleus", va="top")

        out_dir = mkdir(Path(__file__).with_suffix(''))
        px.f.savefig(out_dir / "onion.png")
        px.f.savefig(out_dir / "onion.pdf")
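A hypothetical invocation, with `state` a Series keyed by the MATLAB names stored in the node and edge attributes (the values here are made up; missing entries just trigger the warnings above):

g = get_graph()
state = pd.Series({"RanGTP_nuc": 1.5, "FluxRanGTP": 0.02, "GAP": 0.4})  # hypothetical
show(g, state)  # writes onion.png / onion.pdf next to this script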