def tsne(df, out_csv: Path):
    log.info(relpath(out_csv))

    if out_csv.is_file():
        X = pd.read_csv(out_csv, sep='\t', compression="infer", index_col=0)
        assert (X.index.name == "sample_name")
        assert (list(X.columns) == ['x', 'y'])
    else:
        assert (df.index.name == "gene_name")
        X = pd.DataFrame(
            index=pd.Series(df.columns, name="sample_name"),
            columns=['x', 'y'],
            data=TSNE(random_state=seed).fit_transform(df.T),
        )
        X.to_csv(out_csv, sep='\t', compression="gzip")

    # https://matplotlib.org/tutorials/introductory/customizing.html
    style = {
        'legend.fontsize': "xx-small",
        'legend.framealpha': 0.5,
    }

    with Plox(style) as px:
        px.a.plot(X.x, X.y, '.', ms=(10 / np.log10(len(X))))
        # px.a.legend()
        px.a.axis('off')
        px.f.savefig(out_csv.with_suffix(".png"))
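# Hypothetical usage sketch (illustrative only): the DataFrame below is made up,
# and `seed`, `out_dir`, `log` and `Plox` are assumed to be defined in this module
# as above. `tsne` expects a genes x samples frame indexed by "gene_name" and
# caches the 2D embedding in `out_csv` next to the figure.
#
#   rng = np.random.default_rng(0)
#   df = pd.DataFrame(
#       data=rng.random((200, 50)),
#       index=pd.Series([f"gene{i}" for i in range(200)], name="gene_name"),
#       columns=[f"sample{j}" for j in range(50)],
#   )
#   tsne(df, out_csv=(out_dir / "tsne.csv.gz"))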
def make_scenarios(df: pd.DataFrame):
    # Scenario columns are those to the right of the first column
    # whose name starts with "scenario" (case-insensitive).
    scenarios_start = 1 + min(i for (i, c) in enumerate(df.columns) if c.lower().startswith("scenario"))
    log.info(f"Scenarios start at column #{scenarios_start}.")

    for c in df.columns[scenarios_start:]:
        if c:
            with (out_dir / f"{c}.m").open(mode='w') as fd:
                with contextlib.redirect_stdout(fd):
                    print_scenario(df, c)
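# For orientation, a hypothetical input layout (the scenario names "wildtype" and
# "mutant" are invented here; the Item/Compartment/Name/Parameter/Units columns are
# the ones consumed by `print_scenario` below):
#
#   Item      Compartment  Name  Parameter  Units  Scenario  wildtype  mutant
#   Reaction               R1    kon        1/s              1.0       default
#   Species   nucleus      Ran   Value      uM               2.5       0.1
#
# This would produce `wildtype.m` and `mutant.m` in `out_dir`, with empty and
# "default" entries skipped.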
def main():
    from data_source import runs

    sp_specs = [
        {'+': "CAS", '-': "ΔCAS"},
        {'+': "CAS·Ran·GTP", '-': "ΔCAS·Ran·GTP"},
        {'+': "ImpA·CAS·Ran·GTP"},
        {'+': "ΔCAS·Ran·GTP"},
        {'+': "ImpA·ΔCAS·Ran·GTP"},
        {'+': "ΔCAS"},
        {'+': "ImpB"},
        {'+': "ImpA"},
        {'+': "ImpA·ImpB"},
        {'+': "Ran·GTP"},
        {'+': "NLS"},
    ]

    # `Not a species` placeholder: a string that never occurs in a species name,
    # so specs without a '-' key exclude nothing.
    nas = ("?" * 100)

    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows()):
        for sp_spec in sp_specs:
            # Species to include in the plot
            spp = [c for c in run.tx.columns if (sp_spec['+'] in c) and not (sp_spec.get('-', nas) in c)]

            # File name and proto-ylabel
            name = sp_spec['+'] + (f" (excl. {sp_spec['-']})" if ('-' in sp_spec) else "")

            for px in plot_total_steadystate(run, spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ", r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label)

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview
    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            print(
                summary.applymap(
                    lambda p: os.path.relpath(p, out_dir)
                ).applymap(
                    lambda p: f'<a href="{p}"><img style="width:196px" src="{p}"/></a>'
                ).to_html(
                    escape=False
                )
            )
def main():
    from data_source import runs
    runs = runs['results_fig4a']

    for (RanGAP, run_group) in runs.groupby('RanGAP'):
        if RanGAP:
            for px in process(run_group):
                filepath = out_dir / f"RanGAP={RanGAP}.png"
                log.info(f"Writing {relpath(filepath)}.")
                px.f.savefig(filepath)
def print_scenario(df, c):
    log.info(f"Making scenario: {c}.")

    print(f'% Autogenerated by {relpath(__file__)} on {Now()}.')
    print(f'')

    # Note: `c` is the scenario column; `comp` is the item's compartment.
    for (i, comp, n, p, u, v) in zip(df.Item, df.Compartment, df.Name, df.Parameter, df.Units, df[c]):
        if (v.lower() in ["", "default"]):
            continue

        log.info(f"Processing item: `{i}`.")

        if (i == ""):
            pass
        elif (i == "Reaction"):
            print(
                *[
                    f'r = m.Reactions({{m.Reactions.Name}} == "{n}");',
                    f'k = r.KineticLaw;',
                    f'p = k.Parameters({{k.Parameters.Name}} == "{p}");',
                    f'assert(1 == length(p));',
                    f'assert(p.Units == "{u}");',
                    f'p.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Compartment"):
            print(
                *[
                    f'c = m.Compartments({{m.Compartments.Name}} == "{n}");',
                    f'assert(1 == length(c));',
                    f'assert(c.Units == "{u}");',
                    f'c.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        elif (i == "Species"):
            assert (p == "Value")
            assert (comp != "")  # Compartment must be given
            print(
                *[
                    f'c = [m.Species.Parent];',
                    f's = m.Species(({{m.Species.Name}} == "{n}") & ({{c.Name}} == "{comp}"));',
                    f'assert(1 == length(s));',
                    f'assert(s.Units == "{u}");',
                    f's.Value = {v};',
                    f'',
                ],
                sep='\n'
            )
        else:
            log.warning(f"Unknown item: `{i}`.")
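# For a hypothetical row (Item="Reaction", Name="R1", Parameter="kon",
# Units="1/second") with scenario value 0.5, the Reaction branch above emits:
#
#   r = m.Reactions({m.Reactions.Name} == "R1");
#   k = r.KineticLaw;
#   p = k.Parameters({k.Parameters.Name} == "kon");
#   assert(1 == length(p));
#   assert(p.Units == "1/second");
#   p.Value = 0.5;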
def main():
    from data_source import runs

    summary = pd.DataFrame()

    for (i, run) in sorted(runs.iterrows()):
        for (sp_display, sp_pattern) in sp_specs.items():
            # Species to include in the plot
            collect_spp = [
                candidate
                for candidate in run.tx.columns
                if re.match(sp_pattern, candidate)
            ]

            if collect_spp:
                log.info(f"Species for spec `{sp_display}`: {collect_spp}.")
            else:
                log.warning(f"No species selected for spec `{sp_display}`.")

            # File name and proto-ylabel
            name = sp_display

            for px in plot_total_steadystate(run, collect_spp):
                img_file = mkdir(out_dir / i) / f"{name}.png"
                summary.loc[name, i] = img_file

                label = fr"{name}, $\mu$M"
                label = label.replace("Δ", r"$\Delta$")  # pdflatex issue with UTF
                px.a.set_title(label, fontdict={'fontsize': 20})

                log.info(f"Writing: {relpath(img_file)}")
                px.f.savefig(img_file)

    # Write an HTML overview
    with (out_dir / "index.html").open(mode='w') as fd:
        with contextlib.redirect_stdout(fd):
            print(
                summary.applymap(
                    lambda p: os.path.relpath(p, out_dir)
                ).applymap(
                    lambda p: f'<a href="{p}"><img style="width:{IMG_WIDTH}px" src="{p}"/></a>'
                ).to_html(escape=False)
            )
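# Note on the selection above: `re.match` anchors only at the start of the string,
# so a pattern like r"ImpB" (hypothetical; the actual patterns live in `sp_specs`)
# would also match "ImpB·Ran·GTP" but not "IBB·ImpB". A pattern intended to match
# the whole name should end with r"$" or be used with `re.fullmatch` instead.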
def download_expr():
    log.info("Downloading the expr data.")
    log.info(download(URLS['expr']).now.meta)
def download_meta():
    log.info("Downloading the meta data.")

    with download(URLS['meta']).now.open() as rd:
        df_meta = pd.read_csv(rd, sep=',', index_col=0)

    # print(json.dumps(Counter(df_meta.subclass_label), indent=2))
    summary = {
        "NaN": 4014,
        "L2/3 IT ENTl": 5764, "L2 IT RHP": 7599, "L2/3 IT PPP": 34084, "L2 IT ENTl": 4068,
        "L4/5 IT CTX": 253722, "L5 PT CTX": 16783, "L5 IT TPE-ENT": 5525,
        "L2/3 IT CTX-1": 117565, "L3 IT ENT": 13789, "L3 RSP-ACA": 4214, "L2/3 IT CTX-2": 7141,
        "L6 IT CTX": 79403, "L5 PPP": 1240, "L6 IT ENTl": 1169, "L5 IT CTX": 44889,
        "L5 NP CTX": 29378, "L6 CT CTX": 135241, "L6b CTX": 13114, "L6b/CT ENT": 20789,
        "NP SUB": 1949, "NP PPP": 2695, "V3d": 66, "Meis2": 1,
        "Lamp5": 38464, "Vip": 41626, "Sncg": 11573, "Sst": 42310, "Pvalb": 31088, "Sst Chodl": 1906,
        "DG": 58754, "CA1-ProS": 16141, "Car3": 21538, "SUB-ProS": 4406, "CT SUB": 6012,
        "CA2": 336, "CA3": 1899, "CR": 268,
        "Oligo": 7685, "Astro": 3119, "SMC-Peri": 198, "Endo": 746, "VLMC": 129, "Micro-PVM": 636,
    }

    log.info("Making the dummy set.")

    with closing(urllib.request.urlopen(url=URLS['expr'])) as rd:
        df_data = pd.read_csv(rd, sep=',', index_col=0, nrows=50).iloc[:, 0:101]

    df_meta = df_meta.loc[df_data.index]

    df_meta.to_csv(download.local_folder / "dummy_meta.csv", sep=',')
    df_data.to_csv(download.local_folder / "dummy_data.csv", sep=',')

    log.info("Dummy set done.")
    log.info("Run b_reduced.py to download the reduced dataset.")
            data=data.x,
            dtype=float,
        ).loc[data.t.squeeze() >= data.t_react.squeeze()]
    )

    load_params = First(str).then(loadmat).then(pd.Series).then(
        lambda data: data[["ImpB", "RanBP1", "RanGAP", "t_react"]].transform(lambda x: np.array(x).squeeze()),
    )

    data = pd.DataFrame({
        file.stem: pd.Series({
            'tx': load_tx(file),
            **load_params(file).to_dict(),
        })
        for file in folder.glob("*.mat")
    })

    return data.T


runs = {
    folder.name: load_runs(folder)
    for folder in Path(__file__).parent.parent.glob("results*")
    if folder.is_dir()
}

log.info(f"Loaded runs from folders: {', '.join(runs)}.")

if __name__ == '__main__':
    print(runs)
def peek(x):
    log.info(x)
    return x
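# Hypothetical usage: `peek` logs a value and passes it through unchanged, so it
# can be dropped into the middle of an expression or method chain for inspection
# (file name made up here):
#
#   df = peek(pd.read_csv("meta.csv.gz", index_col=0)).dropna()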
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x):
        log.info(x)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Make a reduced metadata file
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(f"Based on the metadata, there are {nsamples_total} samples in total.")

        # Subset df_meta to samples of interest
        if PARAM.DUMMY_MODE:
            ix = df_meta.sample(12, random_state=5, replace=False).index
        else:
            ix = df_meta.index[df_meta.subclass_label.isin(PARAM.subclass_of_interest)]

        df_meta = df_meta[df_meta.index.isin(ix)]

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

    # Make a reduced expression data file
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 1024

        nchunks_expected = (nsamples_total // chunksize) + bool(nsamples_total % chunksize)
        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")

        log.info(f"Downloading.")

        df_data = pd.concat(
            axis=0,
            objs=[
                chunk[chunk.index.isin(df_meta.index)]
                for chunk in progressbar(
                    pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0, chunksize=chunksize),
                    max_value=nchunks_expected,
                )
                if any(chunk.index.isin(df_meta.index))
            ],
        )

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}.")
        log.info(f"Finished {relpath(data_file)}")
def load_runs(folder) -> pd.DataFrame:
    load_tx = First(str).then(loadmat).then(pd.Series).then(
        lambda data: pd.DataFrame(
            index=pd.Series(data.t.squeeze(), name='t', dtype=float),
            columns=pd.Series(data.names.squeeze(), name='species').transform(unlist1),
            data=data.x,
            dtype=float,
        )
    )

    load_params = First(str).then(loadmat).then(pd.Series)

    data = pd.DataFrame({
        file.stem: pd.Series({
            'tx': load_tx(file),
            **load_params(file).to_dict(),
        })
        for file in folder.glob("*.mat")
    })

    return data.T


runs = load_runs(base)

log.info(f"Loaded runs: {', '.join(runs.index)}")

if __name__ == '__main__':
    pass
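# Hypothetical usage sketch (the folder and run names are invented; `base` is
# assumed to point at a folder of MATLAB .mat result files, as above):
#
#   runs = load_runs(Path("results_fig4a"))
#   run = runs.loc["run01"]   # one row per .mat file, keyed by file stem
#   run.tx                    # time ('t') x species DataFrame of the run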
def peek(x, text=None):
    if text is None:
        log.info(x)
    else:
        log.info(text)
    return x
def main():
    if PARAM.DUMMY_MODE:
        log.info("WE'RE IN DUMMY MODE.")

    out_dir = mkdir(Path(__file__).with_suffix(''))

    if PARAM.DUMMY_MODE:
        data_file = out_dir / "dummy_data.csv.gz"
        meta_file = out_dir / "dummy_meta.csv.gz"
    else:
        data_file = out_dir / "data.csv.gz"
        meta_file = out_dir / "meta.csv.gz"

    def peek(x, text=None):
        if text is None:
            log.info(x)
        else:
            log.info(text)
        return x

    def meta_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_meta.csv")), mode='r')
        else:
            return download(URLS['meta']).now.open()

    def data_open_remote():
        if PARAM.DUMMY_MODE:
            return open(unlist1(download.local_folder.glob("dummy_data.csv")), mode='r')
        else:
            return closing(urllib.request.urlopen(url=URLS['expr']))

    # Metadata
    with meta_open_remote() as fd:
        if meta_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(meta_file)}")

        df_meta = pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0)
        assert (df_meta.shape == (len(df_meta), 56))

        nsamples_total = len(df_meta)
        log.info(f"Based on metadata, there are {nsamples_total} samples.")

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

        del df_meta

    # Collect expression
    with data_open_remote() as rd:
        if data_file.exists():
            log.warning(f"File will be overwritten when done: {relpath(data_file)}")

        if PARAM.DUMMY_MODE:
            chunksize = 24
        else:
            chunksize = 128

        nchunks_expected = (nsamples_total // chunksize) + bool(nsamples_total % chunksize)
        log.info(f"Chunksize is {chunksize} rows. Expect {nchunks_expected} chunks.")

        log.info(f"Downloading.")

        df_data = pd.concat(
            axis=0,
            objs=(
                chunk.astype(pd.SparseDtype('int', fill_value=0))
                for chunk in progressbar(
                    pd.read_csv(rd, sep=PARAM.remote_sep, index_col=0, chunksize=chunksize),
                    max_value=nchunks_expected,
                )
            ),
        )

        log.info(f"Sparse density: {df_data.sparse.density}")

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Data has {len(df_data.columns)} samples.")
        log.info(f"Finished {relpath(data_file)}")
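# A minimal read-back sketch (assuming PARAM.local_sep as above): the file written
# by `main` is genes x samples, so a downstream script could load it as, e.g.,
#
#   df_data = pd.read_csv(data_file, sep=PARAM.local_sep, index_col=0)
#
# Note that the sparse dtype is not preserved through the CSV round-trip.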
assert file.is_file()

from scipy.io import loadmat
data = pd.Series(loadmat(str(file)))
# print(data)

X = pd.DataFrame(
    index=pd.Series(data.t.squeeze(), name='t', dtype=float),
    columns=pd.Series(data.names.squeeze(), name='species').transform(unlist1),
    data=data.x,
    dtype=float,
)

X = X[(1e-1 <= X.index) & (X.index <= 1e3)]

log.info(f"Effective k_d: {X['k_d_eff'].median()}")

X = X[["IBB", "IBB'", "ImpB", "ImpB'", "IBB·ImpB", "IBB*·ImpB", "IBB·ImpB'"]]

kw = {
    "IBB": dict(color='C3', ls='-', lw=3),
    "IBB'": dict(color='C3', ls='--', lw=3),
    "ImpB": dict(color='C0', ls='-', lw=3),
    "ImpB'": dict(color='C0', ls='--', lw=3),
    "IBB·ImpB": dict(color='C1', ls='-', lw=2),
    "IBB*·ImpB": dict(color='C4', ls='-', lw=3),
    "IBB·ImpB'": dict(color='C4', ls='--', lw=3),
}

style = {
    rcParam.Font.size: 12,
def main():
    from a_download import df_meta

    out_dir = mkdir(Path(__file__).with_suffix(''))
    data_file = out_dir / "data.csv.gz"
    meta_file = out_dir / "meta.csv.gz"

    if True:
        df_meta = df_meta[df_meta.subclass_label.isin(PARAM.subclass_of_interest)]
        log.info(f"New subset of cells: {dict(df_meta.subclass_label.value_counts())}")

        df_meta.to_csv(meta_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Size of reduced dataset: {len(df_meta)}.")
        log.info(f"Finished {relpath(meta_file)}")

    with download(URLS['expr']).now.open(mode='r') as fd:
        log.info(f"Reducing the expression data.")

        df_data = pd.concat(
            axis=0,
            objs=(
                df[df.index.isin(df_meta.index)]
                for df in pd.read_csv(fd, sep=PARAM.remote_sep, index_col=0, chunksize=1024)
                if any(df.index.isin(df_meta.index))
            ),
        )

        # genes x samples
        df_data = df_data.T

        df_data.to_csv(data_file, sep=PARAM.local_sep, compression='gzip')

        log.info(f"Data has {len(df_data.columns)} samples, expected {len(df_meta)}.")
        log.info(f"Finished {relpath(data_file)}")