def test_inv():
    """Check the `~` operator on a column reference in `select` and `mutate`."""
    # In `select`, inverting `f.x` leaves only the other column.
    frame = tibble(x=1, y=2)
    remaining = frame >> select(~f.x)
    assert remaining.columns.tolist() == ["y"]

    # In `mutate`, inverting a boolean column negates its values.
    flags = tibble(x=True)
    negated = flags >> mutate(y=~f.x)
    assert negated.y.tolist() == [False]
def gene_name_conversion(
    genes,
    species,
    infmt,
    outfmt,
    notfound,
):
    """Translate gene names from one format to another using MyGeneInfo.

    Args:
        genes: A sequence of genes
        species: The species to limit the query
            Supported: human, mouse, rat, fruitfly, nematode, zebrafish,
            thale-cress, frog and pig
        infmt: The format of the incoming gene names
            Available fields
            https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
        outfmt: The format to convert the gene names to. A string is
            split on commas into a list of field names.
        notfound: How to handle genes that cannot be converted.
            use-query: Keep the original name in place of the conversion
            skip: Drop the rows where the conversion failed
            error: Report error

    Returns:
        A dataframe with two columns, query and `outfmt`.

    Raises:
        QueryGenesNotFound: If `notfound` is "error" and the first output
            field is NA for any of the queried genes.
    """
    hits = mygene.querymany(
        genes,
        scopes=infmt,
        fields=outfmt,
        as_dataframe=True,
        df_index=False,
        species=species,
    )
    # Order by descending score, keep the first row of each query group,
    # then drop the bookkeeping columns returned by the query.
    best_hits = (
        hits
        >> group_by(f.query)
        >> arrange(desc(f._score))
        >> slice_head(1)
        >> select(~c(f._id, f._score, f.notfound))
    )

    if isinstance(outfmt, str):
        fields = [name.strip() for name in outfmt.split(",")]
    else:
        fields = outfmt

    # Left-join onto the queried genes so every input gene has a row.
    out = tibble(query=genes) >> left_join(best_hits, by=f.query)

    if notfound == "use-query":
        return out >> mutate(
            across(
                fields,
                lambda col, query: if_else(is_na(col), query, col),
                query=f.query,
            )
        )

    if notfound == "error":
        if any(is_na(out[fields[0]])):
            missing = out >> filter(is_na(f[fields[0]])) >> pull(f.query)
            raise QueryGenesNotFound(missing)
        return out

    if notfound == "skip":
        return out >> filter(~is_na(f[fields[0]]))

    return out
class TruvariBench(TruvariBench):
    # Subclass of the TruvariBench already in scope, wired to run after
    # DownloadList.
    requires = DownloadList

    # compvcf: the entries matching "input*.vcf.gz" expanded from the channel;
    # basevcf: the first cell of the entries matching "multi*.vcf.gz".
    input_data = lambda ch: tibble(
        compvcf=expand_dir(ch, pattern="input*.vcf.gz"),
        basevcf=expand_dir(ch, pattern="multi*.vcf.gz").iloc[0, 0],
    )

    # Reference path is resolved relative to this file, three directories up.
    envs = {
        "ref": str(
            Path(__file__).parent.parent.parent.joinpath(
                "data", "reference", "hg19", "chrs.fa"
            )
        ),
    }
def test_and_or():
    """Check `&` and `|` on column collections (select) and columns (mutate)."""
    frame = tibble(x=1, y=2, z=3, w=4)

    # `&` between two collections keeps the shared column only.
    shared = frame >> select(c(f.x, f.y) & c(f.y, f.z))
    assert shared.columns.tolist() == ["y"]

    # `&` between two column references inside mutate.
    col_and_col = frame >> mutate(a=f.x & f.y)
    assert col_and_col.a.tolist() == [True]

    # `&` with a plain Python bool on the left-hand side.
    bool_and_col = frame >> mutate(a=True & f.y)
    assert bool_and_col.a.tolist() == [True]

    # `|` between two collections keeps every column named in either.
    combined = frame >> select(c(f.x, f.y) | c(f.y, f.z))
    assert combined.columns.tolist() == ["x", "y", "z"]
def test_glimpse_html_df():
    """Check that the HTML representation of a glimpse contains a table."""
    labels = [str(i) for i in range(20)]
    frame = tibble(x=f[:20], y=labels)
    glimpsed = glimpse(frame, 100)
    html = glimpsed._repr_html_()
    assert "<table>" in html
def test_glimpse_str_gf():
    """Check that `repr` of a glimpse of a grouped frame reports its groups."""
    labels = [str(i) for i in range(10)]
    grouped = tibble(x=f[:10], y=labels) >> group_by(f.y)
    text = repr(glimpse(grouped))
    assert "Groups: y [10]" in text
def test_glimpse_str_nest_df():
    """Check the string form of a glimpse of a frame with a nested column."""
    nested = tibble(x=f[:10], y=f[10:20]) >> nest(data=~f.x)
    text = str(glimpse(nested))
    assert "Rows: 10" in text
    assert "Columns: 2" in text
    assert "<DF 1x1>, <DF 1x1>" in text
def test_glimpse_str_df():
    """Check the string form of a glimpse of a plain two-column frame."""
    labels = [str(i) for i in range(10)]
    frame = tibble(x=f[:10], y=labels)
    text = str(glimpse(frame))
    assert "Rows: 10" in text
    assert "Columns: 2" in text
    assert "0, 1, 2" in text
from datar.all import tibble, flatten
from pipen import Proc
from biopipen.ns.web import Download
from biopipen.ns.plot import Heatmap
from biopipen.core.testing import get_pipeline

# Rebind Heatmap to a process derived from the imported Heatmap; the derived
# process requires Download and carries the envs given below.
Heatmap = Proc.from_proc(
    Heatmap,
    requires=Download,
    # infile is the incoming channel; annofiles is the flattened channel
    # wrapped in a one-element list.
    input_data=lambda ch: tibble(infile=ch, annofiles=[flatten(ch)]),
    envs={
        "globals": "data = head(data, 100)",
        "args": {
            "right_annotation": """r:rowAnnotation( Boxplot = anno_boxplot(as.matrix(head(annos, 100)), outline = F) )"""
        }
    })


def pipeline():
    """Return the pipeline for this file, starting at Download with one URL."""
    return get_pipeline(__file__).set_starts(Download).set_data([
        "https://www.ncbi.nlm.nih.gov/geo/download/"
        "?acc=GSE179367"
        "&format=file"
        "&file=GSE179367%5Fgene%5Fcount%2Ereal%2Etxt%2Egz",
    ])


# NOTE(review): only the signature of testing() is visible in this chunk.
def testing(pipen):
def test_or_():
    """Check `|` between a column collection and a plain list in `select`."""
    frame = tibble(x=1, y=2, z=3)
    selected = frame >> select(c(f.x, f.y) | [f.y, f.z])
    expected = tibble(x=1, y=2, z=3)
    assert_frame_equal(selected, expected)
def test_right_recycle_to_left():
    """Check `column | True` yields one value per row of the column."""
    frame = tibble(x=[True, False])
    result = mutate(frame, y=f.x | True)
    expected = tibble(x=[True, False], y=[True, True])
    assert_frame_equal(result, expected)
def test_rowwise_gets_rowwise():
    """Check that operator results on a rowwise frame are still rowwise."""
    rowwise_frame = tibble(x=[1, 2, 3], y=[4, 5, 6]).rowwise()
    result = mutate(rowwise_frame, z=1 + f.y, w=-f.x, t=+f.y)
    # Only the binary-add and unary-minus results are asserted on.
    assert result.z.is_rowwise
    assert result.w.is_rowwise
def test_op_getattr():
    """Check that a method can be called on the result of an operator."""
    frame = tibble(x=[1, 2], y=[1, -3])
    result = mutate(frame, z=(f.x * f.y).abs())
    expected = tibble(x=[1, 2], y=[1, -3], z=[1, 6])
    assert_frame_equal(result, expected)
def test_undefined_op():
    """Check that `*` between two column references works in `mutate`."""
    frame = tibble(x=[1, 2], y=[1, 3])
    result = mutate(frame, z=f.x * f.y)
    expected = tibble(x=[1, 2], y=[1, 3], z=[1, 6])
    assert_frame_equal(result, expected)
def test_ne():
    """Check `!=` between columns and between attributes of columns."""
    frame = tibble(x=[1, 2], y=[1, 3])

    # Element-wise comparison of the two columns.
    by_value = mutate(frame, z=f.x != f.y)
    assert_frame_equal(
        by_value,
        tibble(x=[1, 2], y=[1, 3], z=[False, True]),
    )

    # Comparison of the columns' `size` attributes.
    by_size = mutate(frame, z=f.x.size != f.y.size)
    assert_frame_equal(
        by_size,
        tibble(x=[1, 2], y=[1, 3], z=[False, False]),
    )
def test_apply_dtypes():
    """Check that `apply_dtypes(frame, True)` makes a whole-number float column integer."""
    frame = tibble(x=[1.0, 2.0])
    # The frame is modified in place; the return value is not used.
    apply_dtypes(frame, True)
    assert is_integer(frame.x)
def test_neg():
    """Check that negating a column slice in `select` excludes those columns."""
    frame = tibble(x=1, y=2)
    remaining = frame >> select(-f[:1])
    assert remaining.columns.tolist() == ["y"]
from pipen import Proc
from biopipen.ns.web import Download
from biopipen.ns.misc import Str2File
from biopipen.ns.gsea import FGSEA
from datar.all import flatten, tibble, select
from biopipen.core.testing import get_pipeline

# Rebind FGSEA to a process derived from the imported FGSEA; the derived
# process requires both Download and Str2File.
FGSEA = Proc.from_proc(
    FGSEA,
    requires=[Download, Str2File],
    # Build a tibble from the flattened values of ch1 followed by ch2, then
    # select columns by the indices 0, 2, 1 in that order.
    input_data=lambda ch1, ch2: tibble(
        *flatten(ch1),
        ch2,
        _name_repair="minimal",
    ) >> select(0, 2, 1),
    envs={"clscol": "Group", "classes": ["MMP9", "CTRL"]}
)


# NOTE(review): pipeline() is cut off in this chunk; the rest of the
# set_data(...) call is not visible.
def pipeline():
    return get_pipeline(__file__).set_starts(Download, Str2File).set_data(
        [
            "https://www.ncbi.nlm.nih.gov/geo/download/"
            "?acc=GSE179367"
            "&format=file"
            "&file=GSE179367%5Fgene%5Fcount%2Ereal%2Etxt%2Egz",
            "https://www.genepattern.org/tutorial/linkedFiles/"
            "export_gnf.GENE_SYMBOL.gmt",
        ],
        [(
def test_neg():
    """Check that unary minus on a column reference negates its values."""
    frame = tibble(x=[1, 2])
    result = frame >> mutate(y=-f.x)
    expected = tibble(x=[1, 2], y=[-1, -2])
    assert_frame_equal(result, expected)