def test_interleaved_context_managers():
    """Two simultaneously open dppd contexts keep separate pipelines.

    Verbs are issued alternately on the mtcars and the diamonds pipeline;
    afterwards each proxy must match the plain-pandas equivalent of only
    the verbs issued on its own pipeline.
    """
    with dppd(mtcars) as (dpX, X), dppd(diamonds) as (dpY, Y):
        dpX.groupby("cyl")
        dpY.filter_by(Y.cut == "Ideal")
        dpX.summarize(("hp", np.mean, "mean_hp"))
        dpY.summarize(("price", np.max, "max_price"))

    # Expected result for mtcars: mean hp per cyl, with cyl as a column.
    mean_hp = mtcars.groupby("cyl")[["hp"]].agg(np.mean)
    should_X = mean_hp.rename(columns={"hp": "mean_hp"}).reset_index()

    # Expected result for diamonds: one row holding the max price of
    # the "Ideal" cut.
    ideal_max = diamonds[diamonds.cut == "Ideal"].max()
    should_Y = pd.DataFrame(ideal_max[["price"]]).transpose()
    should_Y = should_Y.rename(columns={"price": "max_price"})
    should_Y["max_price"] = should_Y["max_price"].astype(int)

    assert_frame_equal(X, should_X)
    assert_frame_equal(Y, should_Y)
def test_straight_dp_raises():
    """A dp that was never given a data frame raises ValueError.

    Both a verb call and a plain attribute access are checked.
    """
    dp, _ = dppd()

    with pytest.raises(ValueError):
        dp.select(["hp", "cyl"])

    with pytest.raises(ValueError):
        dp.loc[5]
def test_forking_context_manager():
    """Intermediate results captured inside a context manager stay usable."""
    with dppd(mtcars) as (dp, X):
        selected = dp.select(["name", "hp", "cyl"])
        name_head = dp.select("name").head().pd
        hp_head = selected.select("hp").head().pd
        dp.head()

    expected_hp = mtcars["hp"].head()
    expected_name = mtcars["name"].head()
    assert_series_equal(hp_head["hp"], expected_hp)
    assert_series_equal(name_head["name"], expected_name)
    assert_frame_equal(X, mtcars[["hp"]].head())
def test_stacking():
    """Calling dp(df) twice stacks two pipelines; `.pd` pops them in turn."""
    dp, X = dppd()
    wide_columns = ["name", "hp", "cyl"]

    dp(mtcars).select(wide_columns)
    inner = dp(mtcars).select("hp").pd
    assert_frame_equal(inner, mtcars[["hp"]])

    # With the inner pipeline popped, X shows the outer one again.
    assert_frame_equal(X, mtcars[wide_columns])
    outer = dp.pd
    assert_frame_equal(outer, mtcars[wide_columns])

    assert X == None  # noqa:E711 since it's the proxy, is will fail
def test_grouped_mutate_repeated_keys():
    """A grouped mutate on a frame with a duplicated index raises ValueError."""
    df = mtcars.copy()
    # Every index label 0..15 appears twice.
    df.index = list(range(16)) * 2

    # cannot reindex from duplicate axis
    with pytest.raises(ValueError):
        with dppd(df) as (ddf, X):
            # Kept as one expression so groupby() runs before
            # X.itergroups() is evaluated, exactly as written originally.
            ddf.groupby("cyl").mutate(
                grp_rank={
                    key: group.hp.rank() for key, group in X.itergroups()
                }
            )
def test_rename():
    """rename() accepts a mapping whose new names overlap old ones."""
    df = pd.DataFrame(
        {
            "a": list(map(str, range(10))),
            "bb": 10,
            "ccc": list(range(20, 30)),
        }
    )
    mapping = {"a": "a2", "bb": "ccc", "ccc": "c2"}

    with dppd(df) as (ndf, X):
        ndf.rename(columns=mapping)

    assert (X.columns == ["a2", "ccc", "c2"]).all()
def test_forking():
    """A captured intermediate result can be fed back in through dp(...)."""
    dp, X = dppd()
    selected = dp(mtcars).select(["name", "hp", "cyl"])
    name_head = dp.unselect("hp").select(X.name).head().pd

    # Going through X on the captured intermediate is expected to fail ...
    with pytest.raises(AttributeError):
        hp_head = selected.select(X.hp).head().pd
    # ... whereas starting a new pipeline from it with dp(...) works.
    hp_head = dp(selected).select(X.hp).head().pd

    assert_series_equal(hp_head["hp"], mtcars["hp"].head())
    assert_series_equal(name_head["name"], mtcars["name"].head())
    assert X == None  # noqa:E711 since it's the proxy, is will fail
def test_dir():
    """dir() on a dp lists every registered DataFrame entry and more."""
    from dppd import base

    dp, X = dppd()
    listed = set(dir(dp(mtcars)))
    registered = set(base.property_registry[pd.DataFrame])
    missing = registered - listed

    # Printed to ease debugging when one of the assertions fails.
    print(sorted(listed))
    print(sorted(missing))

    assert not missing
    assert len(listed) > len(registered)
def test_context_manager():
    """Verbs issued in a context manager are applied one after another."""
    columns = {
        "a": list(map(str, range(10))),
        "bb": 10,
        "ccc": list(range(20, 30)),
    }
    df = pd.DataFrame(columns).set_index("a")

    with dppd(df) as (d, X):
        d.head(5)
        d.tail(1)

    # head(5) followed by tail(1) leaves only the fifth row.
    expected = df.iloc[4:5]
    assert_frame_equal(X, expected)
def test_context_manager_totally_to_pandas():
    """`X.pd` turns the proxy into an object that is no longer a proxy."""
    columns = {
        "a": list(map(str, range(10))),
        "bb": 10,
        "ccc": list(range(20, 30)),
    }
    df = pd.DataFrame(columns).set_index("a")

    with dppd(df) as (d, X):
        d.head(5)
        assert d.shape == (5, 2)
        d.tail(1)

    expected = df.iloc[4:5]
    assert_frame_equal(X, expected)
    assert isinstance(X, wrapt.ObjectProxy)

    # Unwrap and check that the same data is still there.
    X = X.pd
    assert not isinstance(X, wrapt.ObjectProxy)
    assert_frame_equal(X, expected)
def test_mixing_context_manager_and_dp():
    """A context-manager pipeline and the module-level dp/X do not interfere.

    The mtcars pipeline runs through the context manager (dpY, Y) while a
    diamonds pipeline runs through the module-level ``dp`` / ``X`` pair in
    between; each must produce the same result as plain pandas.
    """
    with dppd(mtcars) as (dpY, Y):
        dpY.sort_values("hp")
        # The cut label is "Ideal" (capitalised), as used by the other test
        # in this file; the former lower-case "ideal" matched no rows, so
        # the diamonds comparison only ever compared two empty frames.
        dp(diamonds).filter_by(X.cut == "Ideal")
        dpY.filter_by(Y.cyl.isin([4, 6]))
        actual_diamonds = dp().sort_values("price").head().pd
        actual_mtcars_full = dpY.pd
        dpY.head()
        actual_mtcars = dpY.pd

    should_diamonds = (
        diamonds[diamonds.cut == "Ideal"].sort_values("price").head()
    )
    should_mtcars = mtcars.sort_values("hp")
    should_mtcars_full = should_mtcars[should_mtcars["cyl"].isin([4, 6])]
    should_mtcars = should_mtcars_full.head()

    assert_frame_equal(should_diamonds, actual_diamonds)
    assert_frame_equal(should_mtcars, actual_mtcars)
    assert_frame_equal(should_mtcars_full, actual_mtcars_full)
def test_dataframe_subscript():
    """Subscripting a dp with a column name gives that column as a series."""
    with dppd(mtcars) as (dp, X):
        first_five = dp.head(5)
        actual = first_five["name"].pd

    expected = mtcars["name"].head(5)
    assert_series_equal(actual, expected)
def test_dp_on_empty_stack_raises():
    """Calling dp() with no argument and no prior pipeline raises ValueError."""
    dp, _ = dppd()

    with pytest.raises(ValueError):
        dp()
def test_no_attribute_no_verb_raises_attribute_error_context_manager():
    """An unknown name on a dp inside a context manager raises AttributeError."""
    df = pd.DataFrame({"a": list(map(str, range(10)))})

    with pytest.raises(AttributeError):
        with dppd(df) as (dp, X):
            dp.shu()
import itertools import hashlib import pypipegraph as ppg import numpy as np import pandas as pd from mbf_qualitycontrol import register_qc, qc_disabled from mbf_genomics.util import parse_a_or_c_to_anno from mbf_genomics.annotator import Annotator import dppd import dppd_plotnine # noqa: F401 dp, X = dppd.dppd() # import pypipegraph as ppg class ComparisonAnnotator(Annotator): def __init__( self, comparisons, group_a, group_b, comparison_strategy, laplace_offset=1 / 1e6, other_groups_for_variance=[], ): """Create a comparison (a - b) """ self.comparisons = comparisons
# forwards for compatibility with old chipseq code from .raw import Sample import pypipegraph as ppg import pysam from pathlib import Path import pandas as pd import collections from dppd import dppd import dppd_plotnine # noqa:F401 - from mbf_qualitycontrol import register_qc, QCCollectingJob, qc_disabled dp, X = dppd() class _BamDerived: def _parse_alignment_job_input(self, alignment_job): if isinstance(alignment_job, (str, Path)): alignment_job = ppg.FileInvariant(alignment_job) if not isinstance(alignment_job, (ppg.FileInvariant, ppg.FileGeneratingJob)): raise ValueError( "alignment_job must be a ppg.FileGeneratingJob or FileChecksumInvariant" "was %s" % (type(alignment_job))) bam_name = None bai_name = None for fn in alignment_job.filenames: if str(fn).endswith(".bam"): if bam_name is None: bam_name = str(fn) else:
def test_context_manager_chain():
    """The X from one context manager can seed a following one."""
    hp_to_kw = 0.7457

    with dppd(mtcars) as (dp, X):
        dp.mutate(kw=X.hp * hp_to_kw)

    # Start a second pipeline from the result of the first.
    with dppd(X) as (dp, X):
        dp.mutate(watt=X.kw * 1000)

    assert "watt" in X.columns