Ejemplo n.º 1
0
def test_interleaved_context_managers():
    """Alternate verbs between two dppd contexts that are open at once.

    The mtcars context is expected to end up with the per-``cyl`` mean of
    ``hp``; the diamonds context with the maximum ``price`` among the rows
    whose ``cut`` equals ``"Ideal"``.
    """
    with dppd(mtcars) as (cars_dp, cars), dppd(diamonds) as (gems_dp, gems):
        cars_dp.groupby("cyl")
        gems_dp.filter_by(gems.cut == "Ideal")
        cars_dp.summarize(("hp", np.mean, "mean_hp"))
        gems_dp.summarize(("price", np.max, "max_price"))

    # Expected mtcars result, built with plain pandas.
    mean_hp = mtcars.groupby("cyl")[["hp"]].agg(np.mean)
    expected_cars = mean_hp.rename(columns={"hp": "mean_hp"}).reset_index()

    # Expected diamonds result: a one-row frame with an integer maximum.
    ideal_max = diamonds[diamonds.cut == "Ideal"].max()[["price"]]
    expected_gems = pd.DataFrame(ideal_max).transpose()
    expected_gems = expected_gems.rename(columns={"price": "max_price"})
    expected_gems["max_price"] = expected_gems["max_price"].astype(int)

    assert_frame_equal(cars, expected_cars)
    assert_frame_equal(gems, expected_gems)
Ejemplo n.º 2
0
def test_straight_dp_raises():
    """A dp created by ``dppd()`` without a frame rejects direct use.

    Both calling a verb and indexing through ``.loc`` must raise
    ``ValueError``.
    """
    handle, _proxy = dppd()

    with pytest.raises(ValueError):
        handle.select(["hp", "cyl"])
    with pytest.raises(ValueError):
        handle.loc[5]
Ejemplo n.º 3
0
def test_forking_context_manager():
    """Branch off intermediate results while inside a dppd context.

    Checks the two ``.pd`` results taken inside the block and the frame the
    proxy refers to once the block has been left.
    """
    with dppd(mtcars) as (pipe, result):
        three_columns = pipe.select(["name", "hp", "cyl"])
        names_head = pipe.select("name").head().pd
        hp_head = three_columns.select("hp").head().pd
        pipe.head()

    expected_hp = mtcars["hp"].head()
    expected_names = mtcars["name"].head()
    assert_series_equal(hp_head["hp"], expected_hp)
    assert_series_equal(names_head["name"], expected_names)
    assert_frame_equal(result, mtcars[["hp"]].head())
Ejemplo n.º 4
0
def test_stacking():
    """Start a second ``dp(mtcars)`` pipeline on top of an unfinished one.

    Taking ``.pd`` from the second pipeline must expose the first one's
    result again through the proxy; taking ``.pd`` once more must leave the
    proxy comparing equal to ``None``.
    """
    dp, X = dppd()
    expected_outer = mtcars[["name", "hp", "cyl"]]

    dp(mtcars).select(["name", "hp", "cyl"])
    inner = dp(mtcars).select("hp").pd
    assert_frame_equal(inner, mtcars[["hp"]])
    assert_frame_equal(X, expected_outer)

    outer = dp.pd
    assert_frame_equal(outer, expected_outer)
    # X is a proxy object, so an identity check (`is None`) would fail;
    # equality is the comparison that works here.
    assert X == None  # noqa: E711
Ejemplo n.º 5
0
def test_grouped_mutate_repeated_keys():
    """A grouped mutate on a frame with a duplicated index raises ValueError.

    The index is set to 0..15 twice, so every label occurs two times.
    """
    frame = mtcars.copy()
    frame.index = list(range(16)) * 2

    # Original author's note: "cannot reindex from duplicate axis".
    with pytest.raises(ValueError), dppd(frame) as (pipe, X):
        grouped = pipe.groupby("cyl")
        grouped.mutate(
            grp_rank={key: part.hp.rank() for key, part in X.itergroups()}
        )
Ejemplo n.º 6
0
def test_rename():
    """Renaming columns via dppd accepts a new name that equals an old one.

    ``bb`` becomes ``ccc`` while the former ``ccc`` becomes ``c2``.
    """
    frame = pd.DataFrame(
        {
            "a": list(map(str, range(10))),
            "bb": 10,
            "ccc": [*range(20, 30)],
        }
    )
    new_names = {"a": "a2", "bb": "ccc", "ccc": "c2"}

    with dppd(frame) as (pipe, X):
        pipe.rename(columns=new_names)

    assert (X.columns == ["a2", "ccc", "c2"]).all()
Ejemplo n.º 7
0
def test_forking():
    """Branch off a stored intermediate without a context manager.

    Using ``X.hp`` directly on the stored intermediate is expected to raise
    ``AttributeError``; wrapping the intermediate with ``dp(...)`` first is
    expected to work.
    """
    dp, X = dppd()
    stored = dp(mtcars).select(["name", "hp", "cyl"])
    names_head = dp.unselect("hp").select(X.name).head().pd

    with pytest.raises(AttributeError):
        stored.select(X.hp).head().pd

    hp_head = dp(stored).select(X.hp).head().pd
    assert_series_equal(hp_head["hp"], mtcars["hp"].head())
    assert_series_equal(names_head["name"], mtcars["name"].head())
    # X is a proxy object, so an identity check (`is None`) would fail;
    # equality is the comparison that works here.
    assert X == None  # noqa: E711
Ejemplo n.º 8
0
def test_dir():
    """``dir()`` of a dp wrapping a DataFrame covers the property registry.

    Every name in ``base.property_registry[pd.DataFrame]`` must be listed,
    and ``dir()`` must list strictly more names than that.
    """
    from dppd import base

    dp, X = dppd()
    available = set(dir(dp(mtcars)))
    required = set(base.property_registry[pd.DataFrame])
    missing = required - available

    # Printed so a failing run shows what was found and what was absent.
    print(sorted(available))
    print(sorted(missing))

    assert not missing
    assert len(available) > len(required)
Ejemplo n.º 9
0
def test_context_manager():
    """Successive verbs inside a dppd context build on each other.

    ``head(5)`` followed by ``tail(1)`` must leave only the fifth row.
    """
    raw = pd.DataFrame(
        {
            "a": list(map(str, range(10))),
            "bb": 10,
            "ccc": [*range(20, 30)],
        }
    )
    frame = raw.set_index("a")

    with dppd(frame) as (pipe, X):
        pipe.head(5)
        pipe.tail(1)

    fifth_row = frame.iloc[4:5]
    assert_frame_equal(X, fifth_row)
Ejemplo n.º 10
0
def test_context_manager_totally_to_pandas():
    """The proxy left by a dppd context can be unwrapped through ``.pd``.

    After the block the proxy is a ``wrapt.ObjectProxy``; its ``.pd``
    attribute is not, and both compare equal to the expected frame.
    """
    raw = pd.DataFrame(
        {
            "a": list(map(str, range(10))),
            "bb": 10,
            "ccc": [*range(20, 30)],
        }
    )
    frame = raw.set_index("a")

    with dppd(frame) as (pipe, X):
        pipe.head(5)
        # The intermediate state is already visible through the dp object.
        assert pipe.shape == (5, 2)
        pipe.tail(1)

    fifth_row = frame.iloc[4:5]
    assert_frame_equal(X, fifth_row)
    assert isinstance(X, wrapt.ObjectProxy)

    unwrapped = X.pd
    assert not isinstance(unwrapped, wrapt.ObjectProxy)
    assert_frame_equal(unwrapped, fifth_row)
Ejemplo n.º 11
0
def test_mixing_context_manager_and_dp():
    """Interleave a dppd context on mtcars with the outer ``dp``/``X`` pair.

    ``dp`` and ``X`` are not bound in this function, so they are taken from
    the enclosing scope; the context manager supplies ``cars_dp``/``cars``.

    NOTE(review): the filter value is lowercase ``"ideal"`` on both the dppd
    side and the pandas side. If the data only contains ``"Ideal"`` the
    diamonds comparison is between two empty frames - confirm the intent.
    """
    with dppd(mtcars) as (cars_dp, cars):
        cars_dp.sort_values("hp")
        dp(diamonds).filter_by(X.cut == "ideal")
        cars_dp.filter_by(cars.cyl.isin([4, 6]))
        got_diamonds = dp().sort_values("price").head().pd
        got_cars_full = cars_dp.pd
        cars_dp.head()
    got_cars_head = cars_dp.pd

    # Expected results, built with plain pandas.
    ideal_rows = diamonds[diamonds.cut == "ideal"]
    want_diamonds = ideal_rows.sort_values("price").head()
    cars_by_hp = mtcars.sort_values("hp")
    want_cars_full = cars_by_hp[cars_by_hp["cyl"].isin([4, 6])]
    want_cars_head = want_cars_full.head()

    assert_frame_equal(want_diamonds, got_diamonds)
    assert_frame_equal(want_cars_head, got_cars_head)
    assert_frame_equal(want_cars_full, got_cars_full)
Ejemplo n.º 12
0
def test_dataframe_subscript():
    """Subscripting a dp with a column name, then ``.pd``, gives that column."""
    with dppd(mtcars) as (pipe, X):
        first_names = pipe.head(5)["name"].pd

    expected = mtcars["name"].head(5)
    assert_series_equal(first_names, expected)
Ejemplo n.º 13
0
def test_dp_on_empty_stack_raises():
    """Calling a fresh dp with no argument raises ``ValueError``."""
    handle, _proxy = dppd()

    with pytest.raises(ValueError):
        handle()
Ejemplo n.º 14
0
def test_no_attribute_no_verb_raises_attribute_error_context_manager():
    """An unknown name on a dp inside a context raises ``AttributeError``."""
    frame = pd.DataFrame({"a": list(map(str, range(10)))})

    with pytest.raises(AttributeError), dppd(frame) as (pipe, X):
        pipe.shu()
Ejemplo n.º 15
0
import itertools
import hashlib
import pypipegraph as ppg
import numpy as np
import pandas as pd
from mbf_qualitycontrol import register_qc, qc_disabled
from mbf_genomics.util import parse_a_or_c_to_anno
from mbf_genomics.annotator import Annotator
import dppd
import dppd_plotnine  # noqa: F401

# Module-level pair returned by dppd.dppd(), unpacked as `dp` and `X`.
dp, X = dppd.dppd()

# import pypipegraph as ppg


class ComparisonAnnotator(Annotator):
    def __init__(
        self,
        comparisons,
        group_a,
        group_b,
        comparison_strategy,
        laplace_offset=1 / 1e6,
        # NOTE(review): a list default is created once and shared by every
        # call that omits this argument - confirm it is never mutated.
        other_groups_for_variance=[],
    ):
        """Create a comparison (a - b).

        ``comparisons`` is stored on the instance as ``self.comparisons``.
        ``laplace_offset`` defaults to ``1 / 1e6`` and
        ``other_groups_for_variance`` to an empty list.

        TODO(review): document the expected types of ``group_a``,
        ``group_b`` and ``comparison_strategy``.
        """
        self.comparisons = comparisons
Ejemplo n.º 16
0
# forwards for compatibility with old chipseq code

from .raw import Sample
import pypipegraph as ppg
import pysam
from pathlib import Path
import pandas as pd
import collections
from dppd import dppd
import dppd_plotnine  # noqa:F401 -
from mbf_qualitycontrol import register_qc, QCCollectingJob, qc_disabled

# Module-level pair returned by dppd(), unpacked as `dp` and `X`.
dp, X = dppd()


class _BamDerived:
    def _parse_alignment_job_input(self, alignment_job):
        if isinstance(alignment_job, (str, Path)):
            alignment_job = ppg.FileInvariant(alignment_job)
        if not isinstance(alignment_job,
                          (ppg.FileInvariant, ppg.FileGeneratingJob)):
            raise ValueError(
                "alignment_job must be a ppg.FileGeneratingJob or FileChecksumInvariant"
                "was %s" % (type(alignment_job)))
        bam_name = None
        bai_name = None
        for fn in alignment_job.filenames:
            if str(fn).endswith(".bam"):
                if bam_name is None:
                    bam_name = str(fn)
                else:
Ejemplo n.º 17
0
def test_context_manager_chain():
    """The proxy left by one dppd context can seed the next context.

    The first block adds a ``kw`` column computed from ``hp``; the second
    block is opened on that result and adds ``watt`` computed from ``kw``.
    """
    with dppd(mtcars) as (first_dp, first_X):
        first_dp.mutate(kw=first_X.hp * 0.7457)

    with dppd(first_X) as (second_dp, second_X):
        second_dp.mutate(watt=second_X.kw * 1000)

    assert "watt" in second_X.columns