Python dppd Examples, dppd.dppd Python Examples

Example #1

0

Show file

def test_interleaved_context_managers():
    """Two dppd contexts open at the same time each yield their own result."""
    with dppd(mtcars) as (cars_dp, cars), dppd(diamonds) as (gems_dp, gems):
        # Verbs on the two handles are deliberately alternated.
        cars_dp.groupby("cyl")
        gems_dp.filter_by(gems.cut == "Ideal")
        cars_dp.summarize(("hp", np.mean, "mean_hp"))
        gems_dp.summarize(("price", np.max, "max_price"))

    # Plain-pandas reference for the mtcars pipeline.
    mean_hp_by_cyl = mtcars.groupby("cyl")[["hp"]].agg(np.mean)
    expected_cars = mean_hp_by_cyl.rename(columns={"hp": "mean_hp"})
    expected_cars = expected_cars.reset_index()

    # Plain-pandas reference for the diamonds pipeline.
    ideal_maxima = diamonds[diamonds.cut == "Ideal"].max()
    expected_gems = pd.DataFrame(ideal_maxima[["price"]]).transpose()
    expected_gems = expected_gems.rename(columns={"price": "max_price"})
    expected_gems["max_price"] = expected_gems["max_price"].astype(int)

    assert_frame_equal(cars, expected_cars)
    assert_frame_equal(gems, expected_gems)

Example #2

0

Show file

def test_straight_dp_raises():
    """A handle created without a DataFrame raises ValueError when used."""
    handle, _proxy = dppd()

    # Calling a verb directly on the bare handle.
    with pytest.raises(ValueError):
        handle.select(["hp", "cyl"])

    # Attribute access followed by subscripting on the bare handle.
    with pytest.raises(ValueError):
        handle.loc[5]

Example #3

0

Show file

def test_forking_context_manager():
    """Intermediate results taken inside a context match plain pandas."""
    with dppd(mtcars) as (pipe, frame):
        three_cols = pipe.select(["name", "hp", "cyl"])
        names_head = pipe.select("name").head().pd
        hp_head = three_cols.select("hp").head().pd
        pipe.head()

    expected_hp = mtcars["hp"].head()
    expected_names = mtcars["name"].head()
    expected_frame = mtcars[["hp"]].head()

    assert_series_equal(hp_head["hp"], expected_hp)
    assert_series_equal(names_head["name"], expected_names)
    assert_frame_equal(frame, expected_frame)

Example #4

0

Show file

def test_stacking():
    """Check what the proxy shows while two frames are started on one handle.

    After the second pipeline is converted with ``.pd`` the proxy compares
    equal to the first pipeline's frame; after that one is converted too,
    the proxy compares equal to ``None``.
    """
    handle, proxy = dppd()
    handle(mtcars).select(["name", "hp", "cyl"])
    hp_only = handle(mtcars).select("hp").pd
    assert_frame_equal(hp_only, mtcars[["hp"]])

    expected_three = mtcars[["name", "hp", "cyl"]]
    assert_frame_equal(proxy, expected_three)

    remaining = handle.pd
    assert_frame_equal(remaining, expected_three)

    # `==` on purpose: `proxy` is a proxy object, so an `is None` check fails.
    assert proxy == None  # noqa:E711

Example #5

0

Show file

def test_grouped_mutate_repeated_keys():
    """Grouped mutate on a frame with a duplicated index raises ValueError."""
    frame = mtcars.copy()
    frame.index = list(range(16)) * 2  # every index label occurs twice

    # Expected failure: cannot reindex from duplicate axis.
    with pytest.raises(ValueError), dppd(frame) as (pipe, X):
        # groupby is applied before X.itergroups() is evaluated, exactly as
        # in a single chained call.
        grouped_mutate = pipe.groupby("cyl").mutate
        ranks = {}
        for key, members in X.itergroups():
            ranks[key] = members.hp.rank()
        grouped_mutate(grp_rank=ranks)

Example #6

0

Show file

File: test_pandas_forwards.py Project: qicst23/dppd

def test_rename():
    """rename(columns=...) inside a context shows up in the result's columns."""
    source = pd.DataFrame(
        {"a": list(map(str, range(10))), "bb": 10, "ccc": list(range(20, 30))}
    )
    column_map = {"a": "a2", "bb": "ccc", "ccc": "c2"}

    with dppd(source) as (pipe, result):
        pipe.rename(columns=column_map)

    matches = result.columns == ["a2", "ccc", "c2"]
    assert matches.all()

Example #7

0

Show file

def test_forking():
    """Continuing from a kept intermediate needs it to be passed to the handle.

    Using the proxy with the kept intermediate directly raises
    AttributeError; handing the intermediate back to the handle works.
    """
    handle, proxy = dppd()
    selected = handle(mtcars).select(["name", "hp", "cyl"])
    names = handle.unselect("hp").select(proxy.name).head().pd

    with pytest.raises(AttributeError):
        selected.select(proxy.hp).head().pd

    horsepower = handle(selected).select(proxy.hp).head().pd

    expected_hp = mtcars["hp"].head()
    expected_names = mtcars["name"].head()
    assert_series_equal(horsepower["hp"], expected_hp)
    assert_series_equal(names["name"], expected_names)

    # `==` on purpose: `proxy` is a proxy object, so an `is None` check fails.
    assert proxy == None  # noqa:E711

Example #8

0

Show file

def test_dir():
    """dir() on a wrapped DataFrame covers base.property_registry and more."""
    from dppd import base

    handle, _proxy = dppd()
    listed = set(dir(handle(mtcars)))
    registered = set(base.property_registry[pd.DataFrame])
    missing = registered - listed

    # Printed so that a failing run shows what was listed and what was missing.
    print(sorted(listed))
    print(sorted(missing))

    assert len(missing) == 0
    assert len(registered) < len(listed)

Example #9

0

Show file

def test_context_manager():
    """Verbs issued one after another inside a context build on each other."""
    frame = pd.DataFrame(
        {"a": list(map(str, range(10))), "bb": 10, "ccc": list(range(20, 30))}
    )
    frame = frame.set_index("a")

    with dppd(frame) as (pipe, result):
        pipe.head(5)
        pipe.tail(1)

    # The expected result is the single row at position 4.
    expected = frame.iloc[4:5]
    assert_frame_equal(result, expected)

Example #10

0

Show file

def test_context_manager_totally_to_pandas():
    """The context result is a wrapt proxy; ``.pd`` yields a non-proxy object."""
    frame = pd.DataFrame(
        {"a": list(map(str, range(10))), "bb": 10, "ccc": list(range(20, 30))}
    )
    frame = frame.set_index("a")

    with dppd(frame) as (pipe, result):
        pipe.head(5)
        assert pipe.shape == (5, 2)
        pipe.tail(1)

    expected = frame.iloc[4:5]
    assert_frame_equal(result, expected)
    assert isinstance(result, wrapt.ObjectProxy)

    unwrapped = result.pd
    assert not isinstance(unwrapped, wrapt.ObjectProxy)
    assert_frame_equal(unwrapped, expected)

Example #11

0

Show file

def test_mixing_context_manager_and_dp():
    """A context-managed pipeline can be interleaved with the outer ``dp``/``X`` pair."""
    with dppd(mtcars) as (cars_dp, cars):
        cars_dp.sort_values("hp")
        # NOTE(review): lowercase "ideal" is used for both the actual and the
        # expected frame; confirm it matches the dataset's category spelling.
        dp(diamonds).filter_by(X.cut == "ideal")
        cars_dp.filter_by(cars.cyl.isin([4, 6]))
        got_diamonds = dp().sort_values("price").head().pd
        got_cars_filtered = cars_dp.pd
        cars_dp.head()
    got_cars_head = cars_dp.pd

    # Plain-pandas references.
    ideal_only = diamonds[diamonds.cut == "ideal"]
    want_diamonds = ideal_only.sort_values("price").head()
    cars_by_hp = mtcars.sort_values("hp")
    want_cars_filtered = cars_by_hp[cars_by_hp["cyl"].isin([4, 6])]
    want_cars_head = want_cars_filtered.head()

    assert_frame_equal(want_diamonds, got_diamonds)
    assert_frame_equal(want_cars_head, got_cars_head)
    assert_frame_equal(want_cars_filtered, got_cars_filtered)

Example #12

0

Show file

File: test_pandas_forwards.py Project: qicst23/dppd

def test_dataframe_subscript():
    """Subscripting a pipeline step with a column name gives that column."""
    with dppd(mtcars) as (pipe, _result):
        name_column = pipe.head(5)["name"].pd

    expected = mtcars["name"].head(5)
    assert_series_equal(name_column, expected)

Example #13

0

Show file

def test_dp_on_empty_stack_raises():
    """Calling a fresh handle with no arguments raises ValueError."""
    handle, _proxy = dppd()
    with pytest.raises(ValueError):
        handle()

Example #14

0

Show file

def test_no_attribute_no_verb_raises_attribute_error_context_manager():
    """Calling an unknown name on the handle in a context raises AttributeError."""
    frame = pd.DataFrame({"a": list(map(str, range(10)))})
    with pytest.raises(AttributeError), dppd(frame) as (pipe, _result):
        pipe.shu()

Example #15

0

Show file

import itertools
import hashlib
import pypipegraph as ppg
import numpy as np
import pandas as pd
from mbf_qualitycontrol import register_qc, qc_disabled
from mbf_genomics.util import parse_a_or_c_to_anno
from mbf_genomics.annotator import Annotator
import dppd
import dppd_plotnine  # noqa: F401

# Module-level pair returned by dppd.dppd(), unpacked into `dp` and `X`.
# NOTE(review): presumably `dp` starts pipelines and `X` is the proxy for the
# DataFrame currently being processed - confirm against the dppd docs.
dp, X = dppd.dppd()

# import pypipegraph as ppg


class ComparisonAnnotator(Annotator):
    def __init__(
        self,
        comparisons,
        group_a,
        group_b,
        comparison_strategy,
        laplace_offset=1 / 1e6,  # i.e. 1e-6
        # NOTE(review): a list literal used as a default is created once and
        # shared between calls - verify that it is never mutated.
        other_groups_for_variance=[],
    ):
        """Create a comparison (a - b)."""
        self.comparisons = comparisons

Example #16

0

Show file

# forwards for compatibility with old chipseq code

from .raw import Sample
import pypipegraph as ppg
import pysam
from pathlib import Path
import pandas as pd
import collections
from dppd import dppd
import dppd_plotnine  # noqa:F401 -
from mbf_qualitycontrol import register_qc, QCCollectingJob, qc_disabled

# Module-level pair returned by dppd(), unpacked into `dp` and `X`.
# NOTE(review): presumably `dp` starts pipelines and `X` is the proxy for the
# DataFrame currently being processed - confirm against the dppd docs.
dp, X = dppd()


class _BamDerived:
    def _parse_alignment_job_input(self, alignment_job):
        if isinstance(alignment_job, (str, Path)):
            alignment_job = ppg.FileInvariant(alignment_job)
        if not isinstance(alignment_job,
                          (ppg.FileInvariant, ppg.FileGeneratingJob)):
            raise ValueError(
                "alignment_job must be a ppg.FileGeneratingJob or FileChecksumInvariant"
                "was %s" % (type(alignment_job)))
        bam_name = None
        bai_name = None
        for fn in alignment_job.filenames:
            if str(fn).endswith(".bam"):
                if bam_name is None:
                    bam_name = str(fn)
                else:

Example #17

0

Show file

def test_context_manager_chain():
    """The result of one context can be fed straight into a second context."""
    with dppd(mtcars) as (first_pipe, with_kw):
        first_pipe.mutate(kw=with_kw.hp * 0.7457)

    with dppd(with_kw) as (second_pipe, with_watt):
        second_pipe.mutate(watt=with_watt.kw * 1000)

    assert "watt" in with_watt.columns