def test_to_plot_name(self):
        """Check what ``parse_a_or_c_to_plot_name`` returns for each input form.

        Exercised inputs: a plain string, a ``Constant``, a bare
        ``PolyConstant``, ``(annotator, index)`` and ``(annotator, column)``
        tuples, the error cases (unknown column, out-of-range index,
        unsupported type, over-long tuple), and a ``PolyConstant`` built with a
        third positional argument, whose value is expected back in every form.
        """

        def poly(*extra):
            # A new annotator (and new argument lists) per call, so the number
            # and order of constructions stays the same as writing it inline.
            return PolyConstant(["shu", "sha"], [5, 10], *extra)

        to_name = parse_a_or_c_to_plot_name

        # Inputs that resolve to a name.
        assert to_name("hello") == "hello"
        assert to_name(Constant("shu", 5)) == "shu"
        assert to_name(poly()) == "shu"
        assert to_name((poly(), 1)) == "sha"
        assert to_name((poly(), "sha")) == "sha"

        # Unknown column name / index past the last column.
        with pytest.raises(KeyError):
            to_name((poly(), "shi"))
        with pytest.raises(IndexError):
            to_name((poly(), 5))

        # Neither a string, an annotator nor a two-element tuple.
        with pytest.raises(ValueError):
            to_name(5)
        with pytest.raises(ValueError):
            to_name((Constant("shu", 5), "shu", 3))

        # With the third positional argument given, "hello" is expected
        # regardless of which column is selected.
        assert to_name(poly("hello")) == "hello"
        assert to_name((poly("hello"), "sha")) == "hello"
        assert to_name((poly("hello"), 1)) == "hello"
    def test_multi_plus_filter(self, clear_annotators):
        """Run a table of filter specifications against a log2FC comparison.

        Each entry of ``to_test`` pairs a filter specification with either the
        log2FC values expected to survive the filter or the exception type the
        ``filter`` call is expected to raise.
        """
        frame = pd.DataFrame({
            "a1": [1 / 0.99, 2 / 0.99, 3 / 0.99],
            "a2": [1 * 0.99, 2 * 0.99, 3 * 0.99],
            "b1": [2 * 0.99, 8 * 0.99, (16 * 3) * 0.99],
            "b2": [2 / 0.99, 8 / 0.99, (16 * 3) / 0.99],
            "delta": [10, 20, 30],
        })
        d = DelayedDataFrame("ex1", frame)
        comparisons = Comparisons(d, {"a": ["a1", "a2"], "b": ["b1", "b2"]})
        a = comparisons.a_vs_b("a", "b", Log2FC(), laplace_offset=0)
        anno1 = Constant("shu1", 5)
        # Never referenced as objects; "shu2" is only addressed by column name
        # in the table below (presumably resolved via an annotator registry).
        anno2 = Constant("shu2", 5)  # noqa: F841
        anno3 = Constant("shu3", 5)  # noqa: F841
        to_test = [
            (("log2FC", "==", -1.0), [-1.0]),
            (("log2FC", ">", -2.0), [-1.0]),
            (("log2FC", "<", -2.0), [-4.0]),
            (("log2FC", ">=", -2.0), [-1.0, -2.0]),
            (("log2FC", "<=", -2.0), [-2.0, -4.0]),
            (("log2FC", "|>", 2.0), [-4.0]),
            (("log2FC", "|<", 2.0), [-1.0]),
            (("log2FC", "|>=", 2.0), [-2.0, -4.0]),
            (("log2FC", "|<=", 2.0), [-1.0, -2.0]),
            ((a["log2FC"], "<", -2.0), [-4.0]),
            (("log2FC", "|", -2.0), ValueError),
            ([("log2FC", "|>=", 2.0), ("log2FC", "<=", 0)], [-2.0, -4.0]),
            ((anno1, ">=", 5), [-1, -2.0, -4.0]),
            (((anno1, 0), ">=", 5), [-1, -2.0, -4.0]),
            (("shu2", ">=", 5), [-1, -2.0, -4.0]),
            (("delta", ">", 10), [-2.0, -4.0]),
        ]
        if not ppg.inside_ppg():  # can't test for missing columns in ppg.
            to_test.extend([(("log2FC_no_such_column", "<", -2.0), KeyError)])

        error_types = (ValueError, KeyError)
        filtered = {}
        for ii, (spec, expected) in enumerate(to_test):
            new_name = "new%i" % ii
            if expected in error_types:
                with pytest.raises(expected):
                    a.filter([spec], new_name)
                continue
            # A single criterion is wrapped in a list; a list of criteria is
            # passed through unchanged.
            criteria = [spec] if isinstance(spec, tuple) else spec
            child = a.filter(criteria, new_name)
            filtered[tuple(spec)] = child
            assert child.name == new_name
            force_load(child.annotate(), child.name)

        force_load(d.add_annotator(a), "somethingsomethingjob")
        run_pipegraph()
        column = a["log2FC"]
        assert (d.df[column] == [-1.0, -2.0, -4.0]).all()
        for spec, expected in to_test:
            if expected in error_types:
                continue
            try:
                assert filtered[tuple(spec)].df[column].values == approx(expected)
            except AssertionError:
                # Show which specification failed before re-raising.
                print(spec)
                raise
Exemple #3
0
 def test_annos_same_column_different_anno(self):
     """Adding a second, different annotator for an existing column must fail.

     ``Constant("hello2", "c")`` is added first; adding
     ``Constant("hello2", "d")`` afterwards is expected to raise
     ``ValueError``.
     """
     frame = DelayedDataFrame(
         "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
     )
     frame += Constant("hello", "c")
     frame += Constant("hello2", "c")
     # Same column name as the previous annotator, different value.
     clashing = Constant("hello2", "d")
     with pytest.raises(ValueError):
         frame += clashing
 def test_simple_from_anno_plus_column_pos(self):
     """Compare two groups given as ``(annotator, column index)`` tuples.

     Group "a" is the constant 5 and group "b" the constant 10; the resulting
     log2FC column is expected to be -1 in every row.
     """
     table = pd.DataFrame({
         "a": [1, 2, 3],
         "b": [2, 8, 16 * 3]
     })
     d = DelayedDataFrame("ex1", table)
     five = Constant("five", 5)
     ten = Constant("ten", 10)
     groups = {"a": [(five, 0)], "b": [(ten, 0)]}
     comparison = Comparisons(d, groups).a_vs_b(
         "a", "b", Log2FC(), laplace_offset=0)
     force_load(d.add_annotator(comparison), "fl1")
     run_pipegraph()
     assert (d.df[comparison["log2FC"]] == [-1, -1, -1]).all()
Exemple #5
0
 def test_annotators_are_kept_on_filtering(self):
     """A filtered gene set must still report its parent's annotator.

     The filter keeps rows with ``chr == "4"``, which matches none of the
     three fixture genes (they are on chromosomes "1", "1" and "2").
     """
     fields = ("stable_id", "chr", "strand", "tss", "tes", "description")
     rows = [
         ("fake1", "1", 1, 5000, 5500, "bla"),
         ("fake2", "1", -1, 5400, 4900, "bla"),
         ("fake3", "2", -1, 5400, 4900, "bla"),
     ]
     # One dict per gene, keys in the order given by ``fields``.
     gene_table = pd.DataFrame([dict(zip(fields, row)) for row in rows])
     g = genes.Genes(MockGenome(gene_table))
     ca = Constant("shu", 5)
     g.add_annotator(ca)
     filtered = g.filter("nogenes", lambda df: df["chr"] == "4")
     assert filtered.has_annotator(ca)
Exemple #6
0
    def test_random_same_number(self):
        """Convert a ``GenomicRegions`` and compare the result with its source.

        The conversion keeps only ``chr``/``start``/``stop`` and shifts
        ``start`` by one. Afterwards both objects must have the same number of
        rows, the starts must differ by exactly one, and the extra source
        column must exist only on the source.

        NOTE(review): the method name does not describe what the body checks;
        it is kept unchanged because the test is collected by name.
        """

        def sample_data():
            return pd.DataFrame({
                "chr": ["1", "2", "1"],
                "start": [10, 100, 1000],
                "stop": [12, 110, 1110],
                "column_that_will_disappear": ["A", "b", "c"],
            })

        def convert(df):
            res = df[["chr", "start", "stop"]]
            res = res.assign(start=res["start"] + 1)
            return res

        if ppg.inside_ppg():
            # The parameter is the plain string "hello"; the original wrote it
            # as ("hello"), which looks like a tuple but is not one.
            deps = [ppg.ParameterInvariant("shuParam", "hello")]
        else:
            deps = []
        a = regions.GenomicRegions("sharum", sample_data, [],
                                   get_genome_chr_length())
        a.add_annotator(Constant("Constant", 5))
        a.annotate()
        b = a.convert("a+1", convert, dependencies=deps)
        force_load(b.load())
        # Every dependency handed to convert() must be a prerequisite of the
        # converted object's load job (the loop is empty outside of ppg).
        for d in deps:
            assert d in b.load().lfg.prerequisites
        run_pipegraph()
        assert len(a.df) == len(b.df)
        assert (a.df["start"] == b.df["start"] - 1).all()
        assert "column_that_will_disappear" in a.df.columns
        assert "column_that_will_disappear" not in b.df.columns
Exemple #7
0
 def test_forbidden_cache_names(self):
     """Adding annotators with unacceptable names must raise ``ValueError``.

     The four names contain ``*``, ``/`` or ``?``, or are 200 characters long
     (``"c4" * 100``). Presumably the name of a ``Constant`` is also used as
     its cache name, hence the test name -- verify against ``Constant``.
     """
     frame = DelayedDataFrame(
         "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
     )
     # All four are constructed up front; only adding them is expected to fail.
     offenders = [
         Constant("c1*", "*"),
         Constant("c2/", "*"),
         Constant("c3?", "*"),
         Constant("c4" * 100, "*"),
     ]
     for offender in offenders:
         with pytest.raises(ValueError):
             frame += offender
Exemple #8
0
    def test_filtering(self):
        """Check how annotator columns propagate between parent and child.

        Columns from annotators added to the parent -- before or after the
        child was created by ``filter`` -- must appear on the child. A column
        from an annotator added only to the child must not appear on the
        parent.
        """

        # Adds column "aa" holding "a" in every row.
        class A(Annotator):
            cache_name = "A"
            columns = ["aa"]

            def calc(self, df):
                return pd.DataFrame({self.columns[0]: "a"}, index=df.index)

        # Adds column "ab", computed from column "aa"; declares A as dependency.
        class B(Annotator):
            cache_name = "B"
            columns = ["ab"]

            def calc(self, df):
                return df["aa"] + "b"

            def dep_annos(self):
                return [A()]

        a = DelayedDataFrame(
            "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
        )
        a += Constant("C", "c")
        assert "C" in a.df.columns
        b = a.filter("sha", lambda df: df["A"] == 1)
        assert "C" in b.df.columns
        # Added to the parent after the child already exists.
        a += A()
        assert "aa" in a.df.columns
        assert "aa" in b.df.columns
        # Added to the child only.
        b += B()
        assert "ab" in b.df.columns
        assert "ab" not in a.df.columns
Exemple #9
0
 def test_annotator(self):
     """A ``Constant`` annotator adds its column with its value in every row."""
     frame = DelayedDataFrame(
         "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
     )
     constant = Constant("column", "value")
     frame += constant
     frame.annotate()
     assert "column" in frame.df.columns
     assert (frame.df["column"] == "value").all()
Exemple #10
0
 def test_annotator_basic(self):
     """Annotate through the pipegraph and check the resulting column."""
     frame = DelayedDataFrame(
         "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
     )
     frame += Constant("aa", "aa")
     annotation_jobs = frame.annotate()
     force_load(annotation_jobs)
     ppg.run_pipegraph()
     assert (frame.df["aa"] == "aa").all()
    def test_to_column(self):
        """Check what ``parse_a_or_c_to_column`` returns for each input form.

        Exercised inputs: a plain string, a ``Constant``, a bare
        ``PolyConstant``, ``(annotator, index)``, ``(annotator, column)`` and
        ``(None, column)`` tuples, plus the error cases (unknown column,
        out-of-range index, unsupported type, over-long tuple).
        """

        def poly():
            # A new annotator (and new argument lists) per call, so the number
            # and order of constructions stays the same as writing it inline.
            return PolyConstant(["shu", "sha"], [5, 10])

        to_column = parse_a_or_c_to_column

        # Inputs that resolve to a column name.
        assert to_column("hello") == "hello"
        assert to_column(Constant("shu", 5)) == "shu"
        assert to_column(poly()) == "shu"
        assert to_column((poly(), 1)) == "sha"
        assert to_column((poly(), "sha")) == "sha"
        assert to_column((None, "shi")) == "shi"

        # Unknown column name / index past the last column.
        with pytest.raises(KeyError):
            to_column((poly(), "shi"))
        with pytest.raises(IndexError):
            to_column((poly(), 5))

        # Neither a string, an annotator nor a two-element tuple.
        with pytest.raises(ValueError):
            to_column(5)
        with pytest.raises(ValueError):
            to_column((Constant("shu", 5), "shu", 3))
Exemple #12
0
 def test_annotator_coliding_with_non_anno_column(self):
     """An annotator named like an existing data column fails the pipegraph.

     The frame already has a column "A"; a ``Constant`` that would also
     produce "A" is added. The run must raise ``ppg.RuntimeError`` and the
     exception recorded for the "A" annotation job must mention
     "were already present".
     """
     frame = DelayedDataFrame(
         "shu",
         lambda: pd.DataFrame(
             {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
         ).set_index("idx"),
     )
     frame += Constant("A", "aa")
     # Kept before the run; calling it afterwards yields the object whose
     # ``exception`` attribute is inspected below.
     anno_job = frame.anno_jobs["A"]
     trigger = ppg.JobGeneratingJob("shu", lambda: 55)
     trigger.depends_on(frame.annotate())
     with pytest.raises(ppg.RuntimeError):
         ppg.run_pipegraph()
     message = str(anno_job().exception)
     assert "were already present" in message
    def test_to_anno(self):
        """Check what ``parse_a_or_c_to_anno`` returns for each input form.

        A plain string gives ``None``; an annotator, or a tuple of annotator
        and index / column name, gives something equal to that annotator.
        The error cases are an unknown column, an out-of-range index, an
        unsupported type and an over-long tuple.
        """

        def poly():
            # A new annotator (and new argument lists) per call, so the number
            # and order of constructions stays the same as writing it inline.
            return PolyConstant(["shu", "sha"], [5, 10])

        to_anno = parse_a_or_c_to_anno

        assert to_anno("hello") is None
        assert to_anno(Constant("shu", 5)) == Constant("shu", 5)
        assert to_anno(poly()) == poly()
        assert to_anno((poly(), 1)) == poly()
        assert to_anno((poly(), "sha")) == poly()

        # Unknown column name / index past the last column.
        with pytest.raises(KeyError):
            to_anno((poly(), "shi"))
        with pytest.raises(IndexError):
            to_anno((poly(), 5))

        # Neither a string, an annotator nor a two-element tuple.
        with pytest.raises(ValueError):
            to_anno(5)
        with pytest.raises(ValueError):
            to_anno((Constant("shu", 5), "shu", 3))
    def test_find_annos_from_column(self, both_ppg_and_no_ppg_no_qc,
                                    clear_annotators):
        """Look up annotators by the column name they provide.

        Merely constructing an annotator makes it findable; a second annotator
        for the same column is appended to the result; an unknown column
        raises ``KeyError``. Inside ppg, a new pipegraph is expected to make
        the earlier annotators unfindable again.
        """
        const = Constant("shu", 5)
        assert find_annos_from_column("shu") == [const]
        # The very same object is returned, not merely an equal one.
        assert find_annos_from_column("shu")[0] is const
        with pytest.raises(KeyError):
            find_annos_from_column("nosuchcolumn")

        poly = PolyConstant(["shu"], [10])
        assert find_annos_from_column("shu") == [const, poly]

        if not ppg.inside_ppg():
            return
        both_ppg_and_no_ppg_no_qc.new_pipegraph()
        with pytest.raises(KeyError):
            find_annos_from_column("shu")
Exemple #15
0
 def test_multi_level(self):
     """Annotate across two levels of filtering.

     The first filter (on the constant column "C") keeps all three rows, the
     second keeps the two rows with ``A >= 2``. The expected values "count3"
     and "count22" suggest that ``LenAnno`` appends the row count of the frame
     it was added to -- TODO confirm against ``LenAnno``.
     """
     root = DelayedDataFrame(
         "shu",
         lambda: pd.DataFrame(
             {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
         ).set_index("idx"),
     )
     level1 = root.filter("sha", lambda df: df["C"] == 4, Constant("C", 4))
     level1 += LenAnno("count")
     level2 = level1.filter("shc", lambda df: df["A"] >= 2)
     level2 += LenAnno("count2")
     level2.write()
     ppg.run_pipegraph()
     assert len(level2.df) == 2
     assert (level2.df["A"] == [2, 3]).all()
     assert (level2.df["count"] == "count3").all()
     assert (level2.df["count2"] == "count22").all()
Exemple #16
0
    def test_filtering_on_annotator(self):
        """Filter on a column that only exists once an annotator is supplied.

        Filtering on "aa" without passing the annotator must raise
        ``KeyError``; passing ``[A()]`` makes the filter work, and the child
        then holds only the row with ``A == 1``.
        """

        # Fills column "aa" with alternating "a", "b", ... cut to the frame length.
        class A(Annotator):
            cache_name = "A"
            columns = ["aa"]

            def calc(self, df):
                n_rows = len(df)
                pattern = ["a", "b"] * int(n_rows / 2 + 1)
                return pd.DataFrame(
                    {self.columns[0]: pattern[:n_rows]},
                    index=df.index,
                )

        parent = DelayedDataFrame(
            "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
        )
        with pytest.raises(KeyError):
            parent.filter("sha", lambda df: df["aa"] == "a")
        child = parent.filter("sha", lambda df: df["aa"] == "a", [A()])
        canno = Constant("C", "c")
        parent += canno
        child += canno
        assert (child.df["A"] == [1]).all()
Exemple #17
0
 def gen():
     """Add a ``Constant("shu", 5)`` annotator to ``a``.

     ``a`` is not defined in this function; it comes from a scope that is not
     visible in this fragment.
     """
     register = a.add_annotator
     register(Constant("shu", 5))
Exemple #18
0
 def dep_annos(self):
     """Return a new ``Constant("Nestingconst", 5)`` and a new ``Nested()``."""
     constant = Constant("Nestingconst", 5)
     nested = Nested()
     return [constant, nested]
Exemple #19
0
 def dep_annos(self):
     """Return a one-element list holding a new ``Constant("Nestedconst", 5)``."""
     constant = Constant("Nestedconst", 5)
     return [constant]