Exemple #1
0
    def test_annos_dependening_none(self):
        """Dependencies listed next to ``None`` entries must still be applied.

        ``B.dep_annos`` returns ``[None, A(), None]``.  After annotating, the
        test expects both ``aa`` (from ``A``) and ``ab`` (from ``B``) to be
        present, with ``ab`` equal to ``aa`` plus the suffix ``"b"``.
        """

        class A(Annotator):
            cache_name = "hello"
            columns = ["aa"]

            def calc(self, df):
                # Constant "a" column aligned to the incoming frame's index.
                column_name = self.columns[0]
                return pd.DataFrame({column_name: "a"}, index=df.index)

        class B(Annotator):
            cache_name = "hello2"
            columns = ["ab"]

            def dep_annos(self):
                # Only the middle entry is an actual annotator.
                return [None, A(), None]

            def calc(self, df):
                return df["aa"] + "b"

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )
        ddf += B()
        ddf.annotate()

        assert "ab" in ddf.df.columns
        assert "aa" in ddf.df.columns
        expected_ab = ddf.df["aa"] + "b"
        assert (ddf.df["ab"] == expected_ab).all()
Exemple #2
0
    def test_annos_same_column_different_anno(self):
        """A second, distinct annotator claiming an existing column is rejected.

        Two annotators with different columns are added and annotated in turn;
        adding a third, different instance that declares the already-used
        column ``hello2`` is expected to raise ``ValueError``.
        """
        init_calls = [0]  # mutable cell so the nested class can count constructions

        class CountingConstant(Annotator):
            def __init__(self, column_name, value):
                init_calls[0] += 1
                self.columns = [column_name]
                self.value = value

            def calc(self, df):
                column_name = self.columns[0]
                return pd.DataFrame({column_name: self.value}, index=df.index)

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        first = CountingConstant("hello", "c")
        ddf += first
        ddf.annotate()
        assert "hello" in ddf.df.columns
        assert init_calls[0] == 1

        second = CountingConstant("hello2", "c")
        ddf += second
        ddf.annotate()
        assert "hello2" in ddf.df.columns
        assert init_calls[0] == 2

        # Same column name as ``second``, but a separate object with another value.
        clashing = CountingConstant("hello2", "d")
        assert second is not clashing
        with pytest.raises(ValueError):
            ddf += clashing
Exemple #3
0
 def test_annotator(self):
     """Adding a ``Constant`` annotator yields a column filled with its value."""
     ddf = DelayedDataFrame(
         "shu",
         lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
     )
     ddf += Constant("column", "value")
     ddf.annotate()

     assert "column" in ddf.df.columns
     matches_value = ddf.df["column"] == "value"
     assert matches_value.all()
Exemple #4
0
    def test_annos_dependening(self):
        """An annotator's declared dependency is applied in a pipegraph run.

        ``B`` declares ``A`` through ``dep_annos``.  After the pipegraph has
        run, both ``aa`` and ``ab`` must exist and ``ab`` must equal
        ``aa`` plus ``"b"``.
        """

        class A(Annotator):
            cache_name = "hello"
            columns = ["aa"]

            def calc(self, df):
                column_name = self.columns[0]
                return pd.DataFrame({column_name: "a"}, index=df.index)

        class B(Annotator):
            cache_name = "hello2"
            columns = ["ab"]

            def dep_annos(self):
                return [A()]

            def calc(self, df):
                return df["aa"] + "b"

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )
        ddf += B()

        # The generating job is created first, then made to depend on the
        # result of annotate() -- same evaluation order as a chained call.
        generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
        generating_job.depends_on(ddf.annotate())
        ppg.run_pipegraph()

        assert "ab" in ddf.df.columns
        assert "aa" in ddf.df.columns
        expected_ab = ddf.df["aa"] + "b"
        assert (ddf.df["ab"] == expected_ab).all()
Exemple #5
0
 def test_annotator_basic(self):
     """A ``Constant`` annotator fills its column after a pipegraph run."""
     ddf = DelayedDataFrame(
         "shu",
         lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
     )
     ddf += Constant("aa", "aa")

     annotate_result = ddf.annotate()
     force_load(annotate_result)
     ppg.run_pipegraph()

     is_constant = ddf.df["aa"] == "aa"
     assert is_constant.all()
Exemple #6
0
    def test_DynamicColumNames(self):
        """An annotator may expose ``columns`` as a property instead of an attribute."""
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        class Dynamic(Annotator):
            @property
            def columns(self):
                return ["a"]

            def calc(self, df):
                return pd.DataFrame({"a": ["x", "y"]})

        ddf += Dynamic()
        ddf.annotate()

        # Original columns followed by the annotator's column.
        expected = pd.DataFrame({"A": [1, 2], "B": ["c", "d"], "a": ["x", "y"]})
        assert_frame_equal(ddf.df, expected)
 def test_missing_external_genome(self):
     """``Description()`` built without a genome must fail the pipegraph run.

     The run is expected to raise, and the exception recorded on the
     annotator job's ``lfg`` must contain the "no genome" message.
     """
     gene_ids = pd.DataFrame({"gene_stable_id": ["a", "c", "b"]})
     ddf = DelayedDataFrame("ex", gene_ids)
     anno = genes.annotators.Description()
     ddf += anno
     force_load(ddf.annotate())

     with pytest.raises(ppg.RuntimeError):
         ppg.run_pipegraph()

     failed_job = ddf.anno_jobs[anno.get_cache_name()]
     expected_message = "ddf had no .genome and no genome was passed to Description"
     assert expected_message in str(failed_job.lfg.exception)
Exemple #8
0
    def test_annos_added_only_once(self):
        """Adding the very same annotator instance a second time must not raise."""
        init_calls = [0]  # mutable cell so the nested class can count constructions

        class CountingConstant(Annotator):
            def __init__(self, column_name, value):
                init_calls[0] += 1
                self.columns = [column_name]
                self.value = value

            def calc(self, df):
                column_name = self.columns[0]
                return pd.DataFrame({column_name: self.value}, index=df.index)

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )
        constant = CountingConstant("hello", "c")
        ddf += constant
        ddf.annotate()

        assert "hello" in ddf.df.columns
        assert init_calls[0] == 1

        ddf += constant  # this gets ignored
Exemple #9
0
 def test_annotator_coliding_with_non_anno_column(self):
     """An annotator targeting a column the frame already has must fail the run.

     ``Constant("A", ...)`` collides with the loaded column ``A``; the
     exception on the job is expected to mention "were already present".
     """
     ddf = DelayedDataFrame(
         "shu",
         lambda: pd.DataFrame(
             {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
         ).set_index("idx"),
     )
     ddf += Constant("A", "aa")

     # Grab the job accessor before the pipegraph is wired up and run.
     job_callback = ddf.anno_jobs["A"]
     generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
     generating_job.depends_on(ddf.annotate())

     with pytest.raises(ppg.RuntimeError):
         ppg.run_pipegraph()

     assert "were already present" in str(job_callback().exception)
Exemple #10
0
    def test_anno_returing_right_length_but_wrong_start_range_index(self):
        """A result of the right length but with a shifted RangeIndex must fail.

        The annotator returns three values indexed 5..7 for a three-row
        frame; the job's exception is expected to mention "Index mismatch".
        """
        ddf = DelayedDataFrame("shu", lambda: pd.DataFrame({"A": [1, 2, 3]}))

        class BadAnno(Annotator):
            columns = ["X"]

            def calc(self, df):
                shifted_index = pd.RangeIndex(5, 5 + 3)
                return pd.Series(["a", "b", "c"], index=shifted_index)

        ddf += BadAnno()
        force_load(ddf.annotate())
        job_callback = ddf.anno_jobs["X"]

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "Index mismatch" in str(job_callback().exception)
Exemple #11
0
    def test_anno_not_returning_enough_rows_and_no_index_range_index_on_df(self):
        """A one-row result for a three-row frame must fail the pipegraph run.

        The job's exception is expected to contain
        "Length and index mismatch ".
        """

        class BrokenAnno(Annotator):
            columns = ["X"]

            def calc(self, df):
                # Deliberately a single row, whatever the size of ``df``.
                return pd.DataFrame({"X": [1]})

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}),
        )
        ddf += BrokenAnno()

        job_callback = ddf.anno_jobs["X"]
        generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
        generating_job.depends_on(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "Length and index mismatch " in str(job_callback().exception)
Exemple #12
0
    def test_anno_returning_series(self):
        """An annotator may return a ``pd.Series`` for its single column.

        The frame is indexed by ``idx`` while the returned series carries a
        default index; column ``C`` is expected to hold ``0, 1, 2``.
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame(
                {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
            ).set_index("idx"),
        )

        class SeriesAnno(Annotator):
            columns = ["C"]

            def calc(self, df):
                row_count = len(df)
                return pd.Series(list(range(row_count)))

        ddf += SeriesAnno()
        generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
        generating_job.depends_on(ddf.annotate())
        ppg.run_pipegraph()

        matches = ddf.df["C"] == [0, 1, 2]
        assert matches.all()
Exemple #13
0
    def test_annotator_raising(self):
        """An exception raised inside ``calc`` is recorded on the annotator job.

        The run is expected to raise, and the ``ValueError`` message "hello"
        must show up in the exception stored on the job's ``lfg``.
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        class RaiseAnno(Annotator):
            columns = ["aa"]
            cache_name = "empty"

            def calc(self, df):
                raise ValueError("hello")

        raising_anno = RaiseAnno()
        ddf += raising_anno
        force_load(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        # The job is looked up through a fresh instance's cache name.
        job_key = RaiseAnno().get_cache_name()
        anno_job = ddf.anno_jobs[job_key]
        assert "hello" in str(anno_job.lfg.exception)
Exemple #14
0
    def test_DynamicColumNames(self):
        """``columns`` exposed as a property works through a pipegraph run."""
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        class Dynamic(Annotator):
            @property
            def columns(self):
                return ["a"]

            def calc(self, df):
                return pd.DataFrame({"a": ["x", "y"]})

        ddf += Dynamic()
        # The lookup result is discarded; the access itself is kept as in the
        # original -- presumably it only checks that the key resolves.
        ddf.anno_jobs[Dynamic().get_cache_name()]
        force_load(ddf.annotate())
        ppg.run_pipegraph()

        expected = pd.DataFrame({"A": [1, 2], "B": ["c", "d"], "a": ["x", "y"]})
        assert_frame_equal(ddf.df, expected)
Exemple #15
0
    def test_annotator_missing_columns(self):
        """An annotator that never defines ``columns`` must fail the run.

        The exception stored on the job's ``lfg`` is expected to have
        "AttributeError" in its repr.
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        class MissingColumnNames(Annotator):
            cache_name = "MissingColumnNames"

            def __repr__(self):
                return "MissingColumnNames()"

            def calc(self, df):
                return pd.DataFrame({})

        ddf += MissingColumnNames()
        job_callback = ddf.anno_jobs["MissingColumnNames"]
        force_load(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "AttributeError" in repr(job_callback().lfg.exception)
Exemple #16
0
    def test_anno_returning_string(self):
        """A ``calc`` that returns a plain string must fail the pipegraph run.

        The exception stored on the job's ``lfg`` is expected to contain
        "result was no dataframe".
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame(
                {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
            ).set_index("idx"),
        )

        class SeriesAnno(Annotator):
            columns = ["C", "D"]

            def calc(self, df):
                # Neither a DataFrame nor a Series.
                return "abc"

        ddf += SeriesAnno()
        job_callback = ddf.anno_jobs["C"]
        generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
        generating_job.depends_on(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "result was no dataframe" in str(job_callback().lfg.exception)
Exemple #17
0
    def test_lying_about_columns(self):
        """Returning a column other than the declared one must fail the run.

        The annotator declares ``C`` but returns ``D``; the job's exception
        is expected to contain "declared different ".
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame(
                {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
            ).set_index("idx"),
        )

        class SeriesAnno(Annotator):
            columns = ["C"]

            def calc(self, df):
                # Column name deliberately differs from the declared one.
                return pd.DataFrame({"D": [0, 1, 2]})

        ddf += SeriesAnno()
        job_callback = ddf.anno_jobs["C"]
        generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
        generating_job.depends_on(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "declared different " in str(job_callback().exception)
Exemple #18
0
    def test_annotator_columns_not_list(self):
        """``columns`` given as a bare string instead of a list must fail the run.

        The exception stored on the job's ``lfg`` is expected to mention
        "list".
        """

        class BrokenAnno(Annotator):
            def __init__(self):
                # A plain string, not a list of column names.
                self.columns = "shu"

            def calc(self, df):
                first = self.columns[0]
                row_count = len(df)
                values = ["%s%i" % (first, row_count)] * row_count
                return pd.DataFrame({first: values})

        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )
        ddf += BrokenAnno()
        job_callback = ddf.anno_jobs[BrokenAnno().get_cache_name()]
        force_load(ddf.annotate())

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert "list" in str(job_callback().lfg.exception)
Exemple #19
0
    def test_annotator_empty_columns(self):
        """An annotator declaring an empty ``columns`` list must fail the run.

        Also checks that calling the job accessor twice returns the same
        object, and that the exception repr contains
        "anno.columns was empty".
        """
        ddf = DelayedDataFrame(
            "shu",
            lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]}),
        )

        class EmptyColumnNames(Annotator):
            columns = []
            cache_name = "empty"

            def __repr__(self):
                return "EmptyColumNames()"

            def calc(self, df):
                return pd.DataFrame({"shu": [1, 2]})

        ddf += EmptyColumnNames()
        force_load(ddf.annotate())
        job_key = EmptyColumnNames().get_cache_name()
        anno_job_cb = ddf.anno_jobs[job_key]

        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        assert anno_job_cb() is anno_job_cb()
        assert "anno.columns was empty" in repr(anno_job_cb().exception)
 def test_external_genome(self):
     """``Description(genome)`` takes descriptions from the genome passed in.

     The frame lists gene ids in the order a, c, b; once sorted by
     ``gene_stable_id`` the ``description`` column is expected to read
     "hello", "world", "!".
     """
     genes_table = pd.DataFrame({
         "stable_id": ["a", "b", "c"],
         "chr": "1",
         "tss": [0, 100, 1000],
         "tes": [10, 101, 1010],
     })
     genes_meta = pd.DataFrame({
         "gene_stable_id": ["a", "b", "c"],
         "description": ["hello", "world", "!"],
     }).set_index("gene_stable_id")
     genome = MockGenome(genes_table, df_genes_meta=genes_meta)

     ddf = DelayedDataFrame(
         "ex", pd.DataFrame({"gene_stable_id": ["a", "c", "b"]})
     )
     anno = genes.annotators.Description(genome)
     ddf += anno
     force_load(ddf.annotate())
     ppg.run_pipegraph()

     assert "description" in ddf.df.columns
     sorted_descriptions = ddf.df.sort_values("gene_stable_id")["description"]
     assert (sorted_descriptions == ["hello", "world", "!"]).all()