Ejemplo n.º 1
0
    def test_overlap_genes_raises_on_unequal_genomes(self):
        """overlap_genes raises ValueError for Genes built on genomes "A" and "B"."""
        genome_a, genome_b = get_genome("A"), get_genome("B")
        first, second = genes.Genes(genome_a), genes.Genes(genome_b)

        with pytest.raises(ValueError):
            first.overlap_genes(second)
Ejemplo n.º 2
0
    def test_def_twice_alternative_loading_func(self):
        """Re-using the name "my_genes" with another loading function raises ValueError."""

        def a():
            record = {
                "chr": "1",
                "start": 100,
                "stop": 1000,
                "tss": 100,
                "tes": 1000,
                "strand": 1,
                "name": "gene1",
                "gene_stable_id": "gene1",
            }
            return pd.DataFrame(record, index=["gene1"])

        def b():
            # Same gene as in a(), but start/tss are shifted to 110.
            record = {
                "chr": "1",
                "start": 110,
                "stop": 1000,
                "tss": 110,
                "tes": 1000,
                "strand": 1,
                "name": "gene1",
                "gene_stable_id": "gene1",
            }
            return pd.DataFrame(record, index=["gene1"])

        genome_row = {
            "stable_id": "fake1",
            "chr": "1",
            "strand": 1,
            "tss": 5000,
            "tes": 5500,
            "description": "bla",
        }
        genome = MockGenome(pd.DataFrame([genome_row]))
        first = genes.Genes(
            genome,
            alternative_load_func=a,
            name="my_genes",
            result_dir="my_genes",
        )
        assert first.result_dir.resolve() == Path("my_genes").resolve()
        # Calling load() repeatedly on the same object must be accepted.
        for _ in range(2):
            first.load()
        with pytest.raises(ValueError):
            genes.Genes(genome, alternative_load_func=b, name="my_genes")
Ejemplo n.º 3
0
 def test_annotators_are_kept_on_filtering(self):
     """An annotator added to the parent Genes is reported by a filtered child."""
     gene_rows = [
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 5000,
             "tes": 5500,
             "description": "bla",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
     ]
     mock = MockGenome(pd.DataFrame(gene_rows))
     parent = genes.Genes(mock)
     constant_anno = Constant("shu", 5)
     parent.add_annotator(constant_anno)
     # The filter condition matches none of the rows above (no gene on chr "4").
     child = parent.filter("nogenes", lambda df: df["chr"] == "4")
     assert child.has_annotator(constant_anno)
Ejemplo n.º 4
0
    def test_do_load_only_happens_once(self):
        """Repeated load() calls must not re-run the loading function."""
        table = pd.DataFrame([{
            "gene_stable_id": "fake1",
            "chr": "1",
            "strand": 1,
            "tss": 5000,
            "tes": 5500,
            "description": "bla",
        }])
        calls = [0]  # mutable cell so the closure below can count invocations

        def load():
            calls[0] += 1
            return table

        g = genes.Genes(get_genome_chr_length(), load, name="shu")
        if not ppg.inside_ppg():
            # Outside a pipegraph the loader has already run once by now.
            assert calls[0] == 1
            g.load()
            assert calls[0] == 1
            return

        # Inside a pipegraph nothing is loaded before the graph is run.
        assert calls[0] == 0
        g.load()
        assert calls[0] == 0
        g.load()
        assert calls[0] == 0
        ppg.run_pipegraph()
Ejemplo n.º 5
0
    def test_alternative_loading_raises_on_non_int_tes(self):
        """A loader whose first row has an empty-string "tes" leads to ValueError."""
        rows = [
            {
                "stable_id": "fake1",
                "chr": "1",
                "strand": 1,
                "tss": 5000,
                "tes": "",  # the offending, non-integer value
                "description": "bla",
            },
            {
                "stable_id": "fake2",
                "chr": "1",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
            {
                "stable_id": "fake3",
                "chr": "2",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
        ]
        table = pd.DataFrame(rows)

        # The error may surface at construction, at load or when the graph runs.
        with pytest.raises(ValueError):
            loaded = genes.Genes(get_genome(), lambda: table, name="shu")
            force_load(loaded.load())
            run_pipegraph()
Ejemplo n.º 6
0
    def test_alternative_loading_raises_on_missing_name(self):
        """Passing a loading function without a name raises ValueError."""
        rows = [
            {
                "stable_id": "fake1",
                "chr": "1",
                "strand": 1,
                "tss": 5000,
                "tes": 5500,
                "description": "bla",
            },
            {
                "stable_id": "fake2",
                "chr": "1",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
            {
                "stable_id": "fake3",
                "chr": "2",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
        ]
        table = pd.DataFrame(rows)

        with pytest.raises(ValueError):
            genes.Genes(get_genome(), lambda: table)
Ejemplo n.º 7
0
    def test_write(self):
        """Exercise Genes.write with default, explicit and mangled outputs.

        Checks that a mangler without a filename raises ValueError, that
        repeating a write with the same mangler is accepted, and that the
        three written files exist and hold matching content.
        """
        g = genes.Genes(get_genome())
        # A mangler function without an explicit output filename is rejected.
        with pytest.raises(ValueError):
            g.write(mangler_function=lambda df: df.tail())
        a = g.write()
        b = g.write("b.xls")
        mangle = lambda df: df.head()  # noqa: E731
        c = g.write("c.xls", mangle)
        # this is ok...
        c = g.write("c.xls", mangle)
        if ppg.util.inside_ppg():  # this is ok outside of ppg
            # Same target file with a different mangler must be refused.
            with pytest.raises(ValueError):
                g.write("c.xls", lambda df: df.tail())
        run_pipegraph()
        # Element 1 of each write() result is used as the output filename.
        afn = a[1]
        bfn = b[1]
        cfn = c[1]

        assert Path(afn).exists()
        assert Path(bfn).exists()
        assert Path(cfn).exists()
        # The default output is read as tab-separated text and must equal the xls.
        assert_frame_equal(pd.read_csv(afn, sep="\t"), pd.read_excel(bfn))
        # c.xls was written through mangle, i.e. df.head().
        assert_frame_equal(
            pd.read_excel(bfn).head(),
            pd.read_excel(cfn),
            check_column_type=False,
            check_dtype=False,
        )
Ejemplo n.º 8
0
    def test_promotorize(self):
        """promotorize(444) yields strand-aware regions next to each gene's tss."""
        g = genes.Genes(get_genome())
        b = g.convert(
            "b",
            regions.convert.promotorize(444),
            on_overlap="ignore",
        )
        for _ in range(2):
            force_load(b.load())
        run_pipegraph()
        assert len(g.df) > 0
        assert len(g.df) == len(b.df) + 1  # we drop one that ends up at 0..0
        assert "strand" in b.df.columns
        # we have to go by index - the order might change
        # convert to list of strings - bug in at, it won't work otherwise
        promoter_table = b.df.assign(
            gene_stable_id=list(b.df.gene_stable_id)).set_index("gene_stable_id")
        gene_table = g.df.assign(
            gene_stable_id=list(g.df.gene_stable_id)).set_index("gene_stable_id")
        # again the one that we dropped
        assert set(promoter_table.index) == set(gene_table[1:].index)

        for gene_id in promoter_table.index:
            if gene_table.at[gene_id, "strand"] == 1:
                expected_start = max(0, gene_table.at[gene_id, "tss"] - 444)
                expected_stop = max(0, gene_table.at[gene_id, "tss"])
            else:
                expected_start = max(0, gene_table.at[gene_id, "tss"])
                expected_stop = max(0, gene_table.at[gene_id, "tss"] + 444)
            assert promoter_table.at[gene_id, "start"] == expected_start
            assert promoter_table.at[gene_id, "stop"] == expected_stop
Ejemplo n.º 9
0
 def test_get_exons_regions_overlapping(self):
     """regions_exons_overlapping lists every exon, overlapping ones included."""
     gene_table = pd.DataFrame([
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 3000,
             "tes": 4900,
             "description": "bla",
             "name": "bla1",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
             "name": "bla2",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
             "name": "bla3",
         },
     ])
     # {transcript_stable_id, gene_stable_id, strand, start, end, exons},
     transcript_table = pd.DataFrame({
         "transcript_stable_id": ["trans1a", "trans1b", "trans2", "trans3"],
         "gene_stable_id": ["fake1", "fake1", "fake2", "fake3"],
         "chr": ["1", "1", "1", "2"],
         "strand": [1, 1, -1, -1],
         "start": [3100, 3000, 4910, 4900],
         "stop": [4900, 4000, 5400, 5400],
         "exons": [
             [(3100, 4900)],
             [(3000, 3500), (3300, 3330), (3750, 4000)],
             [(4910, 5000), (5100, 5400)],
             [(4900, 5400)],
         ],
     })
     genome = MockGenome(gene_table, df_transcripts=transcript_table)
     g = genes.Genes(genome)
     exons = g.regions_exons_overlapping()
     force_load(exons.load())
     run_pipegraph()

     expected_start = [3000, 3100, 3300, 3750, 4910, 5100, 4900]
     expected_stop = [3500, 4900, 3330, 4000, 5000, 5400, 5400]
     expected_chr = np.array(["1", "1", "1", "1", "1", "1", "2"])
     assert (exons.df["start"] == expected_start).all()
     assert (exons.df["stop"] == expected_stop).all()
     assert (exons.df["chr"] == expected_chr).all()
Ejemplo n.º 10
0
 def test_loading_from_genome_is_singletonic(self):
     """Genes built from the same genome are one object; filters are distinct.

     Also checks that re-using a filter name with a different filter function
     raises ValueError, and that a filter selecting ten index entries yields
     ten rows once loaded.
     """
     genome = get_genome()
     print(genome)
     genesA = genes.Genes(genome)
     genesB = genes.Genes(genome)
     assert genesA is genesB
     filterA = genesA.filter("fa", lambda df: df.index[:10])
     filterAa = genesA.filter("faa", lambda df: df.index[:10])
     filterB = genesB.filter("fab", lambda df: df.index[:10])
     assert filterA is not genesA
     assert filterAa is not filterA
     assert filterAa is not filterB
     with pytest.raises(ValueError):  # can't have a different loading func
         genesB.filter("fab", lambda df: df.index[:15])
     # Pass the result of load(), exactly as every other test here does,
     # rather than the bound method itself.
     force_load(filterA.load())
     ppg.run_pipegraph()
     assert len(filterA.df) == 10
Ejemplo n.º 11
0
 def test_difference(self):
     """FromDifference(a, b) has len(a) - len(b) rows when b is a filter of a."""
     everything = genes.Genes(get_genome())
     first_five = everything.filter("filtered", lambda df: df.index[:5])
     remainder = genes.FromDifference("delta", everything, first_five)
     force_load(remainder.load())
     run_pipegraph()
     expected_rows = len(everything.df) - len(first_five.df)
     assert len(remainder.df) == expected_rows
Ejemplo n.º 12
0
 def test_write_filtered(self):
     """A written filtered gene set carries a parent_row column with 0 and 1."""
     out_name = "filtered.xls"
     parent = genes.Genes(get_genome())
     first_two = parent.filter("filtered", lambda df: df.index[:2])
     first_two.write(Path(out_name).absolute())
     run_pipegraph()
     assert Path(out_name).exists()
     written = pd.read_excel(out_name)
     assert len(written) == 2
     assert "parent_row" in written.columns
     assert (written["parent_row"] == [0, 1]).all()
Ejemplo n.º 13
0
 def test_from_none(self):
     """FromNone keeps only the genes contained in none of the given filters."""
     everything = genes.Genes(get_genome())
     head = everything.filter("filtered", lambda df: df.index[:5])
     tail = everything.filter("filtered2", lambda df: df.index[-5:])
     middle = everything.filter("filtered3", lambda df: df.index[3:10])
     untouched = genes.FromNone("delta", [head, tail, middle])
     force_load(untouched.load())
     force_load(everything.load())
     run_pipegraph()
     # index[:5] and index[3:10] together cover ten entries, index[-5:] five more.
     assert len(untouched.df) == len(everything.df) - 5 - 5 - 5
Ejemplo n.º 14
0
    def test_overlap(self):
        """overlap_genes counts shared genes between a gene set and its filters."""
        gene_rows = [
            {
                "stable_id": "fake1",
                "chr": "1",
                "strand": 1,
                "tss": 5000,
                "tes": 5500,
                "description": "bla",
            },
            {
                "stable_id": "fake2",
                "chr": "1",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
            {
                "stable_id": "fake3",
                "chr": "2",
                "strand": -1,
                "tss": 5400,
                "tes": 4900,
                "description": "bla",
            },
        ]
        g = genes.Genes(MockGenome(pd.DataFrame(gene_rows)))
        on_chr_1 = g.filter("on_1", lambda df: df["chr"] == "1")
        on_chr_2 = g.filter("on_2", lambda df: df["chr"] == "2")
        one = g.filter("one", lambda df: df["gene_stable_id"] == "fake1")
        subsets = (on_chr_1, on_chr_2, one)
        for subset in subsets:
            force_load(subset.load())
        run_pipegraph()
        for subset, expected_rows in zip(subsets, (2, 1, 1)):
            assert len(subset.df) == expected_rows

        # Against the full set (either direction) and against itself, every
        # subset overlaps in all of its own genes.
        for subset in subsets:
            assert g.overlap_genes(subset) == len(subset.df)
            assert subset.overlap_genes(g) == len(subset.df)
            assert subset.overlap_genes(subset) == len(subset.df)

        assert on_chr_1.overlap_genes(one) == 1
        assert one.overlap_genes(on_chr_1) == 1

        assert on_chr_1.overlap_genes(on_chr_2) == 0
        assert on_chr_2.overlap_genes(on_chr_1) == 0
Ejemplo n.º 15
0
 def test_basic_loading_from_genome(self):
     """Genes loaded from the test genome have the expected size and edge rows."""
     loaded = genes.Genes(get_genome())
     force_load(loaded.load())
     run_pipegraph()
     table = loaded.df
     assert len(table) == 246
     leading_ids = table["gene_stable_id"][:3]
     assert (leading_ids == ["CRP_001", "CRP_002", "CRP_003"]).all()
     assert table["gene_stable_id"].iloc[-1] == "CRP_182"
     assert table["start"].iloc[-1] == 158_649 - 1
     assert table["stop"].iloc[-1] == 159_662
     assert table["strand"].iloc[-1] == -1
Ejemplo n.º 16
0
 def test_from_all(self):
     """FromAll keeps only the genes present in every one of the given filters."""
     everything = genes.Genes(get_genome())
     first_five = everything.filter("filtered", lambda df: df.index[:5])
     first_ten = everything.filter("filtered2", lambda df: df.index[0:10])
     three_to_ten = everything.filter("filtered3", lambda df: df.index[3:10])
     common = genes.FromAll("delta", [first_five, first_ten, three_to_ten])
     force_load(common.load())
     force_load(everything.load())
     run_pipegraph()
     assert len(common.df) == 2
     expected_ids = list(everything.df.gene_stable_id.loc[3:4])
     assert list(common.df.gene_stable_id) == expected_ids
Ejemplo n.º 17
0
 def test_get_intron_regions(self):
     """regions_introns returns the expected intron intervals for the mock genes."""
     gene_table = pd.DataFrame([
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 3000,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
     ])
     # {transcript_stable_id, gene_stable_id, strand, start, end, exons},
     transcript_table = pd.DataFrame({
         "transcript_stable_id": ["trans1a", "trans1b", "trans2", "trans3"],
         "gene_stable_id": ["fake1", "fake1", "fake2", "fake3"],
         "chr": ["1", "1", "1", "2"],
         "strand": [1, 1, -1, -1],
         "start": [3100, 3000, 4900, 4900],
         "stop": [4900, 4000, 5400, 5400],
         "exons": [
             [(3100, 4900)],
             [(3000, 3500), (3750, 4000)],
             [(4900, 5000), (5100, 5400)],
             [(4900, 5400)],
         ],
     })
     genome = MockGenome(gene_table, df_transcripts=transcript_table)
     g = genes.Genes(genome)
     introns = g.regions_introns()
     force_load(introns.load())
     run_pipegraph()

     expected_start = [3000, 3500, 4000, 5000]
     expected_stop = [3100, 3750, 4900, 5100]
     assert (introns.df["start"] == expected_start).all()
     assert (introns.df["stop"] == expected_stop).all()
     # no intronic region on chr 2
     assert (introns.df["chr"] == ["1", "1", "1", "1"]).all()
Ejemplo n.º 18
0
 def test_intersection(self):
     """FromIntersection needs a list of gene sets and keeps their common genes."""
     everything = genes.Genes(get_genome())
     first_five = everything.filter("filtered", lambda df: df.index[:5])
     four_to_six = everything.filter("filtered2", lambda df: df.index[4:6])
     # Passing the gene sets as separate positional arguments is rejected.
     with pytest.raises(ValueError):
         common = genes.FromIntersection("delta", first_five, four_to_six)
     common = genes.FromIntersection("delta", [first_five, four_to_six])
     force_load(everything.load())
     force_load(common.load())
     run_pipegraph()
     assert len(common.df) == 1
     expected_ids = list(everything.df.gene_stable_id.loc[4:4])
     assert list(common.df.gene_stable_id) == expected_ids
Ejemplo n.º 19
0
 def test_intersection2(self):
     """FromIntersection of nested filters carries the vids of both inputs."""
     everything = genes.Genes(get_genome())
     outer = everything.filter("filtered", lambda df: df.index[:5], vid="AA")
     inner = outer.filter("filtered2", lambda df: df.index[:1], vid=["BB", "CC"])
     # Passing the gene sets as separate positional arguments is rejected.
     with pytest.raises(ValueError):
         common = genes.FromIntersection("delta", outer, inner)
     common = genes.FromIntersection("delta", [outer, inner])
     force_load(everything.load())
     force_load(common.load())
     run_pipegraph()
     assert len(common.df) == 1
     expected_ids = list(everything.df.gene_stable_id.loc[0:0])
     assert list(common.df.gene_stable_id) == expected_ids
     for vid in ("AA", "BB", "CC"):
         assert vid in common.vid
Ejemplo n.º 20
0
 def test_from_any(self):
     """FromAny is the union of its inputs and honours sheet_name in result_dir."""
     everything = genes.Genes(get_genome())
     head = everything.filter("filtered", lambda df: df.index[:5])
     tail = everything.filter("filtered2", lambda df: df.index[-5:])
     middle = everything.filter("filtered3", lambda df: df.index[10:15])
     union = genes.FromAny("delta", [head, tail, middle], sheet_name="shu")
     force_load(union.load())
     force_load(everything.load())
     run_pipegraph()
     assert len(union.df) == 15
     all_ids = everything.df.gene_stable_id
     expected_ids = (list(all_ids.iloc[:5]) + list(all_ids.iloc[10:15]) +
                     list(all_ids.iloc[-5:]))
     assert sorted(union.df.gene_stable_id) == sorted(expected_ids)
     assert "/shu/" in str(union.result_dir)
Ejemplo n.º 21
0
 def test_write_bed_auto_filename(self):
     """write_bed without a filename writes a BED file matching the gene table."""
     gene_rows = [
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 5000,
             "tes": 5500,
             "description": "bla",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
     ]
     genome = MockGenome(pd.DataFrame(gene_rows))
     g = genes.Genes(genome, sheet_name="da_genes")
     assert "/da_genes/" in str(g.result_dir)
     # Element 1 of the write_bed() result is used as the output filename.
     sample_filename = g.write_bed()[1]
     run_pipegraph()
     assert len(g.df) > 0
     read = read_bed(sample_filename)
     assert len(read) == len(g.df)

     entries = [read[0], read[1], read[2]]
     assert [entry.refseq for entry in entries] == [b"1", b"1", b"2"]
     assert [entry.position for entry in entries] == [4900, 5000, 4900]
     assert [entry.length for entry in entries] == [500, 500, 500]
     assert [entry.name for entry in entries] == [b"fake2", b"fake1", b"fake3"]
Ejemplo n.º 22
0
 def test_genes_from_file(self, both_ppg_and_no_ppg):
     """A filtered gene set written to disk is re-imported unchanged by FromFile.

     The set is written and loaded in a first pipegraph; a fresh pipegraph is
     then requested from the fixture, and the file is read back and compared.
     """
     genome = get_genome()
     a = genes.Genes(genome)
     b = a.filter("filtered", lambda df: df.index[:5])
     b.write(Path("filtered.xls").absolute())
     force_load(b.load())
     print(both_ppg_and_no_ppg)
     run_pipegraph()
     assert "summit middle" not in a.df.columns
     assert "summit middle" not in b.df.columns
     print(both_ppg_and_no_ppg)
     # Start over with a new pipegraph before re-importing the written file.
     both_ppg_and_no_ppg.new_pipegraph()
     genome = get_genome()
     c = genes.FromFile("reimport", genome, Path("filtered.xls").absolute())
     force_load(c.load())
     run_pipegraph()
     assert_frame_equal(b.df, c.df)
Ejemplo n.º 23
0
    def test_filtering_with_annotator(self):
        """A filtered gene set exposes the column of an annotator added to its parent."""
        import mbf_genomics

        wanted_id = "CRP_003"
        g = genes.Genes(get_genome())

        # Annotator that mirrors gene_stable_id into a "copy" column.
        class CopyAnno(mbf_genomics.annotator.Annotator):
            def __init__(self):
                self.columns = ["copy"]

            def calc(self, df):
                return pd.DataFrame({"copy": df["gene_stable_id"]})

        g += CopyAnno()
        selection = g.filter("a", ("gene_stable_id", "==", "CRP_003"))
        force_load(selection.annotate())
        run_pipegraph()
        for column in ("gene_stable_id", "copy"):
            assert (selection.df[column] == [wanted_id]).all()
Ejemplo n.º 24
0
    def test_overlap_genes_requires_two_genes(self):
        """overlap_genes raises ValueError when handed a plain GenomicRegions."""
        genome = get_genome()
        gene_set = genes.Genes(genome)

        def sample_data():
            columns = {
                "chr": ["Chromosome"],
                "start": [1000],
                "stop": [1100],
            }
            return pd.DataFrame(columns)

        plain_regions = regions.GenomicRegions("sha", sample_data, [], genome)
        force_load(gene_set.load())
        force_load(plain_regions.load())
        run_pipegraph()

        with pytest.raises(ValueError):
            gene_set.overlap_genes(plain_regions)
Ejemplo n.º 25
0
    def test_invalid_tes(self):
        """A loader returning a fractional "tes" (1000.5) leads to ValueError."""

        def a():
            record = {
                "chr": "Chromosome",
                "tss": 100,
                "tes": 1000.5,
                "strand": 1,
                "name": "gene1",
                "gene_stable_id": "gene1",
            }
            return pd.DataFrame(record, index=["gene1"])

        genome = get_genome()
        with RaisesDirectOrInsidePipegraph(ValueError):
            broken = genes.Genes(
                genome,
                alternative_load_func=a,
                name="my_genes",
                result_dir="my_genes",
            )
            broken.load()
 def test_simple(self):
     """The Description annotator adds each gene's description from the genome meta table."""
     gene_table = pd.DataFrame({
         "stable_id": ["a", "b", "c"],
         "chr": "1",
         "tss": [0, 100, 1000],
         "tes": [10, 101, 1010],
     })
     meta_table = pd.DataFrame({
         "gene_stable_id": ["a", "b", "c"],
         "description": ["hello", "world", "!"],
     }).set_index("gene_stable_id")
     genome = MockGenome(gene_table, df_genes_meta=meta_table)
     g = genes.Genes(genome)
     g += genes.annotators.Description()
     force_load(g.annotate())
     ppg.run_pipegraph()
     assert "description" in g.df.columns
     by_id = g.df.sort_values("gene_stable_id")
     assert (by_id["description"] == ["hello", "world", "!"]).all()
 def test_simple(self, tmpdir):
     """The FromFile annotator joins a column from a TSV and fills unmatched genes."""
     gene_table = pd.DataFrame({
         "stable_id": ["a", "b", "c"],
         "chr": "1",
         "tss": [0, 100, 1000],
         "tes": [10, 101, 1010],
     })
     meta_table = pd.DataFrame({
         "gene_stable_id": ["a", "b", "c"],
         "description": ["hello", "world", "!"],
     }).set_index("gene_stable_id")
     genome = MockGenome(gene_table, df_genes_meta=meta_table)
     g = genes.Genes(genome)

     # The table on disk knows "a", "b" and "d", but has no entry for gene "c".
     table_ids = ["a", "b", "d"]
     df_to_add = pd.DataFrame(
         {"testcol": [1, 2, 3], "index_vals": ["a", "b", "d"]},
         index=table_ids,
     )
     tmp_path = Path(tmpdir) / "dump.tsv"
     df_to_add.to_csv(tmp_path, sep="\t", index=False)

     g += genes.annotators.FromFile(
         tmp_path,
         columns_to_add=["testcol"],
         index_column_table="index_vals",
         index_column_genes="gene_stable_id",
         fill_value=-1,
     )
     force_load(g.annotate())
     ppg.run_pipegraph()
     print(g.df.index)
     print(g.df)
     assert "testcol" in g.df.columns
     # Rows 0 and 1 take their table values, row 2 gets the fill value.
     for row_label, expected in ((0, 1), (1, 2), (2, -1)):
         assert g.df.loc[row_label]["testcol"] == expected
     assert len(g.df) == 3
Ejemplo n.º 28
0
 def test_filtering_away_works(self):
     """A filter that matches nothing yields an empty frame with the core columns."""
     gene_rows = [
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 5000,
             "tes": 5500,
             "description": "bla",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
     ]
     parent = genes.Genes(MockGenome(pd.DataFrame(gene_rows)))
     # None of the rows above is on chr "4".
     empty = parent.filter("nogenes", lambda df: df["chr"] == "4")
     force_load(empty.load())
     run_pipegraph()
     assert len(empty.df) == 0
     for column in ("start", "stop", "tss", "tes", "gene_stable_id"):
         assert column in empty.df.columns
Ejemplo n.º 29
0
 def test_get_tes_regions(self):
     """regions_tes yields one-base regions at the tes, two for these three genes."""
     gene_rows = [
         {
             "stable_id": "fake1",
             "chr": "1",
             "strand": 1,
             "tss": 3000,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake2",
             "chr": "1",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
         {
             "stable_id": "fake3",
             "chr": "2",
             "strand": -1,
             "tss": 5400,
             "tes": 4900,
             "description": "bla",
         },
     ]
     g = genes.Genes(MockGenome(pd.DataFrame(gene_rows)))
     tes = g.regions_tes()
     force_load(tes.load())
     run_pipegraph()
     table = tes.df
     assert len(table) == 2
     assert (table["start"] == [4900, 4900]).all()
     assert (table["stop"] == table["start"] + 1).all()
     assert (table["chr"] == ["1", "2"]).all()
Ejemplo n.º 30
0
 def test_alternative_loading_raises_on_non_df(self):
     """A loading function that returns None leads to ValueError."""
     with RaisesDirectOrInsidePipegraph(ValueError):
         genome = get_genome_chr_length()
         broken = genes.Genes(genome, lambda: None, "myname")
         force_load(broken.load())