Beispiel #1
0
def get_human_22_fake_genome():
    """Build a ``MockGenome`` from the bundled chromosome 22 sample tables.

    The gene and transcript tables are read from gzip-compressed msgpack
    files located via ``get_sample_data`` and have their index reset to
    ordinary columns before being handed to ``MockGenome``.

    Returns:
        A ``MockGenome`` built from both tables, with a single chromosome
        ``"22"`` of length 50,818,468.
    """
    from mbf_genomics.testing import MockGenome
    import gzip

    genes_path = get_sample_data(Path("mbf_align/hs_22_genes.msgpack.gz"))
    transcripts_path = get_sample_data(
        Path("mbf_align/hs_22_transcripts.msgpack.gz")
    )

    # Context managers guarantee the gzip handles are closed once each table
    # has been read, instead of leaking them until garbage collection.
    with gzip.GzipFile(genes_path) as handle:
        genes = pd.read_msgpack(handle).reset_index()
    with gzip.GzipFile(transcripts_path) as handle:
        tr = pd.read_msgpack(handle).reset_index()

    return MockGenome(df_genes=genes, df_transcripts=tr, chr_lengths={"22": 50_818_468})
def get_human_22_fake_genome():
    """Build a ``MockGenome`` for chromosome ``chr22`` from sample tables.

    The gene and transcript tables are read from gzip-compressed msgpack
    files located via ``mbf_sampledata.get_sample_path``. Their index is
    reset to ordinary columns and the ``chr`` column of both tables is
    overwritten with ``"chr22"``.

    Returns:
        A ``MockGenome`` built from both tables, with a single chromosome
        ``"chr22"`` of length 50,818,468.
    """
    import gzip

    genes_path = mbf_sampledata.get_sample_path("mbf_align/hs_22_genes.msgpack.gz")
    transcripts_path = mbf_sampledata.get_sample_path(
        "mbf_align/hs_22_transcripts.msgpack.gz"
    )

    # Context managers guarantee the gzip handles are closed once each table
    # has been read, instead of leaking them until garbage collection.
    with gzip.GzipFile(genes_path) as handle:
        genes = pd.read_msgpack(handle).reset_index()
    with gzip.GzipFile(transcripts_path) as handle:
        tr = pd.read_msgpack(handle).reset_index()

    # Rename the chromosome in both tables so it matches the key used in
    # chr_lengths below.
    genes["chr"] = "chr22"
    tr["chr"] = "chr22"
    return MockGenome(
        df_genes=genes, df_transcripts=tr, chr_lengths={"chr22": 50_818_468}
    )
 def test_simple(self):
     """Check that a ``Description`` annotator adds a ``description`` column.

     The expected values are the ones supplied through ``df_genes_meta``.
     """
     gene_table = pd.DataFrame(
         {
             "stable_id": ["a", "b", "c"],
             "chr": "1",
             "tss": [0, 100, 1000],
             "tes": [10, 101, 1010],
         }
     )
     meta_table = pd.DataFrame(
         {
             "gene_stable_id": ["a", "b", "c"],
             "description": ["hello", "world", "!"],
         }
     ).set_index("gene_stable_id")
     genome = MockGenome(gene_table, df_genes_meta=meta_table)

     g = genes.Genes(genome)
     g += genes.annotators.Description()
     force_load(g.annotate())
     ppg.run_pipegraph()

     assert "description" in g.df.columns
     # Sort by gene id so the comparison does not depend on row order.
     sorted_descriptions = g.df.sort_values("gene_stable_id")["description"]
     assert (sorted_descriptions == ["hello", "world", "!"]).all()
 def test_simple(self, tmpdir):
     """Check that a ``FromFile`` annotator adds ``testcol`` from a TSV file.

     The table written to disk has rows for "a", "b" and "d" only; the
     assertions expect ``fill_value`` (-1) at row index 2 and no extra rows.
     """
     gene_table = pd.DataFrame(
         {
             "stable_id": ["a", "b", "c"],
             "chr": "1",
             "tss": [0, 100, 1000],
             "tes": [10, 101, 1010],
         }
     )
     meta_table = pd.DataFrame(
         {
             "gene_stable_id": ["a", "b", "c"],
             "description": ["hello", "world", "!"],
         }
     ).set_index("gene_stable_id")
     genome = MockGenome(gene_table, df_genes_meta=meta_table)
     g = genes.Genes(genome)

     # Write the table the annotator will read back from disk.
     row_keys = ["a", "b", "d"]
     df_to_add = pd.DataFrame(
         {"testcol": [1, 2, 3], "index_vals": ["a", "b", "d"]},
         index=row_keys,
     )
     tmp_path = Path(tmpdir) / "dump.tsv"
     df_to_add.to_csv(tmp_path, sep="\t", index=False)

     g += genes.annotators.FromFile(
         tmp_path,
         columns_to_add=["testcol"],
         index_column_table="index_vals",
         index_column_genes="gene_stable_id",
         fill_value=-1,
     )
     force_load(g.annotate())
     ppg.run_pipegraph()

     print(g.df.index)
     print(g.df)
     assert "testcol" in g.df.columns
     for row_label, expected in enumerate((1, 2, -1)):
         assert g.df.loc[row_label]["testcol"] == expected
     assert len(g.df) == 3