def test_ucsc_gencode_gtf():
    """
    Load the GTF at UCSC_GENCODE_PATH and check that its DataFrame holds
    12 exon rows, and that the DataFrame has the same length and columns
    whether or not save_to_disk is requested.
    """
    with TemporaryDirectory() as cache_dir:
        gencode_gtf = GTF(UCSC_GENCODE_PATH, cache_directory_path=cache_dir)
        in_memory_df = gencode_gtf.dataframe(save_to_disk=False)

        # keep only the rows whose feature column is "exon"
        is_exon = in_memory_df["feature"] == "exon"
        exon_rows = in_memory_df[is_exon]
        n_exons = len(exon_rows)

        # expect 12 exons from the dataframe
        assert n_exons == 12, \
            "Expected 12 exons, got %d: %s" % (n_exons, exon_rows)

        # a second call with save_to_disk=True must match the first result
        saved_df = gencode_gtf.dataframe(save_to_disk=True)
        assert len(in_memory_df) == len(saved_df), \
            "Got different length DataFrame"
        assert list(in_memory_df.columns) == list(saved_df.columns)
def test_ucsc_gencode_gtf():
    """
    Build a GTF object from UCSC_GENCODE_PATH with a temporary cache
    directory, then verify the exon count (12) and that calling
    dataframe() with save_to_disk=True gives a DataFrame of the same
    length and columns as with save_to_disk=False.
    """
    with TemporaryDirectory() as scratch_dir:
        parsed = GTF(UCSC_GENCODE_PATH, cache_directory_path=scratch_dir)
        first_df = parsed.dataframe(save_to_disk=False)

        exon_mask = first_df["feature"] == "exon"
        exon_df = first_df[exon_mask]
        exon_count = len(exon_df)
        # expect 12 exons from the dataframe
        assert exon_count == 12, \
            "Expected 12 exons, got %d: %s" % (exon_count, exon_df)

        second_df = parsed.dataframe(save_to_disk=True)
        assert len(first_df) == len(second_df), \
            "Got different length DataFrame"
        assert list(first_df.columns) == list(second_df.columns)
def test_ucsc_refseq_gtf():
    """
    Exercise the GTF object on a small RefSeq GTF file that was
    downloaded from http://genome.ucsc.edu/cgi-bin/hgTables

    Checks the number of exon rows (16) and that the DataFrame has the
    same length and columns whether or not save_to_disk is requested.
    """
    with TemporaryDirectory() as cache_dir:
        refseq_gtf = GTF(UCSC_REFSEQ_PATH, cache_directory_path=cache_dir)
        in_memory_df = refseq_gtf.dataframe(save_to_disk=False)

        # keep only the rows whose feature column is "exon"
        is_exon = in_memory_df["feature"] == "exon"
        exon_rows = in_memory_df[is_exon]
        n_exons = len(exon_rows)

        # expect 16 exons from the GTF
        assert n_exons == 16, \
            "Expected 16 exons, got %d: %s" % (n_exons, exon_rows)

        # a second call with save_to_disk=True must match the first result
        saved_df = refseq_gtf.dataframe(save_to_disk=True)
        assert len(in_memory_df) == len(saved_df), \
            "Got different length DataFrame"
        assert list(in_memory_df.columns) == list(saved_df.columns)
def test_ucsc_refseq_gtf():
    """
    Run the GTF object against a small RefSeq GTF file obtained from
    http://genome.ucsc.edu/cgi-bin/hgTables

    Verifies that 16 exon rows are found and that dataframe() returns
    the same length and columns with save_to_disk=True as with
    save_to_disk=False.
    """
    with TemporaryDirectory() as scratch_dir:
        parsed = GTF(UCSC_REFSEQ_PATH, cache_directory_path=scratch_dir)
        first_df = parsed.dataframe(save_to_disk=False)

        exon_mask = first_df["feature"] == "exon"
        exon_df = first_df[exon_mask]
        exon_count = len(exon_df)
        # expect 16 exons from the GTF
        assert exon_count == 16, \
            "Expected 16 exons, got %d: %s" % (exon_count, exon_df)

        second_df = parsed.dataframe(save_to_disk=True)
        assert len(first_df) == len(second_df), \
            "Got different length DataFrame"
        assert list(first_df.columns) == list(second_df.columns)
def test_gtf_creates_csv_files_in_cache_dir():
    """
    Check that GTF honors the cache directory it is given: the CSV path
    it reports must lie under that directory, and files matching
    "*mouse*" must appear there once the DataFrame has been created.
    """
    with TemporaryDirectory() as cache_dir:
        gtf = GTF(gtf_path, cache_directory_path=cache_dir)

        subset_csv_path = gtf.data_subset_path()
        assert cache_dir in subset_csv_path, \
            "Expected CSV path %s to contain cache_dir" % subset_csv_path

        mouse_pattern = join(cache_dir, "*mouse*")

        # nothing should match before any parsing has been requested
        files_before = glob(mouse_pattern)
        assert len(files_before) == 0, \
            "Temporary directory should be empty"

        # building the dataframe is expected to trigger GTF parsing and
        # then write the parsed results to a csv file
        gtf.dataframe()

        files_after = glob(mouse_pattern)
        assert len(files_after) > 0, \
            "Expected GTF to save files in cache_dir"
def test_gtf_creates_csv_files_in_cache_dir():
    """
    Ensure GTF respects its cache_directory_path argument by writing
    its index files inside that directory.
    """
    with TemporaryDirectory() as scratch_dir:
        gtf_under_test = GTF(gtf_path, cache_directory_path=scratch_dir)

        reported_csv = gtf_under_test.data_subset_path()
        assert scratch_dir in reported_csv, \
            "Expected CSV path %s to contain cache_dir" % reported_csv

        pattern = join(scratch_dir, "*mouse*")
        matches_before = glob(pattern)
        assert len(matches_before) == 0, \
            "Temporary directory should be empty"

        # requesting the dataframe should cause the GTF to be parsed and
        # the parsed results to be saved as a csv file
        gtf_under_test.dataframe()

        matches_after = glob(pattern)
        assert len(matches_after) > 0, \
            "Expected GTF to save files in cache_dir"