def test_dicts():
    """Two readers opened on the same file must not share their ``uids`` collection."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_directive, file=handle)
        handle.flush()
        first_reader = GFFReader(handle.name)
        second_reader = GFFReader(handle.name)
        # Adding to one reader's uids must leave the other reader's uids untouched.
        first_reader.uids.add("Test")
        assert first_reader.uids != second_reader.uids
def test_ids_are_unique():
    """Reading the ``gff_non_unique_ids`` fixture must raise an exception."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_non_unique_ids, file=handle)
        handle.flush()
        with pytest.raises(Exception):
            # Construction stays inside the block: the error may come from
            # either opening the file or iterating it.
            reader = GFFReader(handle.name)
            for _record in reader:
                pass
def test_wrong_number_of_fields():
    """Reading the ``gff_wrong_number_of_fields`` fixture must raise ``IOError``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_wrong_number_of_fields, file=handle)
        handle.flush()
        with pytest.raises(IOError):
            # Construction stays inside the block: the error may come from
            # either opening the file or iterating it.
            reader = GFFReader(handle.name)
            for _record in reader:
                pass
def test_ends_mid_comment():
    """Read the ``gff_ends_at_comment`` fixture and check the last record's start."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_comment, file=handle)
        handle.flush()
        # Drain the reader, remembering the most recent record it produced.
        for gene in GFFReader(handle.name):
            last_gene = gene
        assert last_gene.start == 2
def test_gene_parented():
    """Read the ``gff_gene_parented_CDS`` fixture and check the last record is truthy."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_gene_parented_CDS, file=handle)
        handle.flush()
        # Exhaust the reader; the loop variable keeps the final record.
        for last_gene in GFFReader(handle.name):
            pass
        assert last_gene
def test_invalid_mid_file():
    """Reading the ``gff_not_valid_file_content`` fixture must raise ``AssertionError``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_not_valid_file_content, file=handle)
        handle.flush()
        with pytest.raises(AssertionError):
            # Construction stays inside the block: the error may come from
            # either opening the file or iterating it.
            reader = GFFReader(handle.name)
            for _record in reader:
                pass
def test_with_construct():
    """Use ``GFFReader`` as a context manager and pull one record with ``next``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content, file=handle)
        handle.flush()
        with GFFReader(handle.name) as reader:
            first_gene = next(reader)
        assert first_gene.start == 2
def test_gff_gnomon():
    """Tally genes, transcripts and exons of ``ncbi_annot.gff`` and check a BED export.

    Only transcripts that have at least one CDS exon contribute to the tallies;
    a gene is counted when it has at least one such transcript.  The final
    check compares the first 12 BED fields of one transcript, taken from the
    last gene yielded by the reader, against a reference line.
    """
    num_genes = 0
    total_transcripts = 0
    total_exons = 0
    monoexonic_cds = 0
    monoexonic = 0
    last_gene = None
    for gene in GFFReader("tests/unit/data/ncbi_annot.gff"):
        last_gene = gene
        num_transcripts = 0
        # The mRNA ids are not needed for the tallies, so iterate the values only.
        for mrna in gene.mrnas.values():
            num_coding_exons = len(mrna.cds_exons)
            if num_coding_exons > 0:
                total_exons += len(mrna.exons)
                num_transcripts += 1
                if num_coding_exons == 1:
                    monoexonic_cds += 1
                if len(mrna.exons) == 1:
                    monoexonic += 1
        if num_transcripts > 0:
            num_genes += 1
            total_transcripts += num_transcripts

    assert num_genes == 3
    assert total_transcripts == 3
    assert monoexonic_cds == 2
    assert monoexonic == 2
    assert total_exons == 11

    # The reference transcript is looked up on the last gene the reader produced.
    assert last_gene is not None
    s = last_gene.mrnas["rna-XM_003689506.3"].to_bed(cds_only=True)
    bed_line = "NW_003789112.1\t4846\t25552\trna-XM_003689506.3\t0\t-\t4846\t25552\t0,0,0\t9\t38,131,245,228,185,186,198,115,315\t0,548,1486,3275,5756,6363,7036,16397,20391"
    assert s.split('\t')[:12] == bed_line.split('\t')[:12]
def test_cds_w_no_phase():
    """No CDS exon of the last gene read from ``gff_CDS_w_no_phase`` may keep phase ``'.'``."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_CDS_w_no_phase, file=handle)
        handle.flush()
        # Exhaust the reader; the loop variable keeps the final record.
        for last_gene in GFFReader(handle.name):
            pass
        assert all(
            cds.phase != '.'
            for mrna in last_gene.mrnas.values()
            for cds in mrna.cds_exons
        )
def test_with_directives():
    """Read ``gff_file_content_with_directives``; check the second and the last record."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content_with_directives, file=handle)
        handle.flush()
        for index, gene in enumerate(GFFReader(handle.name)):
            # Index 1 is the second record produced by the reader.
            if index == 1:
                second_gene = gene
        assert second_gene.start == 3306
        # After the loop ``gene`` still refers to the final record.
        assert gene.uid == "AL1G10030.v2.1"
def test_eden():
    """The ``gff_file_eden`` fixture must yield one gene with three mRNAs."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_eden, file=handle)
        handle.flush()
        genes = list(GFFReader(handle.name))
        assert len(genes) == 1
        gene = genes[0]
        assert gene.uid == "gene00001"
        assert len(gene.mrnas) == 3
        # First mRNA in the mapping's iteration order.
        first_mrna = next(iter(gene.mrnas.values()))
        assert first_mrna.cds_exons[0].uid == "cds00001"
def test_ends_at_directive():
    """Read ``gff_ends_at_directive``; check the last record's start and attributes."""
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_ends_at_directive, file=handle)
        handle.flush()
        # Drain the reader, remembering the most recent record it produced.
        for gene in GFFReader(handle.name):
            last_gene = gene
        assert last_gene.start == 2
        # First mRNA in the mapping's iteration order.
        first_mrna = next(iter(last_gene.mrnas.values()))
        assert first_mrna.attr['Alias'] == ['gene1', 'gene2', 'gnee3']
        assert first_mrna.attr['Ontology_term'] == ['t1', 't2']
        assert last_gene.attr['Dbxref'] == ['db1']
def test_int_exon_mitochondrial_gff():
    """Check exon uids for the CYTB and ND1 genes of ``int_mitochondrial.gff``.

    Exon uids of both genes are expected to start with ``virt_`` while the ND1
    CDS exon uid must not.
    NOTE(review): ``virt_`` presumably marks exons synthesised by the reader
    rather than read from the file -- confirm against the parser.
    """
    genes = {
        gene.uid: gene
        for gene in GFFReader("tests/unit/data/int_mitochondrial.gff")
    }

    cytb = genes['gene-CYTB']
    assert cytb is not None
    assert len(cytb.mrnas) > 0
    cytb_mrna = cytb.mrnas['gene-CYTB']
    assert len(cytb_mrna.exons) > 0
    assert cytb_mrna.exons[0].uid.startswith('virt_')

    nd1_mrna = genes['gene-ND1'].mrnas['gene-ND1']
    assert not nd1_mrna.cds_exons[0].uid.startswith('virt_')
    assert nd1_mrna.exons[0].uid.startswith('virt_')
def test_eden_integer_chr():
    """The ``gff_file_eden_integerableChr`` fixture must yield one gene with three mRNAs.

    Also exercises ``print_gff`` on the gene and ``to_bed(cds_only=True)`` on
    each of its mRNAs; their output is printed, not asserted.
    """
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_eden_integerableChr, file=handle)
        handle.flush()
        genes = list(GFFReader(handle.name))
        assert len(genes) == 1
        gene = genes[0]
        assert gene.uid == "gene00001"
        assert len(gene.mrnas) == 3
        # First mRNA in the mapping's iteration order.
        first_mrna = next(iter(gene.mrnas.values()))
        assert first_mrna.cds_exons[0].uid == "cds00001"
        gene.print_gff()
        for transcript in gene.mrnas.values():
            print(transcript.to_bed(cds_only=True))
def test_check_splicing_sites(self):
    """Check ``check_splicing_sites`` counts on four RefSeq fixture pairs.

    Each fixture is a ``<prefix>.gff`` / ``<prefix>.fa`` pair; the transcript
    under test is looked up on the first gene of the GFF.  The two ``*_c``
    fixtures must report no non-canonical splicing, the two ``*_nc`` fixtures
    exactly one.
    """

    def splicing_stats(prefix, mrna_id, *limits):
        """Return the ``check_splicing_sites`` result for ``mrna_id`` in ``prefix``.

        ``limits`` are forwarded unchanged as the trailing positional arguments.
        NOTE(review): they look like intron length thresholds -- confirm against
        ``check_splicing_sites``.
        """
        genes = list(GFFReader(prefix + ".gff"))
        genome = pyfaidx.Fasta(prefix + ".fa")
        try:
            return check_splicing_sites(genome, genes[0].mrnas[mrna_id], *limits)
        finally:
            # Opening the FASTA leaves a ``.fai`` index beside it; remove it
            # even when the call above raises so the data directory stays clean.
            os.remove(prefix + ".fa.fai")

    _, short_intron, nc_splicing, _ = splicing_stats(
        "tests/unit/data/refseq/pstrand_c", "rna-XM_625167.6.m1", 200000, 59)
    assert nc_splicing == 0
    assert short_intron == 1

    _, short_intron, nc_splicing, _ = splicing_stats(
        "tests/unit/data/refseq/mstrand_c", "rna-XM_392640.7.m1", 200000)
    assert nc_splicing == 0
    assert short_intron == 0

    _, _, nc_splicing, _ = splicing_stats(
        "tests/unit/data/refseq/pstrand_nc", "rna-XM_006567179.3.m1", 200000)
    assert nc_splicing == 1

    _, _, nc_splicing, _ = splicing_stats(
        "tests/unit/data/refseq/mstrand_nc", "rna-XM_026443867.1.m1", 200000)
    assert nc_splicing == 1
def test_get_spliced_cds_seq(self):
    """The last codon reported by ``get_spliced_cds_seq`` must be a stop codon.

    Checked on three RefSeq fixture pairs (``<prefix>.gff`` / ``<prefix>.fa``);
    the transcript under test is looked up on the first gene of the GFF.
    """

    def last_codon_of(prefix, mrna_id):
        """Return the last codon ``get_spliced_cds_seq`` reports for ``mrna_id``."""
        genes = list(GFFReader(prefix + ".gff"))
        genome = pyfaidx.Fasta(prefix + ".fa")
        try:
            # The call returns four values; only the last one is checked here.
            _, _, _, last_codon = get_spliced_cds_seq(
                genome, genes[0].mrnas[mrna_id], True)
        finally:
            # Opening the FASTA leaves a ``.fai`` index beside it; remove it
            # even when the call above raises so the data directory stays clean.
            os.remove(prefix + ".fa.fai")
        return last_codon

    fixtures = (
        ("tests/unit/data/refseq/mstrand_e", "rna-XM_033456960.1.m1"),
        ("tests/unit/data/refseq/pstrand_e", "rna-XM_033441273.1.m1"),
        ("tests/unit/data/refseq/mstrand_e2", "rna-XM_392669.6.m1"),
    )
    for prefix, mrna_id in fixtures:
        assert last_codon_of(prefix, mrna_id) in stop_codons
def test_for_construct():
    """Iterate a ``GFFReader`` opened as a context manager over ``gff_file_content``.

    Checks the number of records, the start of the second record, and the uid
    and first-mRNA attributes of the last record.
    """
    with tempfile.NamedTemporaryFile("wt", suffix=".gff") as handle:
        print(gff_file_content, file=handle)
        handle.flush()
        count = 0
        with GFFReader(handle.name) as parser:
            # ``count`` ends up as the number of records the parser produced.
            for count, gene in enumerate(parser, start=1):
                if count == 2:
                    second_gene = gene
        assert count == 3
        assert second_gene.start == 3306
        # After the loop ``gene`` still refers to the final record.
        assert gene.uid == "AL1G10030.v2.1"
        mrna = gene.mrnas["AL1G10030.t1.v2.1"]
        assert mrna.attr["Note"] == "cov:100|id:97.65"
        assert mrna.fp_utr == 7449
        assert mrna.tp_utr == 7610
def test_bter_CDS():
    """CDS-only BED output of ``bter_top.gff`` must match ``bter_top.coding.bed``.

    Genes and their mRNAs are paired positionally; BED fields 0-3 and 5-11 are
    compared for every pair.
    """

    def comparable_fields(bed_line):
        """Split a BED line and keep fields 0-3 and 5-11, dropping field 4."""
        fields = bed_line.split('\t')
        return fields[0:4] + fields[5:12]

    genes_bed = list(BEDReader("tests/unit/data/bter_top.coding.bed"))
    genes_gff = list(GFFReader("tests/unit/data/bter_top.gff"))
    for gene_bed, gene_gff in zip(genes_bed, genes_gff):
        mrna_pairs = zip(gene_bed.mrnas.values(), gene_gff.mrnas.values())
        for mrna_bed, mrna_gff in mrna_pairs:
            from_bed = comparable_fields(mrna_bed.to_bed())
            from_gff = comparable_fields(mrna_gff.to_bed(cds_only=True))
            assert from_bed == from_gff
def test_bter():
    """Genes read from ``bter_top.bed`` and ``bter_top.gff`` must agree.

    The two gene lists must compare equal, and for every positionally paired
    mRNA the BED fields 0-3 and 5-11 must match.
    """

    def comparable_fields(bed_line):
        """Split a BED line and keep fields 0-3 and 5-11, dropping field 4."""
        fields = bed_line.split('\t')
        return fields[0:4] + fields[5:12]

    genes_bed = list(BEDReader("tests/unit/data/bter_top.bed"))
    genes_gff = list(GFFReader("tests/unit/data/bter_top.gff"))
    assert genes_bed == genes_gff
    for gene_bed, gene_gff in zip(genes_bed, genes_gff):
        mrna_pairs = zip(gene_bed.mrnas.values(), gene_gff.mrnas.values())
        for mrna_bed, mrna_gff in mrna_pairs:
            from_bed = comparable_fields(mrna_bed.to_bed())
            from_gff = comparable_fields(mrna_gff.to_bed())
            assert from_bed == from_gff
def test_insufficient_permissions_file():
    """Opening a file with all permission bits cleared must raise ``PermissionError``.

    The test is skipped for the superuser, for whom the operating system does
    not enforce file permission bits, so the expected error can never occur.
    """
    # ``os.geteuid`` only exists on Unix-like platforms.
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        pytest.skip("file permission bits are not enforced for the superuser")
    with tempfile.NamedTemporaryFile("wt") as tmp:
        os.chmod(tmp.name, 0o000)
        try:
            with pytest.raises(PermissionError):
                GFFReader(tmp.name)
        finally:
            # Give the owner access back so the temporary file is left in a
            # normal state for cleanup.
            os.chmod(tmp.name, 0o600)
def test_empty_file():
    """Reading a freshly created, empty temporary file must raise ``EmptyFileError``."""
    with tempfile.NamedTemporaryFile() as empty_handle, pytest.raises(EmptyFileError):
        # Construction stays inside the block: the error may come from either
        # opening the file or iterating it.
        reader = GFFReader(empty_handle.name)
        for _record in reader:
            pass
def test_invalid_file():
    """Constructing a reader for the path ``foo`` must raise ``FileNotFoundError``."""
    missing_path = "foo"
    with pytest.raises(FileNotFoundError):
        GFFReader(missing_path)