def test_no_fa_exception(self, test_data_dir):
     """Building models from INSC1003.gff3, which has no sequences, must raise GFF3FastaException."""
     sequence_free_gff3 = test_data_dir / "INSC1003.gff3"
     with pytest.raises(GFF3FastaException):
         # Everything stays inside the context manager so the exception is
         # caught whether it is raised at call time or while iterating.
         parsed = parse_gff3_embedded_fasta(sequence_free_gff3)
         models = ParsedAnnotationRecord.parsed_annotation_records_to_model(parsed)
         list(models)
Esempio n. 2
0
 def test_parse_peg10(self, test_data_dir):
     """PEG10 is a human gene with a -1 frameshift; its first transcript must not report an in-frame stop."""
     gff3_path = test_data_dir / "PEG10_offset_gff3_fasta.gff3"
     parsed = parse_gff3_embedded_fasta(gff3_path)
     records = list(ParsedAnnotationRecord.parsed_annotation_records_to_model(parsed))
     # Only the first transcript of the first gene on the first record is checked.
     first_transcript = records[0].genes[0].transcripts[0]
     assert not first_transcript.has_in_frame_stop
 def test_duplicate_sequence(self, test_data_dir):
     """Expect DuplicateSequenceException from both the separate-FASTA and the embedded-FASTA parsers."""
     plain_gff3 = test_data_dir / "INSC1003.gff3"
     duplicate_fasta = test_data_dir / "INSC1003_extra_contig_duplicate.fa"
     embedded_gff3 = test_data_dir / "INSC1003_embedded_extra_contig_duplicate.gff3"

     # GFF3 paired with an external FASTA file.
     with pytest.raises(DuplicateSequenceException):
         list(parse_gff3_fasta(plain_gff3, duplicate_fasta))

     # GFF3 carrying its FASTA section inline.
     with pytest.raises(DuplicateSequenceException):
         list(parse_gff3_embedded_fasta(embedded_gff3))
 def test_gff3_with_embedded_fa_extra_contig(self, test_data_dir):
     """Handle FASTA with sequences not seen in the GFF3.

     Two records are expected: the first has a sequence name and sequence,
     the second is empty but still carries the "extraseq" sequence.
     """
     gff3_path = test_data_dir / "INSC1003_embedded_extra_contig.gff3"
     parsed = parse_gff3_embedded_fasta(gff3_path)
     records = list(ParsedAnnotationRecord.parsed_annotation_records_to_model(parsed))
     assert len(records) == 2

     first_rec, extra_rec = records
     assert extra_rec.is_empty
     assert first_rec.sequence_name
     assert first_rec.sequence
     assert extra_rec.sequence
     assert extra_rec.sequence_name == "extraseq"
Esempio n. 5
0
def test_tbl_export_from_gff3(test_data_dir, tmp_path, gff3, expected_tbl):
    """Export a GFF3 file to TBL and compare it verbatim with the expected file.

    ``gff3`` and ``expected_tbl`` are names resolved relative to
    ``test_data_dir``; the export is written to ``tmp_path / "tmp.tbl"``.
    """
    gff3_path = test_data_dir / gff3
    parsed = parse_gff3_embedded_fasta(gff3_path)
    records = list(ParsedAnnotationRecord.parsed_annotation_records_to_model(parsed))

    out_path = tmp_path / "tmp.tbl"
    with open(out_path, "w") as out_handle:
        collection_to_tbl(
            records,
            out_handle,
            locus_tag_prefix="test",
            submitter_lab_name="inscripta",
            random_seed=123,
        )

    with open(out_path) as produced, open(test_data_dir / expected_tbl) as expected:
        produced_text = produced.read()
        expected_text = expected.read()
    assert produced_text == expected_text
Esempio n. 6
0
    def test_parse_inso(self, test_data_dir):
        """This proves we handle frame and phase.

        The insO frameshift gene is parsed from both GenBank and GFF3; the
        primary protein of the first gene must be identical in both and equal
        to the expected translation.
        """
        expected_protein = (
            "MKKRNFSAEFKRESAQLVVDQKYTVADAAKAMDVGLSTMTRWVKQLRDERQGKTPKASPITPEQIEIRKLRKKLQRIEMENEILKKNRP"
            "EKPDGRRAVLRSQVLELHGISHGSAGARSIATMATRRGYQMGRWLAGRLMKELGLVSCQQPTHRYKRGGHEHVAIPNYLERQFAVTEPNQV"
            "WCGDVTYIWTGKRWAYLAVVLDLFARKPVGWAMSFSPDSRLTMKALEMAWETRGKPVGVMFQSDQGSHYTSRQFRQLLWRYRIRQSMSRR"
            "GNCWDNSPMERFFRSLKNEWVPATGYVSFSDAAHAITDYIVGYYSALRPHEYNGGLPPNESENRYWKNSNAEASFS*"
        )
        genbank_path = test_data_dir / "insO_frameshift.gbk"
        gff3_path = test_data_dir / "insO_frameshift.gff3"

        # The GenBank parser takes an open handle, so the records are
        # materialised before the file is closed.
        with open(genbank_path, "r") as handle:
            genbank_models = list(
                ParsedAnnotationRecord.parsed_annotation_records_to_model(parse_genbank(handle))
            )
        genbank_record = genbank_models[0]

        gff3_models = list(
            ParsedAnnotationRecord.parsed_annotation_records_to_model(parse_gff3_embedded_fasta(gff3_path))
        )
        gff3_record = gff3_models[0]

        genbank_protein = str(genbank_record.genes[0].get_primary_protein())
        gff3_protein = str(gff3_record.genes[0].get_primary_protein())
        assert genbank_protein == gff3_protein
        assert gff3_protein == expected_protein