Esempio n. 1
0
 def test_compound_overlap_reverse(self):
     """Check get_sub on a reverse-strand compound location whose parts overlap."""
     first_part = FeatureLocation(15, 24, -1)
     second_part = FeatureLocation(10, 16, -1)
     self.feature.location = CompoundLocation([first_part, second_part])

     # result lies inside the first listed part only
     head = self.get_sub(0, 1)
     assert head == FeatureLocation(21, 24, -1)

     # result needs a piece from each of the two parts
     spanning = self.get_sub(2, 4)
     expected_pieces = [FeatureLocation(15, 18, -1),
                        FeatureLocation(13, 16, -1)]
     assert spanning == CompoundLocation(expected_pieces)

     # result lies inside the second listed part only
     tail = self.get_sub(4, 5)
     assert tail == FeatureLocation(10, 13, -1)
Esempio n. 2
0
 def test_compound_overlap_forward(self):
     """Check get_sub on a forward-strand compound location whose parts overlap."""
     first_part = FeatureLocation(10, 16, 1)
     second_part = FeatureLocation(15, 24, 1)
     self.feature.location = CompoundLocation([first_part, second_part])

     # result lies inside the first listed part only
     head = self.get_sub(0, 1)
     assert head == FeatureLocation(10, 13, 1)

     # result needs a piece from each of the two parts
     spanning = self.get_sub(1, 3)
     expected_pieces = [FeatureLocation(13, 16, 1),
                        FeatureLocation(15, 18, 1)]
     assert spanning == CompoundLocation(expected_pieces)

     # result lies inside the second listed part only
     tail = self.get_sub(4, 5)
     assert tail == FeatureLocation(21, 24, 1)
Esempio n. 3
0
 def test_bridging_fails(self):
     """A join listing the higher-coordinate part first must be rejected."""
     high_part = FeatureLocation(9, 12, strand=1)
     low_part = FeatureLocation(0, 3, strand=1)

     # higher coordinates listed first: expected to raise
     with self.assertRaisesRegex(ValueError, "bridge the record origin"):
         Feature(CompoundLocation([high_part, low_part], operator="join"),
                 feature_type="test")

     # the same parts in the opposite order must construct without error
     Feature(CompoundLocation([low_part, high_part], operator="join"),
             feature_type="test")
Esempio n. 4
0
 def test_mixed_strand(self):
     """Converting a biopython feature with mixed-strand parts raises ValueError.

     Each offending location runs in its own subTest, so a failure reports
     which location was wrongly accepted and the remaining case still runs.
     """
     bio = self.cds.to_biopython()[0]
     mixed_locations = [
         # parts on opposite strands
         CompoundLocation([
             FeatureLocation(1, 5, strand=-1),
             FeatureLocation(8, 10, strand=1)
         ]),
         # one stranded part combined with a part that has no strand
         CompoundLocation([
             FeatureLocation(1, 5, strand=1),
             FeatureLocation(8, 10, strand=None)
         ]),
     ]
     for location in mixed_locations:
         with self.subTest(location=location):
             bio.location = location
             with self.assertRaisesRegex(
                     ValueError, "compound locations with mixed strands"):
                 CDSFeature.from_biopython(bio)
Esempio n. 5
0
 def test_compound_reverse(self):
     """get_sub(2, 3) on a three-part reverse-strand location yields the middle part."""
     reverse_parts = [
         FeatureLocation(21, 27, -1),
         FeatureLocation(12, 15, -1),
         FeatureLocation(0, 6, -1),
     ]
     self.feature.location = CompoundLocation(reverse_parts)

     middle = self.get_sub(2, 3)
     assert middle == FeatureLocation(12, 15, -1)
Esempio n. 6
0
 def test_extends_past_before(self):
     """Sub-location start when the first reverse-strand part has a BeforePosition start."""
     self.reverse_strand()

     # swap the first part for one whose start is a BeforePosition, keeping its end
     original_first = self.sub_locations[0]
     ambiguous_first = FeatureLocation(BeforePosition(2),
                                       original_first.end,
                                       strand=-1)
     self.sub_locations[0] = ambiguous_first

     reversed_parts = self.sub_locations[::-1]
     self.cds.location = CompoundLocation(reversed_parts)

     sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
     assert sub_location.start == 3
Esempio n. 7
0
    def test_extends_past_after(self):
        """Sub-location end when the last part has an AfterPosition end."""
        # swap the last part for one whose end is an AfterPosition
        ambiguous_last = FeatureLocation(21, AfterPosition(29), strand=1)
        self.sub_locations[-1] = ambiguous_last
        self.cds.location = CompoundLocation(self.sub_locations)

        sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
        assert sub_location.end == 27
Esempio n. 8
0
    def test_frameshifted_location(self):
        """Sub-location lookup on a compound location whose two parts overlap by one base."""
        overlapping_parts = [FeatureLocation(3, 9, 1),
                             FeatureLocation(8, 14, 1)]
        location = CompoundLocation(overlapping_parts)
        assert len(location) == 12

        # sanity-check the extracted sequence and its translation first
        record_seq = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
        extracted = location.extract(record_seq)
        assert extracted == "ATGAGCCCCTCG"
        assert len(extracted) == len(location)
        translation = extracted.translate()
        assert translation == "MSPS"

        cds = CDSFeature(location, locus_tag="test", translation=translation)
        sub_location = cds.get_sub_location_from_protein_coordinates(1, 3)

        # the requested range must come back as a two-part compound location
        assert isinstance(sub_location, CompoundLocation)
        assert len(sub_location.parts) == 2
        assert sub_location.start == 6
        assert sub_location.end == 11
Esempio n. 9
0
 def test_compound(self):
     """Translation length validity for exact and ambiguous compound locations."""
     exact = CompoundLocation([FeatureLocation(0, 3),
                               FeatureLocation(6, 9)])
     for translation in ("A", "AA"):
         assert _is_valid_translation_length(translation, exact)
     assert not _is_valid_translation_length("AAA", exact)

     # with an ambiguous end, the longer translation becomes acceptable
     ambiguous_end = CompoundLocation([
         FeatureLocation(0, 3),
         FeatureLocation(6, AfterPosition(11)),
     ])
     assert _is_valid_translation_length("AAA", ambiguous_end)

     # the same holds for a reverse-strand location with an ambiguous start
     ambiguous_reverse = CompoundLocation([
         FeatureLocation(BeforePosition(0), 3, -1),
         FeatureLocation(6, 9, -1),
     ])
     for translation in ("A", "AA", "AAA"):
         assert _is_valid_translation_length(translation, ambiguous_reverse)
Esempio n. 10
0
 def test_compound_reverse(self):
     """adjust() on a reverse-strand compound location shifts only the overall end."""
     offsets = range(-2, 3)
     for position_type in self.position_types:
         leading = FeatureLocation(15, position_type(17), -1)
         trailing = FeatureLocation(5, 12, -1)
         original = CompoundLocation([leading, trailing])
         for offset in offsets:
             shifted = adjust(original, offset)
             # the end keeps its position type and moves by the offset
             assert isinstance(shifted.end, position_type)
             assert shifted.end == original.end + offset
             # the other side of the first part is the very same object
             assert shifted.parts[0].start is original.parts[0].start
             # every later part is reused as-is
             pairs = zip(original.parts[1:], shifted.parts[1:])
             for before, after in pairs:
                 assert before is after
Esempio n. 11
0
 def test_compound_forward(self):
     """adjust() on a forward-strand compound location shifts only the overall start."""
     offsets = range(-2, 3)
     for position_type in self.position_types:
         leading = FeatureLocation(position_type(5), 12, 1)
         trailing = FeatureLocation(15, 17, 1)
         original = CompoundLocation([leading, trailing])
         for offset in offsets:
             shifted = adjust(original, offset)
             # the start keeps its position type and moves by the offset
             assert isinstance(shifted.start, position_type)
             assert shifted.start == original.start + offset
             # the other side of the first part is the very same object
             assert shifted.parts[0].end is original.parts[0].end
             # every later part is reused as-is
             pairs = zip(original.parts[1:], shifted.parts[1:])
             for before, after in pairs:
                 assert before is after
Esempio n. 12
0
 def reverse_strand(self):
     """Flip the fixture to the opposite strand and rebuild its location and CDS."""
     self.magic = self.magic.reverse_complement()
     self.magic_split = self.magic_split.reverse_complement()

     # same coordinates for every part, with the strand sign inverted
     flipped = []
     for part in self.sub_locations:
         flipped.append(
             FeatureLocation(part.start, part.end, strand=part.strand * -1))
     self.sub_locations = flipped

     # the new strand doubles as the slice step: -1 reverses the part
     # order, 1 keeps it unchanged
     step = self.sub_locations[0].strand
     self.location = CompoundLocation(self.sub_locations[::step])
     self.cds = CDSFeature(self.location,
                           locus_tag="compound",
                           translation="A")
Esempio n. 13
0
 def setUp(self):
     """Build a three-part forward-strand CDS fixture and its reference sequences."""
     # sequence with 'x' filler between the coding pieces, and the same
     # sequence with the filler removed
     self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")
     self.magic = Seq("ATGGCAGGTATTTGT")
     self.translation = "MAGIC"

     first = FeatureLocation(0, 6, strand=1)
     second = FeatureLocation(12, 15, strand=1)
     third = FeatureLocation(21, 27, strand=1)
     self.sub_locations = [first, second, third]

     self.location = CompoundLocation(self.sub_locations)
     self.cds = CDSFeature(self.location,
                           locus_tag="compound",
                           translation="A")
Esempio n. 14
0
 def test_complicated(self):
     """Check protein-to-nucleotide sub-location mapping on a ten-part join.

     Builds a forward-strand CDS from ten parts, requests the sub-location
     for protein coordinates 353 to 412, and checks that extracting and
     translating that sub-location from the sequence gives the matching
     slice of the expected translation.
     """
     # ten forward-strand parts in ascending coordinate order
     parts = [
         FeatureLocation(121124, 122061, 1),
         FeatureLocation(122339, 122383, 1),
         FeatureLocation(122559, 122666, 1),
         FeatureLocation(122712, 122874, 1),
         FeatureLocation(123060, 123337, 1),
         FeatureLocation(123481, 123749, 1),
         FeatureLocation(123809, 124032, 1),
         FeatureLocation(124091, 124193, 1),
         FeatureLocation(124236, 124401, 1),
         FeatureLocation(124684, 124724, 1)
     ]
     location = CompoundLocation(parts, operator="join")
     # the CDS gets a placeholder translation; the full expected
     # translation is only used for the comparison at the end
     cds = CDSFeature(location, locus_tag="complicated", translation="A")
     # nucleotide sequence; it is padded with location.start filler
     # characters below before extraction
     seq = (
         "ATGAGCCCTCGTCTAGACTACAATGAAGGATACGATTCCGAAGACGAGGAGATCCCCCGTTACGTACACCAT"
         "TCTAGAGGAAAGAGTCATAGATCCGTGAGGACGTCAGGTCGCTCACGCACGTTGGATTACGACGGGGATGAT"
         "GAAGCTAGTGACCACGCTGCCCCCTCCGGGATTGATCGGGACGCTCGAGCCTGTCCAACATCTCGCAGATAT"
         "ACTGATGACTGCCTTGAGACACATAAATTTCGAGGTGCCCGCTCCTCTCGCTCCCGTGGACGAACCGATGAT"
         "AACAAGGTTTTGTACTACACCAAGTATCGCAGCCCGGCTAAGGACTTGCCTATCGAGCGTGATCCCGAGGGT"
         "ATTAATTTATTCAAGGTCCGACAGCACACACGGCCAAGTGACGCTCATGTGCCCAGTGGATACCGTGAGCCC"
         "TACGAAGTCAAGGTCGACGAGTATGAGGATGATCATCCCCGTACATGCACTAGCCGCCGTGACTCTAGACAG"
         "CCGAAAGTCTACAAGGTCCGGGTTGATGAGTACGAGGATAACCTCCCTGCACGCTCTCACACTGACTTTCGC"
         "GAGTCTCCACGGTCTGAAAGATGCTCTAGCCGCTACACCGAGGACTCGAAGCCTGGGGAGCTTCCTCCCCGC"
         "TCAGGGCCCTGTCGGTCCAGCAGGCCTTCTCCGGTCGATGAGGACGTCGAGTATGAGATCCGTGAGCCCCGA"
         "GGGCATCGCTCCAGTCGACACTCTACAGATGTTGACTTTCAGCCAGTAGAACAACATCCTCGCTTTGGACAA"
         "CGTGGACTCAGCAGACCTTCGCGGGTTGATGAGGAAGTCGATTATGAGATCCGTGAGCCCCGTGGCAATCGT"
         "GTCAGTCACGCTGCTCATGGTGACAGCCCCTGTCAGGACCAAAGCTCCAGGCATATCGGCATTCAATTGTGG"
         "AGTACGCGCGGACCCCGGGCGGCTGGCCGTGGCCGGGGTCCTGATGAGTCTGACGATGTTGAGCCCTAGGCA"
         "GGGAATTGCCGTAATGCTCTTCAAACTGTATAGCAAGCTCAGCATCAATTCTTTAACTGGCAGGCGCTCTGC"
         "TCGCGCGTTTCTCTCTTGGGGTGGTTGGTTTGACTGTAGATTTCCTCTTTCAAGGCTTCTAGATACACCTTT"
         "GGAAGATAGCAACGCTATGCAAGATATTTTTGATAATTCAAATCCTTTTTACACATGGAATAGCTGGTGTTC"
         "CTGTTTTATCTAGGCAATTGACCCACGCCATCTCGGTAGGTACGGTAAAAGCAAGCCGTAATCTCGTATGGC"
         "TTCATCCTTAGCATCGTATAGATCTCCACTCGGGACTCGGCCAGGGATCTTCCATCAATCAACGTGAAGAAG"
         "TCCAGCACCCCGCTGAATCATAATATCCTACCGATTCTGCTCTCTTCACCTCTAGATACCCCTCTAGACTCC"
         "TGTCAACATGTTCCGTACAGTCGAAGACCGCCCGACCCCAAAAGAGGTATATAACTGGCGGCTGTACACCGA"
         "GGCCACCATCATTGCCACTGGTACACTCTTGTGAGTAGGTGCTGTTGTAACGAAAAACATCCAACTGATCCG"
         "CCAGGTTCGGCTATGACTCGGCTTTTGTGGGAACTACCATTGCCCGCCAAAGCTTCGTTGATGCCTTCAACA"
         "TCGTCGAGTCGGAGGCGGCGGATATTTCAAGCAATATCACGTCAACCTTTCAGGCCGGCGCATTTTTCGGCG"
         "CCATCTTCTGCTTCTTGCCTGAGTGAAGCCGTTAGAGACGGTCTCACTGGCTAACCGGACCAAGTGACCGAC"
         "AAAATTGGGCGTAAATGGGCCCTTCAGGCAAACACACTGCTGTTTCTTATTGGCGCGATTGTGATGACGGCT"
         "GCAACACATCACCTTTCCTATATATGTAAGTCATATCCCCGTAGTAGTCAAGGTTGTTAACTAGAGCAGATG"
         "CTGGACGAGCTCTCACCGGCATCGCATGCGGCGCTATCACCGCGACCGTCCCCAGCTATATTGCCGAGCTGT"
         "CAATCGTGTCGATCCGGGGCTTCCTCACCGGGTTCTTCGAAGTCGCATACCAGATTGGTAGCTTGGTTGGAT"
         "TCTGGATCAACTATGGCATTAACGAGAACATGGACAACTCCTCGGCCGCAAGCTGGAGAGTGCCTATGGCAG"
         "TCCAGATCATCCCCGCAGGAGTCCTTTTCATTGGTGGCTTTTCCTCCATGAGAGTCCTCTCTGGCTGATGCG"
         "AAAAGACAGTGAGGATGCCGCGACGGCTGCCCTGGAGGCGTTGAGGAAACTGCCACGGTCTCATCAATGTAA"
         "TCTCCCACCAAGACTCAGGACATAGTCCCATGCTGACTATTTTAGATGTCCAGGAAGACATCGAGATGAACC"
         "GCACCAGGCTGCTGGAGGAAGCTCGGATCGCCGAGAAGTACGGACAAGGTTGGTTGGCATATATCCGAGGCG"
         "CACTCTTCGAGCTCTCGCGCCATGGGATGTGGAATCGTGTTCTGCTCGTCCTCTGTGCCTTTGCACTGCAGA"
         "ATATGTCGGGAGCTGCTGCTATCAACTACTATTCCCCCATACTCTTTGCGTCGTTGGGGATCACTGATGTCG"
         "CTCTGTATACAGGTATTTATGGCCTGGTAAAAGGTAAGTTCTTCTCCTTAAGTATCTCTGGCTGACAATAGG"
         "GATTAACTGATGAGTTTACAGCCGTCGCATCAATTATATTCTACGGCATTCTCATTGATATGTGGGGCCGCC"
         "GACGTCCGACCATTGTTTCGTCACTGGCCTGCCCTCTATGTCTCTGGTTTGTGGGTGCATACGTCAAAGTTG"
         "GGCATCCAGCCGATATCATAGACGCCGGCGGGGAATTGTCCCCCTCCACGGAGGCTGGTGGTAGAGCGGCGA"
         "CTGCGATGATTATGATCTACTCCGTCTTGTAAGTGCCCCTCACTTTTGAATGGGCTTCAGCTTGGAACTCGA"
         "GTAACTGGTATCCAGTTGGTCTTTTGGTCTCAACGGTATCCCCTGGATTGTCTCCGCCGAAATCTTCCCCGG"
         "CGCGCTGCGAAATCTCACGGGGACATGGGCTGCGCTGGTGCAATGGTATGCAATTCCCTTCACCTAGTATCC"
         "ATATCTAAATCAGCAGGTTGATCCAATTCGTTATCACCAAAGCTCTCCCGTACATCTTCAATAGCCTTGGGT"
         "ACGGGACGTGGTTCTTCTTCGCCTCCTGGATGCTGCTCGCTATCATTTGGTCATTCTTTTTTCTCCCGGAAA"
         "CCAAGGGGAAGACTCTCGATGAAATGCATACGATCTTGTACGTTTCTCTCCGTCGAAATGTGGTCTTGGCTA"
         "ATGAATCAGCGGCCATTCTCTCGCCGAAGAGCAGGGTAAGGGTGAGGTTCGAGATAACACTACTAAAAGTGA"
         "TCGGGAGGCTGTCTAGTCCAGTAGTTCTAGAGGACTATTGGCTGGATGATTCCTCTGATGATTTTTGATTGG"
         "TGGTGAAAATGTTGGATGTTTAATGCCAATGTACTGGGAGAGAACATGCCGATAGTACATACCGCTGTGTTG"
         "TATATCGAAGACGGTTGATTTATATATCTTAGTCTTTCAAAAGACGGCACTCACACAATCACACTTCGATGA"
     )
     # expected protein translation; only the [353:412] slice is compared
     translation = (
         "MSPRLDYNEGYDSEDEEIPRYVHHSRGKSHRSVRTSGRSRTLDYDGDDEASDHAAPSGIDRDAR"
         "ACPTSRRYTDDCLETHKFRGARSSRSRGRTDDNKVLYYTKYRSPAKDLPIERDPEGINLFKVRQ"
         "HTRPSDAHVPSGYREPYEVKVDEYEDDHPRTCTSRRDSRQPKVYKVRVDEYEDNLPARSHTDFR"
         "ESPRSERCSSRYTEDSKPGELPPRSGPCRSSRPSPVDEDVEYEIREPRGHRSSRHSTDVDFQPV"
         "EQHPRFGQRGLSRPSRVDEEVDYEIREPRGNRVSHAAHGDSPCQDQSSRHIGIQLWTGVPVLSR"
         "QLTHAISTPVNMFRTVEDRPTPKEVYNWRLYTEATIIATGTLLFGYDSAFVGTTIARQSFVDAF"
         "NIVESEAADISSNITSTFQAGAFFGAIFCFLPEADAGRALTGIACGAITATVPSYIAELSIVSI"
         "RGFLTGFFEVAYQIGSLVGFWINYGINENMDNSSAASWRVPMAVQIIPAGVLFIGGFSSMREDI"
         "EMNRTRLLEEARIAEKYGQGWLAYIRGALFELSRHGMWNRVLLVLCAFALQNMSGAAAINYYSP"
         "ILFASLGITDVALYTGIYGLVKAVASIIFYGILIDMWGRRRPTIVSSLACPLCLWFVGAYVKVG"
         "HPADIIDAGGELSPSTEAGGRAATAMIMIYSVFWSFGLNGIPWIVSAEIFPGALRNLTGTWAAL"
         "VQWLIQFVITKALPYIFNSLGYGTWFFFASWMLLAIIWSFFFLPETKGKTLDEMHTIFLSKDGT"
         "HTITLR")
     new = cds.get_sub_location_from_protein_coordinates(353, 412)
     # pad the beginning to match the location
     assert new.extract(Seq("x" * location.start +
                            seq)).translate() == translation[353:412]