def test_compound_overlap_reverse(self):
    """Check ``get_sub`` results for overlapping reverse-strand parts.

    The feature location is replaced with a compound of two reverse-strand
    parts (15-24 followed by 10-16) that share one position. ``get_sub`` is
    then queried for a range inside the first part, a range crossing from
    the first part into the second, and a range inside the second part.
    """
    leading = FeatureLocation(15, 24, -1)
    trailing = FeatureLocation(10, 16, -1)
    self.feature.location = CompoundLocation([leading, trailing])

    crossing = CompoundLocation([
        FeatureLocation(15, 18, -1),
        FeatureLocation(13, 16, -1),
    ])
    expectations = [
        ((0, 1), FeatureLocation(21, 24, -1)),
        ((2, 4), crossing),
        ((4, 5), FeatureLocation(10, 13, -1)),
    ]
    for (begin, finish), expected in expectations:
        assert self.get_sub(begin, finish) == expected
def test_compound_overlap_forward(self):
    """Check ``get_sub`` results for overlapping forward-strand parts.

    The feature location is replaced with a compound of two forward-strand
    parts (10-16 followed by 15-24) that share one position. ``get_sub`` is
    then queried for a range inside the first part, a range crossing from
    the first part into the second, and a range inside the second part.
    """
    leading = FeatureLocation(10, 16, 1)
    trailing = FeatureLocation(15, 24, 1)
    self.feature.location = CompoundLocation([leading, trailing])

    crossing = CompoundLocation([
        FeatureLocation(13, 16, 1),
        FeatureLocation(15, 18, 1),
    ])
    expectations = [
        ((0, 1), FeatureLocation(10, 13, 1)),
        ((1, 3), crossing),
        ((4, 5), FeatureLocation(21, 24, 1)),
    ]
    for (begin, finish), expected in expectations:
        assert self.get_sub(begin, finish) == expected
def test_bridging_fails(self):
    """Check that a join ordered 9-12 then 0-3 is rejected by ``Feature``.

    Building a ``Feature`` from forward-strand parts listed as 9-12 followed
    by 0-3 must raise a ``ValueError`` whose message matches
    "bridge the record origin". The same parts in the opposite order are
    then used to build a ``Feature`` outside the ``assertRaisesRegex``
    context, so any exception there fails the test.
    """
    late_part = FeatureLocation(9, 12, strand=1)
    early_part = FeatureLocation(0, 3, strand=1)
    parts = [late_part, early_part]

    with self.assertRaisesRegex(ValueError, "bridge the record origin"):
        Feature(CompoundLocation(parts, operator="join"), feature_type="test")

    # the same parts in the opposite order are constructed without the guard
    flipped = parts[::-1]
    Feature(CompoundLocation(flipped, operator="join"), feature_type="test")
def test_mixed_strand(self):
    """Check that ``CDSFeature.from_biopython`` rejects mixed-strand compounds.

    The first biopython feature produced by ``self.cds.to_biopython()`` is
    given, in turn, a compound location with opposite strands and a compound
    location where one part has no strand. Each conversion must raise a
    ``ValueError`` matching "compound locations with mixed strands".
    """
    bio = self.cds.to_biopython()[0]

    opposing = CompoundLocation([
        FeatureLocation(1, 5, strand=-1),
        FeatureLocation(8, 10, strand=1),
    ])
    partly_unstranded = CompoundLocation([
        FeatureLocation(1, 5, strand=1),
        FeatureLocation(8, 10, strand=None),
    ])

    for location in (opposing, partly_unstranded):
        bio.location = location
        with self.assertRaisesRegex(ValueError,
                                    "compound locations with mixed strands"):
            CDSFeature.from_biopython(bio)
def test_compound_reverse(self):
    """Check ``get_sub(2, 3)`` on a three-part reverse-strand compound.

    The feature location is set to reverse-strand parts 21-27, 12-15 and
    0-6 (in that order); the result is expected to equal the middle part.
    """
    pieces = [
        FeatureLocation(21, 27, -1),
        FeatureLocation(12, 15, -1),
        FeatureLocation(0, 6, -1),
    ]
    self.feature.location = CompoundLocation(pieces)

    expected = FeatureLocation(12, 15, -1)
    assert self.get_sub(2, 3) == expected
def test_extends_past_before(self):
    """Check sub-location start when the first part has a ``BeforePosition``.

    After reversing the strand, the first stored sub-location is replaced by
    one starting at ``BeforePosition(2)`` with the same end. The CDS location
    is rebuilt from the sub-locations in reversed list order, and the
    sub-location for protein coordinates 0-7 is expected to start at 3.
    """
    self.reverse_strand()

    original_first = self.sub_locations[0]
    self.sub_locations[0] = FeatureLocation(BeforePosition(2),
                                            original_first.end, strand=-1)
    reordered = self.sub_locations[::-1]
    self.cds.location = CompoundLocation(reordered)

    sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
    assert sub_location.start == 3
def test_extends_past_after(self):
    """Check sub-location end when the last part has an ``AfterPosition``.

    The last stored sub-location is replaced by 21 to ``AfterPosition(29)``
    on the forward strand, the CDS location is rebuilt from the
    sub-locations, and the sub-location for protein coordinates 0-7 is
    expected to end at 27.
    """
    open_ended = FeatureLocation(21, AfterPosition(29), strand=1)
    self.sub_locations[-1] = open_ended
    self.cds.location = CompoundLocation(self.sub_locations)

    sub_location = self.cds.get_sub_location_from_protein_coordinates(0, 7)
    assert sub_location.end == 27
def test_frameshifted_location(self):
    """Check protein-coordinate lookup on a compound whose parts overlap.

    The location joins forward-strand parts 3-9 and 8-14, which share one
    position. The test first confirms the location length, the extracted
    nucleotides and their translation, then builds a CDS from them and
    verifies the sub-location for protein coordinates 1-3 is a two-part
    compound spanning 6 to 11.
    """
    upstream = FeatureLocation(3, 9, 1)
    downstream = FeatureLocation(8, 14, 1)
    location = CompoundLocation([upstream, downstream])
    assert len(location) == 12

    record_seq = Seq("ATGATGAGCCCTCGTCTAGACTACAATGA")
    nucleotides = location.extract(record_seq)
    assert nucleotides == "ATGAGCCCCTCG"
    assert len(nucleotides) == len(location)

    protein = nucleotides.translate()
    assert protein == "MSPS"

    cds = CDSFeature(location, locus_tag="test", translation=protein)
    sub_location = cds.get_sub_location_from_protein_coordinates(1, 3)
    assert isinstance(sub_location, CompoundLocation)
    assert len(sub_location.parts) == 2
    assert sub_location.start == 6
    assert sub_location.end == 11
def test_compound(self):
    """Check ``_is_valid_translation_length`` against compound locations.

    Covers a compound with exact boundaries, one whose final part ends in an
    ``AfterPosition``, and a reverse-strand compound whose first listed part
    starts with a ``BeforePosition``.
    """
    first_part = FeatureLocation(0, 3)
    exact = CompoundLocation([first_part, FeatureLocation(6, 9)])
    for accepted in ("A", "AA"):
        assert _is_valid_translation_length(accepted, exact)
    assert not _is_valid_translation_length("AAA", exact)

    # and with an ambiguous end, that becomes ok
    open_end = CompoundLocation([
        FeatureLocation(0, 3),
        FeatureLocation(6, AfterPosition(11)),
    ])
    assert _is_valid_translation_length("AAA", open_end)

    # and reversed ambiguous end
    reversed_open = CompoundLocation([
        FeatureLocation(BeforePosition(0), 3, -1),
        FeatureLocation(6, 9, -1),
    ])
    for accepted in ("A", "AA", "AAA"):
        assert _is_valid_translation_length(accepted, reversed_open)
def test_compound_reverse(self):
    """Check ``adjust`` on a reverse-strand compound for each position type.

    For every type in ``self.position_types`` a compound is built whose first
    part's end uses that type. For offsets -2 through 2, the adjusted
    location must keep that type for its end, shift the end by the offset,
    reuse the first part's original start object, and reuse every remaining
    part object unchanged.
    """
    offsets = range(-2, 3)
    for position_type in self.position_types:
        original = CompoundLocation([
            FeatureLocation(15, position_type(17), -1),
            FeatureLocation(5, 12, -1),
        ])
        for offset in offsets:
            adjusted = adjust(original, offset)
            assert isinstance(adjusted.end, position_type)
            assert adjusted.end == original.end + offset
            assert adjusted.parts[0].start is original.parts[0].start
            # every part after the first is expected to be the same object
            untouched_pairs = zip(original.parts[1:], adjusted.parts[1:])
            for before, after in untouched_pairs:
                assert before is after
def test_compound_forward(self):
    """Check ``adjust`` on a forward-strand compound for each position type.

    For every type in ``self.position_types`` a compound is built whose first
    part's start uses that type. For offsets -2 through 2, the adjusted
    location must keep that type for its start, shift the start by the
    offset, reuse the first part's original end object, and reuse every
    remaining part object unchanged.
    """
    offsets = range(-2, 3)
    for position_type in self.position_types:
        original = CompoundLocation([
            FeatureLocation(position_type(5), 12, 1),
            FeatureLocation(15, 17, 1),
        ])
        for offset in offsets:
            adjusted = adjust(original, offset)
            assert isinstance(adjusted.start, position_type)
            assert adjusted.start == original.start + offset
            assert adjusted.parts[0].end is original.parts[0].end
            # every part after the first is expected to be the same object
            untouched_pairs = zip(original.parts[1:], adjusted.parts[1:])
            for before, after in untouched_pairs:
                assert before is after
def reverse_strand(self):
    """Flip the fixture's sequences, sub-locations, location and CDS.

    Both stored sequences are replaced by their reverse complements and each
    sub-location is rebuilt with its strand multiplied by -1. The compound
    location is then built by slicing the sub-location list with a step equal
    to the first sub-location's new strand (so a strand of -1 reverses the
    list order), and a fresh CDS is created from that location.
    """
    self.magic = self.magic.reverse_complement()
    self.magic_split = self.magic_split.reverse_complement()

    flipped = []
    for loc in self.sub_locations:
        flipped.append(
            FeatureLocation(loc.start, loc.end, strand=loc.strand * -1))
    self.sub_locations = flipped

    # the strand value doubles as the slice step
    step = self.sub_locations[0].strand
    self.location = CompoundLocation(self.sub_locations[::step])
    self.cds = CDSFeature(self.location, locus_tag="compound",
                          translation="A")
def setUp(self):
    """Build the three-part forward-strand CDS fixture used by the tests.

    Stores a contiguous sequence (``magic``), a version of it with runs of
    ``x`` between segments (``magic_split``), the string ``"MAGIC"`` as
    ``translation``, three forward-strand sub-locations (0-6, 12-15, 21-27),
    the compound location joining them, and a CDS built on that location.
    """
    self.translation = "MAGIC"
    self.magic = Seq("ATGGCAGGTATTTGT")
    self.magic_split = Seq("ATGGCAxxxxxxGGTxxxxxxATTTGT")

    bounds = [(0, 6), (12, 15), (21, 27)]
    self.sub_locations = [
        FeatureLocation(start, end, strand=1) for start, end in bounds
    ]
    self.location = CompoundLocation(self.sub_locations)
    self.cds = CDSFeature(self.location, locus_tag="compound",
                          translation="A")
def test_complicated(self):
    """Check protein-coordinate lookup on a ten-part forward-strand join.

    A CDS is built over ten forward-strand parts, the sub-location for
    protein coordinates 353-412 is requested, and the nucleotides it
    extracts from the (left-padded) sequence must translate to the matching
    slice of the expected translation.
    """
    part_bounds = [
        (121124, 122061),
        (122339, 122383),
        (122559, 122666),
        (122712, 122874),
        (123060, 123337),
        (123481, 123749),
        (123809, 124032),
        (124091, 124193),
        (124236, 124401),
        (124684, 124724),
    ]
    parts = [FeatureLocation(start, end, 1) for start, end in part_bounds]
    location = CompoundLocation(parts, operator="join")
    cds = CDSFeature(location, locus_tag="complicated", translation="A")

    seq = (
        "ATGAGCCCTCGTCTAGACTACAATGAAGGATACGATTCCGAAGACGAGGAGATCCCCCGTTACGTACACCAT"
        "TCTAGAGGAAAGAGTCATAGATCCGTGAGGACGTCAGGTCGCTCACGCACGTTGGATTACGACGGGGATGAT"
        "GAAGCTAGTGACCACGCTGCCCCCTCCGGGATTGATCGGGACGCTCGAGCCTGTCCAACATCTCGCAGATAT"
        "ACTGATGACTGCCTTGAGACACATAAATTTCGAGGTGCCCGCTCCTCTCGCTCCCGTGGACGAACCGATGAT"
        "AACAAGGTTTTGTACTACACCAAGTATCGCAGCCCGGCTAAGGACTTGCCTATCGAGCGTGATCCCGAGGGT"
        "ATTAATTTATTCAAGGTCCGACAGCACACACGGCCAAGTGACGCTCATGTGCCCAGTGGATACCGTGAGCCC"
        "TACGAAGTCAAGGTCGACGAGTATGAGGATGATCATCCCCGTACATGCACTAGCCGCCGTGACTCTAGACAG"
        "CCGAAAGTCTACAAGGTCCGGGTTGATGAGTACGAGGATAACCTCCCTGCACGCTCTCACACTGACTTTCGC"
        "GAGTCTCCACGGTCTGAAAGATGCTCTAGCCGCTACACCGAGGACTCGAAGCCTGGGGAGCTTCCTCCCCGC"
        "TCAGGGCCCTGTCGGTCCAGCAGGCCTTCTCCGGTCGATGAGGACGTCGAGTATGAGATCCGTGAGCCCCGA"
        "GGGCATCGCTCCAGTCGACACTCTACAGATGTTGACTTTCAGCCAGTAGAACAACATCCTCGCTTTGGACAA"
        "CGTGGACTCAGCAGACCTTCGCGGGTTGATGAGGAAGTCGATTATGAGATCCGTGAGCCCCGTGGCAATCGT"
        "GTCAGTCACGCTGCTCATGGTGACAGCCCCTGTCAGGACCAAAGCTCCAGGCATATCGGCATTCAATTGTGG"
        "AGTACGCGCGGACCCCGGGCGGCTGGCCGTGGCCGGGGTCCTGATGAGTCTGACGATGTTGAGCCCTAGGCA"
        "GGGAATTGCCGTAATGCTCTTCAAACTGTATAGCAAGCTCAGCATCAATTCTTTAACTGGCAGGCGCTCTGC"
        "TCGCGCGTTTCTCTCTTGGGGTGGTTGGTTTGACTGTAGATTTCCTCTTTCAAGGCTTCTAGATACACCTTT"
        "GGAAGATAGCAACGCTATGCAAGATATTTTTGATAATTCAAATCCTTTTTACACATGGAATAGCTGGTGTTC"
        "CTGTTTTATCTAGGCAATTGACCCACGCCATCTCGGTAGGTACGGTAAAAGCAAGCCGTAATCTCGTATGGC"
        "TTCATCCTTAGCATCGTATAGATCTCCACTCGGGACTCGGCCAGGGATCTTCCATCAATCAACGTGAAGAAG"
        "TCCAGCACCCCGCTGAATCATAATATCCTACCGATTCTGCTCTCTTCACCTCTAGATACCCCTCTAGACTCC"
        "TGTCAACATGTTCCGTACAGTCGAAGACCGCCCGACCCCAAAAGAGGTATATAACTGGCGGCTGTACACCGA"
        "GGCCACCATCATTGCCACTGGTACACTCTTGTGAGTAGGTGCTGTTGTAACGAAAAACATCCAACTGATCCG"
        "CCAGGTTCGGCTATGACTCGGCTTTTGTGGGAACTACCATTGCCCGCCAAAGCTTCGTTGATGCCTTCAACA"
        "TCGTCGAGTCGGAGGCGGCGGATATTTCAAGCAATATCACGTCAACCTTTCAGGCCGGCGCATTTTTCGGCG"
        "CCATCTTCTGCTTCTTGCCTGAGTGAAGCCGTTAGAGACGGTCTCACTGGCTAACCGGACCAAGTGACCGAC"
        "AAAATTGGGCGTAAATGGGCCCTTCAGGCAAACACACTGCTGTTTCTTATTGGCGCGATTGTGATGACGGCT"
        "GCAACACATCACCTTTCCTATATATGTAAGTCATATCCCCGTAGTAGTCAAGGTTGTTAACTAGAGCAGATG"
        "CTGGACGAGCTCTCACCGGCATCGCATGCGGCGCTATCACCGCGACCGTCCCCAGCTATATTGCCGAGCTGT"
        "CAATCGTGTCGATCCGGGGCTTCCTCACCGGGTTCTTCGAAGTCGCATACCAGATTGGTAGCTTGGTTGGAT"
        "TCTGGATCAACTATGGCATTAACGAGAACATGGACAACTCCTCGGCCGCAAGCTGGAGAGTGCCTATGGCAG"
        "TCCAGATCATCCCCGCAGGAGTCCTTTTCATTGGTGGCTTTTCCTCCATGAGAGTCCTCTCTGGCTGATGCG"
        "AAAAGACAGTGAGGATGCCGCGACGGCTGCCCTGGAGGCGTTGAGGAAACTGCCACGGTCTCATCAATGTAA"
        "TCTCCCACCAAGACTCAGGACATAGTCCCATGCTGACTATTTTAGATGTCCAGGAAGACATCGAGATGAACC"
        "GCACCAGGCTGCTGGAGGAAGCTCGGATCGCCGAGAAGTACGGACAAGGTTGGTTGGCATATATCCGAGGCG"
        "CACTCTTCGAGCTCTCGCGCCATGGGATGTGGAATCGTGTTCTGCTCGTCCTCTGTGCCTTTGCACTGCAGA"
        "ATATGTCGGGAGCTGCTGCTATCAACTACTATTCCCCCATACTCTTTGCGTCGTTGGGGATCACTGATGTCG"
        "CTCTGTATACAGGTATTTATGGCCTGGTAAAAGGTAAGTTCTTCTCCTTAAGTATCTCTGGCTGACAATAGG"
        "GATTAACTGATGAGTTTACAGCCGTCGCATCAATTATATTCTACGGCATTCTCATTGATATGTGGGGCCGCC"
        "GACGTCCGACCATTGTTTCGTCACTGGCCTGCCCTCTATGTCTCTGGTTTGTGGGTGCATACGTCAAAGTTG"
        "GGCATCCAGCCGATATCATAGACGCCGGCGGGGAATTGTCCCCCTCCACGGAGGCTGGTGGTAGAGCGGCGA"
        "CTGCGATGATTATGATCTACTCCGTCTTGTAAGTGCCCCTCACTTTTGAATGGGCTTCAGCTTGGAACTCGA"
        "GTAACTGGTATCCAGTTGGTCTTTTGGTCTCAACGGTATCCCCTGGATTGTCTCCGCCGAAATCTTCCCCGG"
        "CGCGCTGCGAAATCTCACGGGGACATGGGCTGCGCTGGTGCAATGGTATGCAATTCCCTTCACCTAGTATCC"
        "ATATCTAAATCAGCAGGTTGATCCAATTCGTTATCACCAAAGCTCTCCCGTACATCTTCAATAGCCTTGGGT"
        "ACGGGACGTGGTTCTTCTTCGCCTCCTGGATGCTGCTCGCTATCATTTGGTCATTCTTTTTTCTCCCGGAAA"
        "CCAAGGGGAAGACTCTCGATGAAATGCATACGATCTTGTACGTTTCTCTCCGTCGAAATGTGGTCTTGGCTA"
        "ATGAATCAGCGGCCATTCTCTCGCCGAAGAGCAGGGTAAGGGTGAGGTTCGAGATAACACTACTAAAAGTGA"
        "TCGGGAGGCTGTCTAGTCCAGTAGTTCTAGAGGACTATTGGCTGGATGATTCCTCTGATGATTTTTGATTGG"
        "TGGTGAAAATGTTGGATGTTTAATGCCAATGTACTGGGAGAGAACATGCCGATAGTACATACCGCTGTGTTG"
        "TATATCGAAGACGGTTGATTTATATATCTTAGTCTTTCAAAAGACGGCACTCACACAATCACACTTCGATGA"
    )
    translation = (
        "MSPRLDYNEGYDSEDEEIPRYVHHSRGKSHRSVRTSGRSRTLDYDGDDEASDHAAPSGIDRDAR"
        "ACPTSRRYTDDCLETHKFRGARSSRSRGRTDDNKVLYYTKYRSPAKDLPIERDPEGINLFKVRQ"
        "HTRPSDAHVPSGYREPYEVKVDEYEDDHPRTCTSRRDSRQPKVYKVRVDEYEDNLPARSHTDFR"
        "ESPRSERCSSRYTEDSKPGELPPRSGPCRSSRPSPVDEDVEYEIREPRGHRSSRHSTDVDFQPV"
        "EQHPRFGQRGLSRPSRVDEEVDYEIREPRGNRVSHAAHGDSPCQDQSSRHIGIQLWTGVPVLSR"
        "QLTHAISTPVNMFRTVEDRPTPKEVYNWRLYTEATIIATGTLLFGYDSAFVGTTIARQSFVDAF"
        "NIVESEAADISSNITSTFQAGAFFGAIFCFLPEADAGRALTGIACGAITATVPSYIAELSIVSI"
        "RGFLTGFFEVAYQIGSLVGFWINYGINENMDNSSAASWRVPMAVQIIPAGVLFIGGFSSMREDI"
        "EMNRTRLLEEARIAEKYGQGWLAYIRGALFELSRHGMWNRVLLVLCAFALQNMSGAAAINYYSP"
        "ILFASLGITDVALYTGIYGLVKAVASIIFYGILIDMWGRRRPTIVSSLACPLCLWFVGAYVKVG"
        "HPADIIDAGGELSPSTEAGGRAATAMIMIYSVFWSFGLNGIPWIVSAEIFPGALRNLTGTWAAL"
        "VQWLIQFVITKALPYIFNSLGYGTWFFFASWMLLAIIWSFFFLPETKGKTLDEMHTIFLSKDGT"
        "HTITLR"
    )

    protein_start, protein_end = 353, 412
    sub_location = cds.get_sub_location_from_protein_coordinates(
        protein_start, protein_end)

    # pad the beginning to match the location
    padded = Seq("x" * location.start + seq)
    observed = sub_location.extract(padded).translate()
    assert observed == translation[protein_start:protein_end]