예제 #1
0
 def test_indv_blk(self):
     """Check that indv_block keeps a FASTA header line together with its sequence.

     A single record is passed in; the first returned block must contain both
     the identifying ``>`` line and a stretch of the residues that follow it.
     """
     # The indentation embedded in the literal is part of the input and is
     # deliberately left untouched.
     tester = constool.indv_block(
         """>OAP01791.1 CDC48A [Arabidopsis thaliana]
                                 MSTPAESSDSKSKKDFSTAILERKKSPNRLVVDEAINDDNSVVSLHPATMEKLQLFRGDTILIKGKKRKD
                                 TVCIALADETCEEPKIRMNKVVRSNLRVRLGDVISVHQCPDVKYGKRVHILPVDDTVEGVTGNLFDAYLK"""
     )
     # assertIn reports both operands on failure, whereas assertTrue(x in y)
     # only says "False is not true".
     self.assertIn(
         '>OAP01791.1 CDC48A [Arabidopsis thaliana]', tester[0])
     self.assertIn(
         'SKKDFSTAILERKKSPNRLVVDEAINDDNSVVSLHPATMEKLQL', tester[0])
예제 #2
0
 def test_indv_blk_multi(self):
     """Check that indv_block splits a multi-record FASTA string into one block per record.

     Three records are supplied; the result must hold three entries, and the
     middle entry must equal its header line plus sequence with no trailing
     newline.
     """
     procavia = (
         ">PROCA12070 | ENSPCAG00000012030 | HOG:0377891.2a.2a | [Procavia capensis]\n"
         "MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQTAKKARVEEVSAPKVSKQGRGEEISESE\n"
     )
     # Kept without a trailing newline so it doubles as the expected value.
     loxodonta = (
         ">LOXAF14113 | G3TAL7 | HOG:0377891.2a.2a | [Loxodonta africana]\n"
         "MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQTA"
     )
     echinops = (
         ">ECHTE02547 | ENSETEG00000016682 | HOG:0377891.2a.2a | [Echinops telfairi]\n"
         "MKTRQNKDSMSMRSGRKKEAPGPREELRS"
     )
     blocks = constool.indv_block(procavia + loxodonta + "\n" + echinops)
     self.assertEqual(len(blocks), 3)
     self.assertEqual(blocks[1], loxodonta)
예제 #3
0
 def find_motif(self, msa, motif):
     """
     Locate a motif inside a multiple sequence alignment file.

     The file is read in full and split into per-sequence blocks; each block
     is cleaned and reduced to its sequence, then searched in order. The
     search stops at the first sequence containing the motif.

     Args:
         msa (str): the path to the multiple sequence alignment
         motif (str): the motif to be located in the msa
     Returns:
         The index of the motif within the first sequence that contains it,
         or -1 if no sequence contains it (including when the file yields no
         sequences).
     """
     with open(msa, "r") as handle:
         alignment_text = handle.read()
     for block in constool.indv_block(alignment_text):
         sequence = constool.get_fasta_sequence(constool.seqnwl_strip(block))
         position = sequence.find(motif)
         if position != -1:
             # First hit wins; later sequences are not examined.
             return position
     return -1