예제 #1
0
 def test_get_fasta_seq(self):
     """Check that get_fasta_sequence keeps the residues of a FASTA record but drops its identifying header line."""
     tester = constool.get_fasta_sequence(
         """>OAP01791.1 CDC48A [Arabidopsis thaliana]
                                 MSTPAESSDSKSKKDFSTAILERKKSPNRLVVDEAINDDNSVVSLHPATMEKLQLFRGDTILIKGKKRKD
                                 TVCIALADETCEEPKIRMNKVVRSNLRVRLGDVISVHQCPDVKYGKRVHILPVDDTVEGVTGNLFDAYLK"""
     )
     # assertNotIn/assertIn print both operands on failure, whereas
     # assertFalse/assertTrue would only report "True is not false".
     self.assertNotIn('>OAP01791.1 CDC48A [Arabidopsis thaliana]', tester)
     self.assertIn('SKKDFSTAILERKKSPNRLVVDEAINDDNSVVSLHPATMEKLQL', tester)
예제 #2
0
    def test_get_fasta_multi(self):
        """tests that get_fasta returns a list of sequences given a fasta file"""
        tester = constool.get_fasta_sequence(
            """>PROCA12070 | ENSPCAG00000012030 | HOG:0377891.2a.2a | [Procavia capensis]
MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQTAKKARVEEVSAPKVSKQGRGEEISESE
>LOXAF14113 | G3TAL7 | HOG:0377891.2a.2a | [Loxodonta africana]
MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQTAKKARVEEASTPKVSKQGRSEEISESE
>ECHTE02547 | ENSETEG00000016682 | HOG:0377891.2a.2a | [Echinops telfairi]
MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQSAKKARVEEASTPKVNKQSRSEXETSAP""",
            index=1)
        self.assertFalse("[Loxodonta africana]" in tester)
        self.assertFalse(">PROCA12070 | ENSPCAG00000012030" in tester)
        self.assertEqual(
            tester,
            "MKTRQNKDSMSMRSGRKKEAPGPREELRSRGRASPGGVSTSSSDGKAEKSRQTAKKARVEEASTPKVSKQGRSEEISESE"
        )
예제 #3
0
 def get_orthologs(self):
     """
     Return FASTA text for the proteins orthologous to the input protein.

     The input sequence is re-extracted from ``self.fasta`` on every call. If a
     previous call already completed (``self.has_run``), ``self.orthologs`` is
     returned as-is; otherwise the orthologs are fetched, post-processed, and
     prefixed with the input sequence.

     Returns:
         The ortholog sequences as a string.

     Raises:
         SequenceError: if ``self.fasta`` is empty.
     """
     if not self.fasta:
         raise SequenceError("Input sequence is empty!")
     self.sequence = constool.get_fasta_sequence(fasta=self.fasta)

     # NOTE(review): this method never assigns self.orthologs; presumably
     # ortholog_to_fasta() does. Confirm the cached value matches what the
     # first call returned (first protein removed, input sequence prepended).
     if self.has_run:
         return self.orthologs

     self.retrieve_OMAid()
     fetched = constool.remove_first_protein(self.ortholog_to_fasta())
     query_part = constool.seqnwl_strip(self.sequence)
     combined = query_part + os.linesep + fetched
     # Only mark the lookup as done once the full result was built successfully.
     self.has_run = True
     return combined
예제 #4
0
 def get_HOGs(self):
     """
     Return FASTA text for the proteins sharing a HOG with the input protein.

     The input sequence is re-extracted from ``self.fasta`` on every call. If a
     previous call already completed (``self.has_run_hogs``), ``self.HOGs`` is
     returned as-is; otherwise the OMA id and HOG level are retrieved, the HOG
     members are fetched, and the entry matching ``self.id`` is removed.

     Returns:
         The HOG member sequences as a string.

     Raises:
         SequenceError: if ``self.fasta`` is empty.
     """
     if not self.fasta:
         raise SequenceError("Input sequence is empty!")
     self.sequence = constool.get_fasta_sequence(fasta=self.fasta)

     # NOTE(review): this method never assigns self.HOGs; presumably
     # HOG_to_fasta() does. Confirm the cached value matches what the first
     # call returned (i.e. with the self.id protein already removed).
     if self.has_run_hogs:
         return self.HOGs

     self.retrieve_OMAid()
     self.retrieve_HOG_level()
     members = constool.remove_protein(self.HOG_to_fasta(), self.id)
     # Only mark the lookup as done once the result was built successfully.
     self.has_run_hogs = True
     return members
예제 #5
0
 def find_motif(self, msa, motif):
     """
     Locate a motif in a multiple sequence alignment file.

     Each entry of the alignment is searched in order and the position of the
     first hit is returned. Note: the position applies equally to the consensus
     sequence and to every individual protein, since aligned sequences all have
     the same length.
     Args:
         msa (str): the path to the multiple sequence alignment
         motif (str): the motif to be located in the msa
     Returns:
         The index at which the motif was first found, or -1 if no entry
         contains it.
     """
     with open(msa, "r") as handle:
         alignment_text = handle.read()

     for entry in constool.indv_block(alignment_text):
         residues = constool.get_fasta_sequence(constool.seqnwl_strip(entry))
         position = residues.find(motif)
         if position != -1:
             # Stop at the first entry that contains the motif.
             return position
     return -1