Example #1
0
def predict_cleavage_site(
        query_hmmfile: str,
        target_sequence: str,
        threshold: float = -100.) -> Optional[CleavageSiteHit]:
    """ Runs hmmpfam2 with the given profile against the given sequence and
        reports the first HSP that scores above the threshold.

        Arguments:
            query_hmmfile: the path to a HMM file for the cleavage site profile
            target_sequence: the sequence of a CDS feature
            threshold: the bitscore an HSP has to exceed (exclusive) to count

        Returns:
            a CleavageSiteHit built from (query_start - 1, query_end, bitscore,
            description of the containing hit) of the first HSP above the
            threshold, or None if no HSP was above the threshold
    """
    results = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)

    # lazily walk every HSP of every hit, in result order, keeping only
    # those that pass the threshold, paired with the hit's description
    passing = (
        (hit.description, hsp)
        for result in results
        for hit in result
        for hsp in hit
        if hsp.bitscore > threshold
    )

    first = next(passing, None)
    if first is None:
        return None

    description, hsp = first
    return CleavageSiteHit(hsp.query_start - 1, hsp.query_end,
                           hsp.bitscore, description)
Example #2
0
def run_lassopred(record: Record, cluster: Cluster,
                  query: CDSFeature) -> Optional[LassopeptideMotif]:
    """ Predicts and analyses a lasso peptide for a single CDS feature.

        Arguments:
            record: the record, passed on to the precursor candidate check
            cluster: the cluster, passed on to the precursor candidate check
            query: the CDS feature to analyse

        Returns:
            the motif built by result_vec_to_motif() for the query, or
            None if determine_precursor_peptide_candidate() rejected it
    """
    # Run checks to determine whether an ORF encodes a precursor peptide
    result = determine_precursor_peptide_candidate(record, cluster, query,
                                                   query.translation)
    if result is None:
        return None

    # prediction of cleavage in C-terminal based on lasso's core sequence
    c_term_hmmer_profile = 'tail_cut.hmm'
    thresh_c_hit = -7.5

    # only the second half of the core is searched for a tail cut
    aux = result.core[(len(result.core) // 2):]
    core_a_fasta = ">%s\n%s" % (query.get_name(), aux)

    profile = path.get_full_path(__file__, 'data', c_term_hmmer_profile)
    hmmer_res = subprocessing.run_hmmpfam2(profile, core_a_fasta)

    # every HSP above the threshold sets the cut, so the last one wins
    for res in hmmer_res:
        for hits in res:
            for seq in hits:
                if seq.bitscore > thresh_c_hit:
                    result.c_cut = aux[seq.query_start + 1:]

    # NOTE: a C-terminal cut is optional; a candidate without one is still
    # reported. The previous "result is None" check at this point could
    # never be true (result is validated above and never rebound), so it
    # has been removed rather than turned into a new filter.

    query.gene_functions.add(GeneFunction.ADDITIONAL, "lassopeptides",
                             "predicted lassopeptide")

    return result_vec_to_motif(query, result)
Example #3
0
    def get_alignments(self) -> List[Alignment]:
        """ Runs hmmpfam2 with self.database over the domains of interest and
            builds one Alignment from the first HSP of every result that has
            at least one HSP.

            Returns:
                a list of Alignment instances, empty if there are no
                domains of interest
        """
        domains = self.domains_of_interest
        if not domains:
            return []

        # for safety of the tools, the fasta uses a simple numeric index
        # instead of the (possibly long) real domain names
        fasta_data = fasta.get_fasta_from_features(domains, numeric_names=True)
        assert fasta_data, "empty fasta created"

        hmmpfam_args = [
            "-T", "0",  # min score
            "-E", "0.1",  # max evalue
        ]
        results = subprocessing.run_hmmpfam2(self.database, fasta_data,
                                             extra_args=hmmpfam_args)

        found = []
        for result in results:
            hsps = result.hsps
            if not hsps:
                continue
            # only the first HSP of each result is used
            first_hsp = hsps[0]
            aligned = first_hsp.aln
            assert result.id == aligned[0].id
            # map the numeric fasta name back to the real domain
            domain = domains[int(result.id)]
            found.append(Alignment(domain, aligned[0].seq, aligned[1].seq,
                                   first_hsp.hit_start, first_hsp.hit_end))
        return found
Example #4
0
def find_tail(query: secmet.CDSFeature, core: str) -> str:
    """ Finds the tail of a prepeptide, if it exists

        Arguments:
            query: the CDS feature being checked
            core: the core of the prepeptide as a string

        Returns:
            the translation of the tail, or an empty string if it wasn't found
            (including when the core is empty or ends in S, C or T)
    """
    # prediction of cleavage in C-terminal based on thiopeptide's core sequence
    # if last core residue != S or T or C > great chance of a tail cut
    # an empty core has no last residue to inspect and cannot contain a tail
    if not core or core[-1] in "SCT":
        return ''

    tail = ''
    thresh_c_hit = -9

    # only the last (up to) 10 residues of the core are searched
    temp = core[-10:]
    core_a_fasta = ">%s\n%s" % (query.get_name(), temp)

    c_term_profile = path.get_full_path(__file__, "data", 'thio_tail.hmm')
    c_hmmer_res = subprocessing.run_hmmpfam2(c_term_profile, core_a_fasta)

    # every HSP above the threshold sets the tail, so the last one wins
    for res in c_hmmer_res:
        for hits in res:
            for seq in hits:
                if seq.bitscore > thresh_c_hit:
                    tail = temp[seq.query_end-1:]
    return tail
Example #5
0
def predict_cleavage_site(query_hmmfile: str, target_sequence: str, threshold: float
                          ) -> Tuple[Optional[int], float]:
    """ Extracts the cleavage position and score of the first HMM alignment
        above the threshold from HMMER results.

        Arguments:
            query_hmmfile: the HMM file to search
            target_sequence: the sequence to search
            threshold: the bitscore a hit must exceed (exclusive)

        Returns:
            a tuple of
                the query end of the first HSP above the threshold minus 14,
                        or None if no HSP was above the threshold
                the bitscore of that HSP, or, if none was above the
                        threshold, the highest bitscore seen, which is
                        never reported as less than 0.0
    """
    hmmer_res = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)

    # starting at 0. means negative scores are never reported as the best
    best_score = 0.
    for res in hmmer_res:
        for hits in res:
            for hsp in hits:
                if hsp.bitscore > threshold:
                    # NOTE(review): the offset of 14 is presumably specific
                    # to the profile in use - confirm against the caller
                    return hsp.query_end - 14, hsp.bitscore
                best_score = max(best_score, hsp.bitscore)

    return None, best_score
Example #6
0
def run_thiopred(query: secmet.CDSFeature, thio_type: str,
                 domains: Set[str]) -> Optional[Thiopeptide]:
    """ Checks whether a CDS feature contains a thiopeptide precursor and, if
        so, refines the predicted leader, core and tail.

        Arguments:
            query: the CDS feature to analyse
            thio_type: the suspected type of the thiopeptide
            domains: the set of domains found within the cluster containing the query

        Returns:
            A Thiopeptide instance if a precursor is found, otherwise None
    """
    # an ORF that isn't a precursor candidate needs no further work
    candidate = determine_precursor_peptide_candidate(query, domains)
    if candidate is None:
        return None

    candidate.thio_type = thio_type

    # leader cleavage "validation": at least one HSP of the core against
    # the peptide profile has to score above the threshold
    pep_profile = path.get_full_path(__file__, "data", 'thiopep2.hmm')
    core_fasta = ">%s\n%s" % (query.get_name(), candidate.core)
    pep_results = subprocessing.run_hmmpfam2(pep_profile, core_fasta)

    min_pep_score = -2
    validated = any(
        hsp.bitscore > min_pep_score
        for result in pep_results
        for hit in result
        for hsp in hit
    )
    if not validated:
        return None

    # additional filter(s) for peptide prediction: the core has to contain
    # the motif, and everything from the motif onwards is the new candidate
    match = re.search(
        "[ISTV][SACNTW][STNCVG][ATCSGM][SVTFC][CGSTEAV][TCGVY].*", candidate.core)
    if match is None:
        return None
    trimmed_core = match.group()

    # only a trimmed core of 11 to 19 residues replaces the original core,
    # in which case the residues before it are appended to the leader
    if 10 < len(trimmed_core) < 20:
        shift = len(candidate.core) - len(trimmed_core)
        candidate.leader = candidate.leader + candidate.core[:shift]
        candidate.core = trimmed_core

    candidate.c_cut = find_tail(query, candidate.core)

    query.gene_functions.add(secmet.GeneFunction.ADDITIONAL, "thiopeptides",
                             "predicted thiopeptide")
    return candidate
Example #7
0
def predict_cleavage_site(query_hmmfile: str, target_sequence: str,
                          threshold: float) -> Tuple[Optional[int], float]:
    """ Extracts the cleavage position and score of the first HMM alignment
        above the threshold from HMMER results.

        Arguments:
            query_hmmfile: the HMM file to search
            target_sequence: the sequence to search
            threshold: the bitscore an HSP must exceed (exclusive)

        Returns:
            a tuple of
                the query end of the first HSP above the threshold minus 1,
                        or None if no HSP was above the threshold
                the bitscore of that HSP, or 0.0 if no HSP was above
                        the threshold
    """
    hmmer_res = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)
    for res in hmmer_res:
        for hits in res:
            for hsp in hits:
                # NOTE(review): the original comment here read "when hmm
                # includes 1st macrolactam residue: end-2", while the code
                # uses end - 1 - confirm which offset the profile needs
                if hsp.bitscore > threshold:
                    # return right away: a bare break would only leave the
                    # innermost loop and let later hits replace this one
                    return hsp.query_end - 1, hsp.bitscore
    return None, 0.
Example #8
0
def predict_cleavage_site(query_hmmfile, target_sequence, threshold):
    """ Runs hmmpfam2 with the given profile against the given sequence and
        reports the first HSP scoring above the threshold.

        Arguments:
            query_hmmfile: the HMM file to search with
            target_sequence: the sequence to search
            threshold: the bitscore an HSP must exceed (exclusive)

        Returns:
            a list of [query_start - 1, query_end - 1, bitscore] for the
            first HSP above the threshold, or [None, None, None] if no HSP
            was above the threshold
    """
    results = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)

    # walk every HSP of every hit of every result, in order
    all_hsps = (hsp for result in results for hit in result for hsp in hit)
    for hsp in all_hsps:
        # NOTE(review): the original comment here read "when hmm includes
        # 1st macrolactam residue: end-2"; the code itself uses end - 1
        if hsp.bitscore > threshold:
            return [hsp.query_start - 1, hsp.query_end - 1, hsp.bitscore]

    return [None, None, None]