def predict_cleavage_site(query_hmmfile: str, target_sequence: str,
                          threshold: float = -100.) -> Optional[CleavageSiteHit]:
    """ Runs hmmpfam2 with a cleavage site profile and reports the first
        alignment scoring above the threshold.

        Arguments:
            query_hmmfile: the path to a HMM file for the cleavage site profile
            target_sequence: the sequence of a CDS feature
            threshold: a minimum bitscore for a HMMer hit, exclusive

        Returns:
            a CleavageSiteHit built from the alignment's query start (minus one),
            query end, bitscore and the description of the hit it belongs to,
            or None if no alignment scored above the threshold
    """
    for result in subprocessing.run_hmmpfam2(query_hmmfile, target_sequence):
        for hit in result:
            # the hit description is passed through as the final constructor argument
            hit_description = hit.description
            # only the first alignment over the threshold within this hit matters
            passing = (hsp for hsp in hit if hsp.bitscore > threshold)
            first = next(passing, None)
            if first is not None:
                return CleavageSiteHit(first.query_start - 1, first.query_end,
                                       first.bitscore, hit_description)
    return None
def run_lassopred(record: Record, cluster: Cluster, query: CDSFeature) -> Optional[LassopeptideMotif]:
    """ General function to predict and analyse lasso peptides

        Arguments:
            record: the record, passed through to the precursor candidate check
            cluster: the cluster, passed through to the precursor candidate check
            query: the CDS feature to analyse

        Returns:
            the motif built by result_vec_to_motif() for the candidate,
            or None if the CDS is not a precursor peptide candidate
    """
    # Run checks to determine whether an ORF encodes a precursor peptide
    result = determine_precursor_peptide_candidate(record, cluster, query, query.translation)
    if result is None:
        return None

    # prediction of cleavage in C-terminal based on lasso's core sequence
    c_term_hmmer_profile = 'tail_cut.hmm'
    thresh_c_hit = -7.5

    # only the second half of the core is searched for the tail cut
    aux = result.core[(len(result.core) // 2):]
    core_a_fasta = ">%s\n%s" % (query.get_name(), aux)

    profile = path.get_full_path(__file__, 'data', c_term_hmmer_profile)
    hmmer_res = subprocessing.run_hmmpfam2(profile, core_a_fasta)

    # every alignment above the threshold overwrites c_cut, so the last one wins;
    # when nothing qualifies, c_cut keeps whatever value the candidate already had
    for res in hmmer_res:
        for hits in res:
            for seq in hits:
                if seq.bitscore > thresh_c_hit:
                    result.c_cut = aux[seq.query_start + 1:]

    # NOTE: a second "result is None" check used to follow here; it could never
    # trigger because result is verified above and never rebound, so it was removed

    query.gene_functions.add(GeneFunction.ADDITIONAL, "lassopeptides", "predicted lassopeptide")
    return result_vec_to_motif(query, result)
def get_alignments(self) -> List[Alignment]:
    """ Runs hmmpfam2 with this instance's database over its domains of
        interest and builds one Alignment per result that has at least one HSP.

        Returns:
            a list of Alignment instances, empty if there are no domains
            of interest
    """
    if not self.domains_of_interest:
        return []

    # for safety of the tools, long domain names are replaced by a simple numeric index
    fasta_data = fasta.get_fasta_from_features(self.domains_of_interest, numeric_names=True)
    assert fasta_data, "empty fasta created"

    hmmer_args = [
        "-T", "0",    # min score
        "-E", "0.1",  # max evalue
    ]
    results = subprocessing.run_hmmpfam2(self.database, fasta_data, extra_args=hmmer_args)

    found = []
    for result in results:
        if not result.hsps:
            continue
        # only the first HSP of each result is used
        top_hsp = result.hsps[0]
        rows = top_hsp.aln
        assert result.id == rows[0].id
        # the result id is the numeric index used in the fasta, map it back to the real domain
        real_domain = self.domains_of_interest[int(result.id)]
        found.append(Alignment(real_domain, rows[0].seq, rows[1].seq,
                               top_hsp.hit_start, top_hsp.hit_end))
    return found
def find_tail(query: secmet.CDSFeature, core: str) -> str:
    """ Finds the tail of a prepeptide, if it exists

        Arguments:
            query: the CDS feature being checked
            core: the core of the prepeptide as a string

        Returns:
            the translation of the tail, or an empty string if it wasn't found
            (which includes the case of an empty core)
    """
    # an empty core has no final residue to inspect and cannot contain a tail,
    # so report "no tail" instead of failing on the index below
    if not core:
        return ''

    # prediction of cleavage in C-terminal based on thiopeptide's core sequence
    # if last core residue != S or T or C > great chance of a tail cut
    if core[-1] in "SCT":
        return ''

    tail = ''
    thresh_c_hit = -9

    # only the last ten residues of the core are searched
    temp = core[-10:]
    core_a_fasta = ">%s\n%s" % (query.get_name(), temp)

    c_term_profile = path.get_full_path(__file__, "data", 'thio_tail.hmm')
    c_hmmer_res = subprocessing.run_hmmpfam2(c_term_profile, core_a_fasta)

    # every alignment above the threshold overwrites the tail, so the last one wins
    for res in c_hmmer_res:
        for hits in res:
            for seq in hits:
                if seq.bitscore > thresh_c_hit:
                    tail = temp[seq.query_end-1:]
    return tail
def predict_cleavage_site(query_hmmfile: str, target_sequence: str,
                          threshold: float) -> Tuple[Optional[int], float]:
    """ Extracts the cleavage position and score of the HMM alignment from
        HMMER results.

        Arguments:
            query_hmmfile: the HMM file to search
            target_sequence: the sequence to search
            threshold: the minimum bitscore a hit must have, exclusive

        Returns:
            a tuple of
                the query end of the first hit above the threshold, minus 14,
                    or None if no hit was above the threshold
                the score of that hit, or the best score of all hits if none
                    were above the threshold (0.0 if there were no hits at all)
    """
    hmmer_res = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)

    # start from None rather than 0. so that negative bitscores below the
    # threshold are still reported as the best score seen
    best_score = None  # type: Optional[float]
    for res in hmmer_res:
        for hits in res:
            for hsp in hits:
                if hsp.bitscore > threshold:
                    # NOTE(review): the offset of 14 is presumably tied to the
                    # layout of the profile in use - confirm before changing
                    return hsp.query_end - 14, hsp.bitscore
                if best_score is None or hsp.bitscore > best_score:
                    best_score = hsp.bitscore

    # keep the float-only return contract when nothing was aligned at all
    if best_score is None:
        return None, 0.
    return None, best_score
def run_thiopred(query: secmet.CDSFeature, thio_type: str, domains: Set[str]) -> Optional[Thiopeptide]:
    """ Checks whether a CDS feature contains a thiopeptide precursor and,
        if so, refines the predicted leader, core and tail.

        Arguments:
            query: the CDS feature to analyse
            thio_type: the suspected type of the thiopeptide
            domains: the set of domains found within the cluster containing the query

        Returns:
            A Thiopeptide instance if a precursor is found, otherwise None
    """
    # first decide whether the ORF could encode a precursor peptide at all
    candidate = determine_precursor_peptide_candidate(query, domains)
    if candidate is None:
        return None

    candidate.thio_type = thio_type

    # leader cleavage "validation": the core must align to the peptide profile
    pep_profile = path.get_full_path(__file__, "data", 'thiopep2.hmm')
    fasta_entry = ">%s\n%s" % (query.get_name(), candidate.core)
    pep_results = subprocessing.run_hmmpfam2(pep_profile, fasta_entry)
    min_pep_score = -2
    validated = any(hsp.bitscore > min_pep_score
                    for result in pep_results
                    for hit in result
                    for hsp in hit)
    if not validated:
        return None

    # additional filter(s) for peptide prediction
    core_match = re.search("[ISTV][SACNTW][STNCVG][ATCSGM][SVTFC][CGSTEAV][TCGVY].*",
                           candidate.core)
    if core_match is None:
        return None

    trimmed_core = core_match.group()
    if 10 < len(trimmed_core) < 20:
        # whatever the trimmed core no longer covers is appended to the leader
        shift = len(candidate.core) - len(trimmed_core)
        candidate.leader = candidate.leader + candidate.core[:shift]
        candidate.core = trimmed_core

    candidate.c_cut = find_tail(query, candidate.core)

    query.gene_functions.add(secmet.GeneFunction.ADDITIONAL, "thiopeptides",
                             "predicted thiopeptide")
    return candidate
def predict_cleavage_site(query_hmmfile: str, target_sequence: str,
                          threshold: float) -> Tuple[Optional[int], float]:
    """ Extracts from HMMER the end position and score of the first HMM
        alignment scoring above the threshold.

        Arguments:
            query_hmmfile: the HMM file to search with
            target_sequence: the sequence to search
            threshold: the minimum bitscore an alignment must have, exclusive

        Returns:
            a tuple of
                the query end of the alignment, minus one,
                    or None if no alignment was above the threshold
                the bitscore of the alignment, or 0.0 if none was above
                    the threshold
    """
    hmmer_res = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)
    for res in hmmer_res:
        for hits in res:
            for hsp in hits:
                if hsp.bitscore > threshold:
                    # return directly: a break here would only leave the innermost
                    # loop and let later hits overwrite the first match
                    # when hmm includes 1st macrolactam residue: end-2
                    return hsp.query_end - 1, hsp.bitscore
    return None, 0.
def predict_cleavage_site(query_hmmfile, target_sequence, threshold):
    """ Runs hmmpfam2 and reports the first alignment scoring above the threshold.

        Arguments:
            query_hmmfile: the HMM file to search with
            target_sequence: the sequence to search
            threshold: the minimum bitscore an alignment must have, exclusive

        Returns:
            a list of [query start - 1, query end - 1, bitscore] for the first
            alignment above the threshold, or [None, None, None] if there is none
    """
    results = subprocessing.run_hmmpfam2(query_hmmfile, target_sequence)
    # walk results -> hits -> alignments lazily, in their original order
    alignments = (hsp for result in results for hit in result for hsp in hit)
    for hsp in alignments:
        # when hmm includes 1st macrolactam residue: end-2
        if hsp.bitscore > threshold:
            return [hsp.query_start - 1, hsp.query_end - 1, hsp.bitscore]
    return [None, None, None]