def fetch_subspace(prob: int = 70, rmsd: float = 3.0, ca_min: int = 10, ca_max: int = 200,
                   score_tm_pair: float = 0.3, ratio: float = 1.25, diff_folds: bool = True) -> Result:
    """
    Return the entries in Fuzzle (table hh207clusters) that satisfy the cutoffs.

    All cutoffs are strict comparisons (``>`` / ``<``).

    :param prob: Lower cutoff for the hit probability
    :param rmsd: Upper cutoff for rmsd_tm_pair
    :param ca_min: Lower cutoff for the number of AA (ca_tm_pair)
    :param ca_max: Upper cutoff for the number of AA (ca_tm_pair)
    :param score_tm_pair: Lower cutoff for the tm_score
    :param ratio: Upper cutoff for the proportion cols/ca_tm_pair
    :param diff_folds: Whether to exclude hits from the same fold (True) or not (False)
    :return: A Result object
    """
    # Values are bound as parameters (qmark style, as in fetch_id) instead of
    # being formatted into the SQL text.
    query = ("select * from hh207clusters where prob > ?"
             " and rmsd_tm_pair < ?"
             " and ca_tm_pair > ? and ca_tm_pair < ?"
             " and score_tm_pair > ?"
             " and cast(cols as float)/ca_tm_pair < ?")
    # Only an explicit False disables the fold filter (identity check kept
    # from the original implementation).
    if diff_folds is not False:
        query += " and q_fold_id != s_fold_id"

    # NOTE: the previous version also appended the clauses `cond`, `cond1`
    # and `cond2`, names that are not defined in this function and raised
    # NameError on every call; they have been removed.
    cur.execute(query, (prob, rmsd, ca_min, ca_max, score_tm_pair, ratio))
    return Result([parse_hit(row) for row in cur.fetchall()])
def fetch_group(group1, group2=None, prob: int = 70, rmsd: float = 3.0, ca_min: int = 10, ca_max: int = 200,
                score_tm_pair: float = 0.3, ratio: float = 1.25, diff_folds: bool = True) -> Result:
    """
    Fetch all hits between two specific groups (folds, superfamilies or families),
    or all hits whose query or subject belongs to group1 when group2 is not given.

    The SCOP level is derived from the number of dot-separated fields in the
    group identifier: 2 -> fold, 3 -> superfamily, 4 -> family.

    :param group1: The first group from where to search. E.g 'c.2'
    :param group2: (optional) The second group from where to search. E.g 'c.2'
    :param prob: the minimum allowed HHsearch probability
    :param rmsd: The maximum allowed RMSD (rmsd_tm_pair: RMSD for the TMalign alignment
        between the two domains, passing the sequence alignment as seed)
    :param ca_min: The minimum allowed fragment length (for the TMalign alignment)
    :param ca_max: The maximum allowed fragment length (for the TMalign alignment)
    :param score_tm_pair: The minimum allowed TM-score (for the TMalign alignment)
    :param ratio: the maximum ratio for the sequence and structural alignment lengths (cols / ca_tm_pair)
    :param diff_folds: Whether to exclude hits from the same fold (True) or not (False)
    :return: A Result class with the hits that fulfill these criteria
    :raises ValueError: if the two groups are at different SCOP levels, or the
        level is not fold, superfamily or family
    """
    level1 = len(group1.split('.'))
    if group2:
        level2 = len(group2.split('.'))
        if level1 != level2:
            raise ValueError("The two groups must belong to the same SCOP level")

    # Column suffix per SCOP level; the q_/s_ prefixes select query/subject.
    level_columns = {2: "fold_id", 3: "sufam_id", 4: "scop_id"}
    if level1 not in level_columns:
        raise ValueError("The specified SCOP level does not exist")
    q_col = "q_" + level_columns[level1]
    s_col = "s_" + level_columns[level1]

    query = ("select * from hh207clusters where prob > ?"
             " and rmsd_tm_pair < ?"
             " and ca_tm_pair > ? and ca_tm_pair < ?"
             " and score_tm_pair > ?"
             " and cast(cols as float)/ca_tm_pair < ?")
    params = [prob, rmsd, ca_min, ca_max, score_tm_pair, ratio]

    # Column names come from the fixed table above, so formatting them into
    # the SQL is safe; the user-supplied group ids are bound as parameters.
    if group2:
        # Match the pair in either direction (group1 as query or as subject).
        query += f" and (({q_col} = ? and {s_col} = ?) or ({s_col} = ? and {q_col} = ?))"
        params += [group1, group2, group1, group2]
    else:
        query += f" and ({q_col} = ? or {s_col} = ?)"
        params += [group1, group1]

    # Only an explicit False disables the fold filter.
    if diff_folds is not False:
        query += " and q_fold_id != s_fold_id"

    cur.execute(query, params)
    return Result([parse_hit(row) for row in cur.fetchall()])
def fetch_id(fuzzle_id: int) -> Hit:
    """
    Return the hit in Fuzzle with the given ID.

    :param fuzzle_id: The Fuzzle HIT id to retrieve from hh207clusters
    :return: A Hit object
    :raises IndexError: if no row with that id exists
    """
    cur.execute("select * from hh207clusters where id = ?", (fuzzle_id,))
    rows = cur.fetchall()
    if not rows:
        # Same exception type the unguarded rows[0] used to raise, but with a
        # message that names the offending id.
        raise IndexError(f"No hit with id {fuzzle_id} found in hh207clusters")
    return parse_hit(list(rows[0]))
def _domain_from_PDB(pdb: str):
    """
    Return all domains included in a SCOP version that come from a PDB.

    The PDB code is lower-cased, its domains are looked up in
    scop_pdbref_scop, and those are mapped to their ``ref`` entries in
    astral_astral95.

    :param pdb: A 4-letter pdb code
    :return: A sorted array of unique domains (``np.unique`` output) that this PDB contains
    :raises NotCorrectPDBError: if the PDB code has no entry in scop_pdbref_scop
    """
    pdb = pdb.lower()
    cur.execute("SELECT sdomain FROM scop_pdbref_scop WHERE pdbref like ?", (pdb,))
    # De-duplicate while keeping plain Python values for parameter binding.
    scop_domains = list(dict.fromkeys(row[0] for row in cur.fetchall()))

    if not scop_domains:
        from protlego.builder.builder import NotCorrectPDBError
        raise NotCorrectPDBError(f"PDB code {pdb.capitalize()} does not yet appear in the SCOP database")

    # One placeholder per domain. Formatting a tuple of NumPy strings into the
    # SQL text needed a special case for one element and breaks under
    # NumPy >= 2, where the repr is "np.str_('...')".
    placeholders = ", ".join("?" * len(scop_domains))
    cur.execute(f"SELECT ref FROM astral_astral95 WHERE id in ({placeholders})", scop_domains)
    domains = np.unique([row[0] for row in cur.fetchall()])
    logger.info(f"Domain(s) {domains} are present in the Fuzzle database"
                f" as representatives for pdb {pdb.capitalize()}")
    return domains
def fetch_by_domains(domain1: str, domain2: str, prob: int = 70, rmsd: float = 3.0, ca_min: int = 10,
                     ca_max: int = 200, score_tm_pair: float = 0.3, ratio: float = 1.25,
                     diff_folds: bool = True):
    """
    Fetch all the hits between two parent domains.

    The pair is matched in both directions (domain1 as query and domain2 as
    subject, or the reverse). Rows with a NULL rmsd_tm_pair are not excluded
    by the RMSD cutoff.

    :param domain1: The 7 letter code for one of the parents
    :param domain2: The 7 letter code for one of the parents
    :param prob: the minimum allowed HHsearch probability
    :param rmsd: The maximum allowed RMSD (rmsd_tm_pair: RMSD for the TMalign alignment
        between the two domains, passing the sequence alignment as seed)
    :param ca_min: The minimum allowed fragment length (for the TMalign alignment)
    :param ca_max: The maximum allowed fragment length (for the TMalign alignment)
    :param score_tm_pair: The minimum allowed TM-score (for the TMalign alignment)
    :param ratio: the maximum ratio for the sequence and structural alignment lengths (cols / ca_tm_pair)
    :param diff_folds: Whether to exclude hits from the same fold (True) or not (False)
    :return: A single Hit when exactly one row matches, otherwise a Result
        holding all matching hits (possibly none)
    """
    # Domain ids and cutoffs are bound as parameters rather than formatted
    # into quoted SQL literals.
    query = ("select * from hh207clusters where prob > ?"
             " and (rmsd_tm_pair < ? or rmsd_tm_pair is NULL)"
             " and ca_tm_pair > ? and ca_tm_pair < ?"
             " and score_tm_pair > ?"
             " and cast(cols as float)/ca_tm_pair < ?"
             " and ((query = ? and sbjct = ?)"
             " or (sbjct = ? and query = ?))"
             " and query != sbjct")
    params = (prob, rmsd, ca_min, ca_max, score_tm_pair, ratio,
              domain1, domain2, domain1, domain2)
    # Only an explicit False disables the fold filter.
    if diff_folds is not False:
        query += " and q_fold_id != s_fold_id"

    cur.execute(query, params)
    rows = cur.fetchall()
    if len(rows) == 1:
        return parse_hit(rows[0])
    return Result([parse_hit(row) for row in rows])
def fetch_byPDBs(pdb1: str, pdb2: str, prob: int = 70, rmsd: float = 3.0, ca_min: int = 10,
                 ca_max: int = 200, score_tm_pair: float = 0.3, ratio: float = 1.25,
                 diff_folds: bool = True):
    """
    Fetch all hits among the domains that belong to a pair of PDBs.

    A hit is returned when both its query and its subject are among the
    combined domains of the two PDBs.

    :param pdb1: The first PDB to check
    :param pdb2: The second PDB to check
    :param prob: the minimum allowed HHsearch probability
    :param rmsd: The maximum allowed RMSD (rmsd_tm_pair: RMSD for the TMalign alignment
        between the two domains, passing the sequence alignment as seed)
    :param ca_min: The minimum allowed fragment length (for the TMalign alignment)
    :param ca_max: The maximum allowed fragment length (for the TMalign alignment)
    :param score_tm_pair: The minimum allowed TM-score (for the TMalign alignment)
    :param ratio: the maximum ratio for the sequence and structural alignment lengths (cols / ca_tm_pair)
    :param diff_folds: Whether to exclude hits from the same fold (True) or not (False)
    :return: A result class obtaining the hits that fulfill these criteria
    """
    # Convert to plain str so the values bind cleanly whatever string type
    # _domain_from_PDB returns. The former `domains1 == domains1[0]`
    # statements were no-op comparisons and have been dropped.
    domains = [str(d) for d in _domain_from_PDB(pdb1)]
    domains += [str(d) for d in _domain_from_PDB(pdb2)]

    # One placeholder per domain: a tuple repr is not valid SQL for a single
    # element ("('x',)") and is not portable across NumPy versions.
    placeholders = ", ".join("?" * len(domains))
    query = ("select * from hh207clusters where prob > ?"
             " and rmsd_tm_pair < ?"
             " and ca_tm_pair > ? and ca_tm_pair < ?"
             " and score_tm_pair > ?"
             " and cast(cols as float)/ca_tm_pair < ?"
             f" and query in ({placeholders})"
             f" and sbjct in ({placeholders})")
    params = [prob, rmsd, ca_min, ca_max, score_tm_pair, ratio, *domains, *domains]
    # Only an explicit False disables the fold filter.
    if diff_folds is not False:
        query += " and q_fold_id != s_fold_id"

    cur.execute(query, params)
    return Result([parse_hit(row) for row in cur.fetchall()])