def compute_hydrophobic_clusters(
        self,
        sel: str = "protein and not backbone and noh and resname ILE VAL LEU",
        cutoff_area: float = 10):
    """
    Compute the hydrophobic clusters formed by the atoms of a selection.

    All current representations are removed first. A residue-by-residue
    contact matrix is then filled atom by atom, turned into a graph with
    create_graph and split into connected components with label_components.

    :param sel: VMD selection on which to compute the clusters.
        Default is every sidechain heavy atom ILE, VAL and LEU residues.
        "protein and not backbone and noh and resname ILE VAL LEU"
    :param cutoff_area: Minimum area between residues to be considered in contact.
    :return: A representation for each cluster (the value returned by
        add_clusters), or None when the component labelling is empty.
    """
    clusters = None

    # Removing previous visualizations. Walk the indices from last to first so
    # that removing one representation never shifts the ones still pending.
    for rep_index in reversed(range(len(self.reps.replist))):
        self.reps.remove(rep_index)

    resids = self.get("resid", sel=sel)  # ILV cluster residues
    chains = self.get("chain", sel=sel)  # chains of ILV cluster residues
    dims = len(resids)  # length of ILV clusters
    indices = self.get("index", sel=sel)  # indices of the atoms in the ILV clusters

    # Map every atom index to the first position in `resids` that holds the
    # same (resid, chain) pair.
    # NOTE(review): this relies on self.get returning its values in the same
    # atom order for the same selection, so that resids[k], chains[k] and
    # indices[k] describe one atom - confirm against the Molecule.get contract.
    # It replaces two selection queries plus a linear scan per atom.
    first_position = {}
    for position, residue_key in enumerate(zip(resids, chains)):
        first_position.setdefault(residue_key, position)
    atom_to_residposition = {
        index: first_position[residue_key]
        for index, residue_key in zip(indices, zip(resids, chains))
    }

    logger.info("Initializing final output")
    contacts = np.zeros((dims, dims))

    logger.info("Computing clusters")
    for index in indices:
        a = Atom(index, self)
        if not a.neighbor_indices.any():
            continue
        contacts = fill_matrices(a, self, contacts, indices,
                                 atom_to_residposition)

    graph = create_graph(contacts, resids, chains, cutoff_area=cutoff_area)
    comp, _ = label_components(graph)
    # Component labels start at 0, so `comp.a.any()` would be False when every
    # vertex belongs to one single cluster. Test for an empty labelling
    # instead, as compute_salt_bridges does.
    if comp.a.size != 0:
        clusters = add_clusters(self, graph, comp)
    else:
        logger.warning(
            "There are not residues in contact for this selection")
    return clusters
def get_FUZZLE_hhs(domain):
    """
    Download the hhs file of a domain from the FUZZLE server into a temporary file.

    Failed attempts are retried every 5 seconds. An HTTP 4xx answer other than
    408/429 means the request itself is wrong (for instance an unknown
    domain), so it is raised instead of being retried forever.

    :param domain: str. The domain to download from Fuzzle as hhs
    :return: filepath: path where the file is located.
    :raises urllib.error.HTTPError: when the server answers with a
        non-retryable 4xx status.
    """
    import time
    import urllib.error
    import urllib.request

    logger.info(
        f'Attempting to download hhs file for {domain} from the FUZZLE server')
    url = f'https://fuzzle.uni-bayreuth.de/hhs/scop95_2.07.psi.hhs/{domain}.hhs'

    while True:
        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(url) as response:
                text = response.read()
        except Exception as e:
            permanent = (isinstance(e, urllib.error.HTTPError)
                         and 400 <= e.code < 500
                         and e.code not in (408, 429))
            if permanent:
                logger.error(
                    f'FUZZLE rejected the request for {domain} with error {e}. '
                    f'Not retrying.')
                raise
            logger.warning(
                f'Failed to connect to FUZZLE with error {e}. Sleeping 5s and retrying.'
            )
            time.sleep(5)
        else:
            break

    filepath = string_to_tempfile(text.decode('ascii'), 'hhs')
    logger.info(f"File downloaded as {filepath}")
    return filepath
def get_SCOP_domain(domain):
    """
    Download a SCOPe domain from the SCOP server as a pdb into a temporary file.

    Failed attempts are retried every 5 seconds. An HTTP 4xx answer other than
    408/429 means the request itself is wrong (for instance an unknown
    domain), so it is raised instead of being retried forever.

    :param domain: str. The SCOPe domain to download as pdb
    :return: filepath: path of the temporary file holding the downloaded text.
    :raises urllib.error.HTTPError: when the server answers with a
        non-retryable 4xx status.
    """
    import time
    import urllib.error
    import urllib.request

    logger.info(f'Attempting to download domain {domain} from the SCOP server')
    url = f'https://scop.berkeley.edu/astral/pdbstyle/ver=2.07&id={domain}&output=text'

    while True:
        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(url) as response:
                text = response.read()
        except Exception as e:
            permanent = (isinstance(e, urllib.error.HTTPError)
                         and 400 <= e.code < 500
                         and e.code not in (408, 429))
            if permanent:
                logger.error(
                    f'SCOP rejected the request for {domain} with error {e}. '
                    f'Not retrying.')
                raise
            logger.warning(
                f'Failed to connect to SCOP with error {e}. Sleeping 5s and retrying.'
            )
            time.sleep(5)
        else:
            break

    filepath = string_to_tempfile(text.decode('ascii'), 'pdb')
    logger.info(f"File downloaded as {filepath}")
    return filepath
def compute_salt_bridges(self):
    """
    Find the salt bridges of the molecule and display them.

    Contacts are computed with MetricDistance between sidechain acidic oxygens
    and sidechain basic nitrogens (threshold 3.2, pbc off). All current
    representations are removed, the protein is drawn as NewCartoon and both
    residues of each bridge as Licorice.

    :return: The value returned by add_networks_salts when the component
        labelling of the salt-bridge graph is not empty; otherwise the list of
        bridges collected here, each a dict with keys "residues" and "chain".
    """
    salts = []

    # Remove previous visualizations, last index first so that the remaining
    # indices stay valid while removing.
    for rep_index in reversed(range(len(self.reps.replist))):
        self.reps.remove(rep_index)

    metr = MetricDistance('sidechain and acidic and element O',
                          'sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)

    # `salts` holds dicts, so testing `[resid1, resid2] in salts` can never
    # match. Track the residue pairs already recorded in a set instead.
    seen_pairs = set()
    try:
        data = metr.project(self)
        mapping = metr.getMapping(self)
        if len(np.shape(data)) > 1:
            data = data[0].copy()  # handling NMR structures
        self.reps.add(sel='protein', style='NewCartoon', color=8)
        bridge_atoms = mapping[data].atomIndexes.values
        if bridge_atoms.any():
            for salt in bridge_atoms:
                resid1 = self.get(
                    "resid", sel=f"same residue as index {salt[0]}")[0]
                chain1 = self.get(
                    "chain", sel=f"same residue as index {salt[0]}")[0]
                resid2 = self.get(
                    "resid", sel=f"same residue as index {salt[1]}")[0]
                chain2 = self.get(
                    "chain", sel=f"same residue as index {salt[1]}")[0]

                pair = (int(resid1), str(chain1), int(resid2), str(chain2))
                if pair in seen_pairs:
                    # Several atom pairs can connect the same two residues.
                    continue
                seen_pairs.add(pair)

                salts.append({
                    "residues": [int(resid1), int(resid2)],
                    "chain": [chain1, chain2]
                })
                self.reps.add(f"protein and resid {resid1}",
                              style="Licorice",
                              color="1")
                self.reps.add(f"protein and resid {resid2}",
                              style="Licorice",
                              color="0")
    except Exception:
        logger.error("Molecule has no basic or acidic residues")
        raise

    graph = make_graph_salts(salts)
    comp, _ = label_components(graph)
    if comp.a.size != 0:
        salts = add_networks_salts(graph, comp)
    else:
        logger.warning('No salt bridges present in the structure')
    return salts
def find_nonstandards(pdb: Molecule) -> list:
    """
    Finds non-standard aminoacids

    A residue name is reported when it is listed in aa_keys or when it is
    missing from standard_aas. Every reported name is logged: 'UNK' as a
    warning, anything else as an info message.

    :param pdb: Molecule or Chimera object where to find non-standard aminoacids.
    :return: list of non-standard residues
    """

    def is_reported(resname) -> bool:
        return resname in aa_keys or resname not in standard_aas

    non_standards = list(filter(is_reported, np.unique(pdb.resname)))

    for resname in non_standards:
        if resname == 'UNK':
            logger.warning("Protein presents unknown residue UNK."
                           " Call remove_residue() to remove it or provide parameters"
                           " if you want to minimize it with AMBER or CHARMM.")
            continue
        logger.info(f"Found the following non-standar residue: {resname}. "
                    f"Preserving in the original PDB")

    return non_standards
def compute_hydrophobic_clusters(
        self,
        chain: str = 'A',
        sel: str = "protein and not backbone and noh and resname ILE VAL LEU",
        cutoff_area: float = 10):
    """
    Compute the hydrophobic clusters formed by the atoms of a selection.

    All current representations are removed first. A residue-by-residue
    contact matrix is then filled atom by atom, turned into a graph with
    create_graph and split into connected components with label_components.

    :param chain: Chain in the PDB to compute the hydrophobic clusters.
        Examples: "A", "A B C". Default: "A"
    :param sel: VMD selection on which to compute the clusters.
        Default is every sidechain heavy atom ILE, VAL and LEU residues.
        "protein and not backbone and noh and resname ILE VAL LEU"
    :param cutoff_area: Threshold forwarded to create_graph as `cutoff_area`.
    :return: A representation for each cluster (the value returned by
        add_clusters), or None when the component labelling is empty.
    """
    clusters = None

    # Removing previous visualizations. Walk the indices from last to first so
    # that removing one representation never shifts the ones still pending.
    for rep_index in reversed(range(len(self.reps.replist))):
        self.reps.remove(rep_index)

    chain_sel = f"{sel} and chain {chain}"
    resids = np.unique(self.get("resid", sel=chain_sel))
    dims = len(resids)
    indices = self.get("index", sel=chain_sel)
    dims_indices = len(self.get("index", sel=f"protein and chain {chain}"))

    logger.info("Initializing final output")
    contacts = np.zeros((dims, dims))
    atoms_to_atoms = np.zeros((dims_indices, dims_indices))

    logger.info("Computing clusters")
    for index in indices:
        a = Atom(index, self)
        if not a.neighbor_indices.any():
            continue
        # Only the residue-level matrix is kept; the first returned value is
        # discarded.
        _, contacts = fill_matrices(a, self, atoms_to_atoms, contacts,
                                    indices, resids)

    graph = create_graph(contacts, resids, cutoff_area=cutoff_area)
    comp, _ = label_components(graph)
    # Component labels start at 0, so `comp.a.any()` would be False when every
    # vertex belongs to one single cluster. Test for an empty labelling
    # instead.
    if comp.a.size != 0:
        clusters = add_clusters(self, graph, comp)
    else:
        logger.warning(
            "There are not residues in contact for this selection")
    return clusters
def compute_salt_bridges(self):
    """
    Find the salt bridges of the molecule and display them.

    Contacts are computed with MetricDistance between sidechain acidic oxygens
    and sidechain basic nitrogens (threshold 3.2, pbc off). All current
    representations are removed, the protein is drawn as NewCartoon and both
    residues of each newly found bridge as Licorice.

    :return: List of [resid1, resid2] pairs, one per distinct pair of
        residues; empty when no contact is found.
    """
    salts = []

    # Remove previous visualizations, last index first so that the remaining
    # indices stay valid while removing.
    for rep_index in reversed(range(len(self.reps.replist))):
        self.reps.remove(rep_index)

    metr = MetricDistance('sidechain and acidic and element O',
                          'sidechain and basic and element N',
                          metric="contacts",
                          threshold=3.2,
                          pbc=False)
    try:
        data = metr.project(self)
    except Exception:
        # `except Exception` rather than a bare except, so that interrupts are
        # not reported as a missing-residue problem.
        logger.error("Molecule has no basic or acidic residues")
        raise

    if len(np.shape(data)) > 1:
        data = data[0].copy()  # handling NMR structures
    mapping = metr.getMapping(self)
    self.reps.add(sel='protein', style='NewCartoon', color=8)

    # Select the contacting atom pairs once instead of once for the test and
    # once more for the loop.
    bridge_atoms = mapping[data].atomIndexes.values
    if bridge_atoms.any():
        for bond in bridge_atoms:
            resid1 = self.get("resid",
                              sel=f"same residue as index {bond[0]}")[0]
            resid2 = self.get("resid",
                              sel=f"same residue as index {bond[1]}")[0]
            # Several atom pairs can connect the same two residues; record and
            # draw each residue pair only once.
            if [resid1, resid2] not in salts:
                salts.append([resid1, resid2])
                self.reps.add(f"protein and resid {resid1}",
                              style="Licorice",
                              color="1")
                self.reps.add(f"protein and resid {resid2}",
                              style="Licorice",
                              color="0")
    else:
        logger.warning("No salt bridges found in this protein")
    return salts
def build_chimeras(self, partial_alignment: bool = False, cutoff_distance: float = 1) -> Dict[str, Chimera]:
    """
    Build all possible chimeras between the two proteins that fulfill these two criteria:
    1) That the distance between the fusion points is below the cutoff distance
    2) That the resulting chimera does not present any backbone clashes

    The outcome of every attempted fusion point is recorded in
    self.chim_positions, a dict from outcome label to a list of query indices.

    :param partial_alignment: When False (default) the global_qpairs,
        global_spairs and global_dst attributes are used; otherwise qpairs,
        spairs and dst.
    :param cutoff_distance: A position is a fusion point when its distance is
        strictly below this value.
    :return: A dictionary with all the possible chimeras, keyed
        "comb1_<resid>" (query N-terminal) and "comb2_<resid>" (subject
        N-terminal). Empty when no alignment distances are available.
    """
    chimeras = {}
    outcomes = ['Query N-terminal', 'Subject N-terminal',
                'Not enough mutations Query N-terminal',
                'Not enough mutations Subject N-terminal',
                'Backbone clash']
    self.chim_positions = {outcome: [] for outcome in outcomes}

    if partial_alignment is False:
        qpairs = self.global_qpairs
        spairs = self.global_spairs
        dst = self.global_dst
    else:
        qpairs = self.qpairs
        spairs = self.spairs
        dst = self.dst

    # Check the distances that will actually be used, and stop here: going on
    # without them can only fail further down when iterating over None.
    if dst is None:
        logger.error("You need to align the structures before building the chimeras")
        return chimeras

    q_indices = self.qPDB.get("index", sel="protein and name CA")
    qstart = min(q_indices)
    qend = max(q_indices)
    s_indices = self.sPDB.get("index", sel="protein and name CA")
    sstart = min(s_indices)
    send = max(s_indices)

    # Resids of the atoms in global_qpairs[0]. The query does not depend on
    # the fusion point, so it is fetched on first use and then reused.
    # NOTE(review): assumes _construct_chimera leaves self.qPDB and
    # self.global_qpairs untouched - confirm.
    residues = None

    # Get the positions in the fragment closer than the cutoff
    for aln_index, chunk in enumerate(dst):
        if aln_index not in self.saPDB:
            logger.error(f"Alignment {aln_index+1} was not produced. Skipping to next alignment.")
            continue
        fusion_points = [index for index, distance in enumerate(chunk) if distance < cutoff_distance]

        # Build query-subject chimera
        for index in fusion_points:
            qMOL = self.qaPDB[0].copy()
            sMOL = self.saPDB[aln_index].copy()
            xo_query = qpairs[aln_index][index]
            xo_subject = spairs[aln_index][index]
            xo_index = [position for position, number in enumerate(self.global_qpairs[0])
                        if number == xo_query][0]
            if residues is None:
                residues = self.qPDB.get("resid", sel="index %s" % ' '.join(map(str, self.global_qpairs[0])))
            xo_resid = residues[xo_index]

            try:
                xo_query_1 = qpairs[aln_index][index + 1]
                xo_subject_1 = spairs[aln_index][index + 1]
            except IndexError:
                # Position corresponds to C-terminus limit of the fragment
                # NOTE(review): these fallbacks are the position in the CA
                # index list plus one, not the next CA atom index - confirm
                # this is what _construct_chimera expects.
                xo_query_1 = [i + 1 for i, qindex in enumerate(q_indices) if qindex == xo_query][0]
                xo_subject_1 = [i + 1 for i, sindex in enumerate(s_indices) if sindex == xo_subject][0]

            # Combination query-subject
            try:
                chimera1, xo = self._construct_chimera(qMOL, sMOL, qstart, xo_query, xo_subject_1, send, 0)
                self.chim_positions['Query N-terminal'].append(xo_query)
                chimeras[f"comb1_{xo_resid}"] = chimera1
                chimeras[f"comb1_{xo_resid}"].add_crossover(xo)
            except NotDiverseChimeraError:
                self.chim_positions['Not enough mutations Query N-terminal'].append(xo_query)
            except BackboneClashError:
                self.chim_positions['Backbone clash'].append(xo_query)

            # Combination subject-query
            try:
                chimera2, xo = self._construct_chimera(qMOL, sMOL, xo_query_1, qend, sstart, xo_subject, 1)
                self.chim_positions['Subject N-terminal'].append(xo_query)
                chimeras[f"comb2_{xo_resid}"] = chimera2
                chimeras[f"comb2_{xo_resid}"].add_crossover(xo)
            except NotDiverseChimeraError:
                self.chim_positions['Not enough mutations Subject N-terminal'].append(xo_query)
            except BackboneClashError:
                self.chim_positions['Backbone clash'].append(xo_query)

    if not chimeras:
        logger.warning("No combination of query and subject produced a chimera that matched the criteria")
    return chimeras