def calc_contact_order(chimera: Chimera = None, filename: str = None, diss_cutoff: int = 8):
    """
    Compute the relative contact order of a protein.

    The contact order of a protein is a measure of the locality of the
    inter-amino acid contacts in the native folded state. It is computed as
    the average sequence distance between residues that form contacts below
    a threshold in the folded protein, divided by the total length of the
    protein.

    :param chimera: A Chimera object with n residues. Its residues are
        renumbered in place (``renumberResidues`` is called on it).
    :param filename: Path to a pdb file. Mutually exclusive with ``chimera``.
    :param diss_cutoff: The maximum distance in Angstroms between two residues
        to be in contact, default 8 Angstroms.
    :return: The contact order (%). 0.0 when no contact is found.
    :raises ValueError: If both, or neither, of ``chimera`` and ``filename``
        are given.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    if filename:
        chimera = Chimera(filename=filename)

    chimera.renumberResidues()
    metr = MetricSelfDistance("protein and noh", groupsel="residue", metric="contacts",
                              threshold=diss_cutoff, pbc=False)
    a = metr.project(chimera)
    mapping = metr.getMapping(chimera)
    # Only the first row of the projection is used.
    matrix, _, _ = contactVecToMatrix(a[0], mapping.atomIndexes)

    # Keep the upper triangle only, so that a pair (i, j) present on both
    # sides of the diagonal is counted once.
    triang = np.triu(matrix)
    idx1, idx2 = np.where(triang)
    total_contacts = len(idx1)
    total_residues = chimera.numResidues

    if total_contacts == 0 or total_residues == 0:
        # Without contacts the average separation is undefined; report 0
        # instead of dividing by zero.
        co = 0.0
    else:
        summation = np.sum(idx2 - idx1)
        co = 1 / (total_contacts * total_residues) * summation

    print(f"Contact order is {co*100} %")
    return co * 100
def __init__(self, hit: Hit):
    """
    Load the query and subject structures of a hit and reset the working state.

    Both domain files are resolved first through ``get_SCOP_domain``; each one
    is then loaded as a Chimera, deleted from disk, and reduced to its first
    frame when it holds more than one.

    :param hit: A Hit object providing the ``query`` and ``sbjct`` identifiers.
    """
    query_path = get_SCOP_domain(hit.query)
    subject_path = get_SCOP_domain(hit.sbjct)

    def load_first_model(pdb_path, multi_model_message):
        # Load the file, delete it from disk, and keep only frame 0.
        logger.info(f'Loading {pdb_path} as a chimera object')
        structure = Chimera(pdb_path, validateElements=False)
        os.remove(pdb_path)
        if structure.numFrames > 1:
            structure.dropFrames(keep=0)
            logger.info(multi_model_message)
        return structure

    self.qPDB = load_first_model(
        query_path,
        "Query protein contains more than one model. Keeping only the first one",
    )
    self.sPDB = load_first_model(
        subject_path,
        "Subject protein contains more than one model. Keeping only the first one",
    )

    # Empty containers, filled in by other methods.
    self.qaPDB = {}
    self.saPDB = {}
    self.qpairs = []
    self.spairs = []
    self.dst = []
    self.chim_positions = {}
def calc_dist_matrix(chimera: Chimera = None, filename: str = None, selection: str = 'residue',
                     type='contacts', plot=False, diss_cutoff: float = 8):
    """
    Return a residue-residue distance matrix (or contact map) for a given pdb.

    :param chimera: A Chimera object with n residues.
    :param filename: Path to a pdb file. Mutually exclusive with ``chimera``.
    :param selection: How to compute the distance. 'residue' (the closest two
        atoms between two residues) or 'alpha' (distance between the alpha
        carbons).
    :param type: 'contacts' (boolean contact map, True where the distance is
        below ``diss_cutoff``) or 'distances' (the raw distances).
    :param plot: Whether to plot the matrix. Default is False.
    :param diss_cutoff: Distance threshold used for the contact map, in the
        units of the computed distances. Default is 8.
    :return: matrix. np.array. An n by n distance (or contact) matrix.
    :raises ValueError: If both, or neither, of ``chimera`` and ``filename``
        are given, or if ``selection`` or ``type`` is not a supported value.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")
    # Validate the options before doing any expensive work.
    if selection not in ('residue', 'alpha'):
        raise ValueError("Specify a selection type: 'residue' or 'alpha'")
    if type not in ('contacts', 'distances'):
        raise ValueError("Please select contact type between 'contacts' or distances")

    if filename:
        chimera = Chimera(filename=filename)

    if selection == 'residue':
        metr = MetricSelfDistance("protein", groupsel="residue", metric="distances", pbc=False)
    else:
        metr = MetricSelfDistance("protein and name CA", metric="distances", pbc=False)
    projection = metr.project(chimera)
    mapping = metr.getMapping(chimera)
    # Use the first row of the projection for both selections; atleast_2d
    # also accepts a projection that is already one-dimensional.
    first_frame = np.atleast_2d(projection)[0]
    matrix, _, _ = contactVecToMatrix(first_frame, mapping.atomIndexes)

    if type == "contacts":
        matrix = matrix < diss_cutoff

    if plot:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(111)
        # Colormap names are passed as strings so that no colormap lookup
        # helper is needed.
        cmap = 'gist_rainbow' if type == 'distances' else 'binary'
        image = ax.imshow(matrix, cmap=cmap, interpolation='nearest', origin="lower")
        if type == 'distances':
            # A colorbar is only drawn for continuous distances.
            fig.colorbar(image)
        plt.xlabel("Residue index", fontsize=24)
        plt.ylabel("Residue index", fontsize=24)
        plt.xticks(fontsize=22)
        plt.yticks(fontsize=22)

    return matrix
def _construct_chimera(self, qmol, smol, qstart, qend, sstart, send, combination):
    """
    Build a chimera from one fragment of the query and one of the subject.

    :param qmol: Molecule. The query protein
    :param smol: Molecule. The subject protein in any of its positions
    :param qstart: int. Position to start the cut in the query
    :param qend: int. Position to end the cut in the query
    :param sstart: int. Position to start the cut in the sbjct
    :param send: int. Position to end the cut in the sbjct.
    :param combination: int. When 1 the subject fragment comes first and the
        query fragment is appended after it; for any other value the query
        fragment comes first.
    :return: Molecule, int.
        chim1: The resulting chimera, with all atoms set to chain "A"
        last_id: The start residue id passed to ``get_new_resIDs`` for the
        appended fragment (last CA resid of the first fragment plus one).
    :raises NotDiverseChimeraError: If either fragment has fewer than 10
        alpha carbons.
    :raises BackboneClashError: If any backbone atom of one fragment lies
        closer than 1.3 to a backbone atom of the other.
    """

    def fragment_selection(start, end):
        # Protein residues in the index range, plus non-protein residues
        # within 4 of that same range.
        residues = f"same residue as index '{start}' to '{end}'"
        return (f"(protein and {residues}) or "
                f"(not protein and same residue as within 4 of protein and {residues})")

    qmol_copy = qmol.copy()
    smol_copy = smol.copy()
    qmol_copy.filter(fragment_selection(qstart, qend))
    # The subject keeps the non-protein residues close to its OWN fragment.
    smol_copy.filter(fragment_selection(sstart, send))

    # Avoid chimeras that only have a few mutations from
    # one of the parents
    # NOTE(review): these resids are read before renumberResidues() below and
    # are the ones used to compute last_id -- confirm this is intended.
    qmol_resid = qmol_copy.get("resid", sel="protein and name CA")
    smol_resid = smol_copy.get("resid", sel="protein and name CA")
    if qmol_resid.size < 10 or smol_resid.size < 10:
        raise NotDiverseChimeraError

    bbq = qmol_copy.get("coords", sel="protein and backbone")
    bbs = smol_copy.get("coords", sel="protein and backbone")
    distances = cdist(bbq, bbs)
    # Test the boolean mask itself: testing the indices returned by
    # np.where would miss a clash located at pair (0, 0).
    if (distances < 1.3).any():
        raise BackboneClashError

    chim1 = Chimera()
    qmol_copy.renumberResidues()
    smol_copy.renumberResidues()
    if combination == 1:
        last_id = smol_resid[-1] + 1
        new_ids = get_new_resIDs(qmol_copy, last_id)
        qmol_copy.set("resid", new_ids)
        chim1.append(smol_copy)
        chim1.append(qmol_copy)
    else:
        last_id = qmol_resid[-1] + 1
        new_ids = get_new_resIDs(smol_copy, last_id)
        smol_copy.set("resid", new_ids)
        chim1.append(qmol_copy)
        chim1.append(smol_copy)

    chim1.set("chain", "A", "all")
    return chim1, last_id
def show_vertex(self, vertex: Graph.vertex) -> Chimera:
    """
    Display the protein behind a vertex, highlighting its fragment.

    The domain is loaded and renumbered, drawn as NewCartoon with color 8,
    and the fragment (rounded mean of the vertex start and end properties)
    is drawn again with color 1 (red, according to the original
    documentation).

    :param vertex: A Graph.vertex object. The domain to be shown.
    :return: A Chimera object with an internal representation of the fragment
    """
    vertex_props = self.graph.vp
    domain = vertex_props.domain[vertex]

    # Fragment boundaries: rounded mean of the stored start / end values.
    first_residue = int(round(np.mean(vertex_props.start[vertex])))
    last_residue = int(round(np.mean(vertex_props.end[vertex])))
    fragment_sel = f"protein and resid '{first_residue}' to '{last_residue}'"

    mol = Chimera(filename=get_SCOP_domain(domain), validateElements=False)
    mol.renumberResidues()

    representations = mol.reps
    representations.add(sel='protein', style='NewCartoon', color=8)
    representations.add(sel=fragment_sel, style='NewCartoon', color=1)

    mol.view(name=domain)
    return mol
def minimize_potential_energy(
        chimera,
        ff: str,
        output: str = "/tmp/build",
        keep_output_files=True,
        cuda=False,
        restraint_backbone: bool = True
) -> Tuple[unit.quantity.Quantity, Chimera]:
    """
    Solvate a chimera and minimize its potential energy.

    :param chimera: A chimera object where to perform the minimization
    :param ff: The forcefield to use for the minimization.
        Select between "amber" and "charmm"
    :param output: A folder where to keep the files. It is created when it
        does not exist. Default is "/tmp/build".
    :param keep_output_files: When False, the whole ``output`` folder is
        removed once the minimized structure has been loaded.
    :param cuda: Whether to run a second minimization on the CUDA platform,
        starting from the coordinates of the first one.
    :param restraint_backbone: Keep the backbone atoms (CA, C, N) restrained
        around their initial positions.
    :return: The potential energy after minimization (kcal/mol) and the
        minimized chimera object, in that order.
    :raises ValueError: If ``ff`` is not "amber" or "charmm".
    """
    forcefield_files = {
        'amber': ('amber14-all.xml', 'amber14/tip3pfb.xml'),
        'charmm': ('charmm36.xml', 'charmm36/tip3p-pme-b.xml'),
    }
    # Fail early, before any file is written, on an unknown forcefield.
    if ff not in forcefield_files:
        raise ValueError(f"Unknown forcefield '{ff}'. Select between 'amber' and 'charmm'")

    # makedirs also creates missing parent folders.
    os.makedirs(output, exist_ok=True)

    smol = prepare_protein(chimera)
    smol.write(f"{output}/protein.pdb")
    pdb = PDBFile(f"{output}/protein.pdb")
    parm = load_file(f"{output}/protein.pdb")
    modeller = Modeller(pdb.topology, pdb.positions)
    forcefield = ForceField(*forcefield_files[ff])
    modeller.addSolvent(forcefield, padding=1.0 * unit.nanometer)
    system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME,
                                     nonbondedCutoff=1 * unit.nanometer,
                                     constraints=HBonds)

    if restraint_backbone:
        # Applies an external force on backbone atoms
        # This allows the backbone to stay rigid, while severe clashes can still be resolved
        force = mm.CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
        force.addGlobalParameter(
            "k", 5.0 * unit.kilocalories_per_mole / unit.angstroms**2)
        force.addPerParticleParameter("x0")
        force.addPerParticleParameter("y0")
        force.addPerParticleParameter("z0")
        # zip stops at the shorter sequence, so positions without a matching
        # atom are skipped.
        for idx, (atom, atom_crd) in enumerate(zip(parm.atoms, parm.positions)):
            # Compare the atom NAME: the atom object itself never equals a string.
            if atom.name in ('CA', 'C', 'N'):
                force.addParticle(idx, atom_crd.value_in_unit(unit.nanometers))
        system.addForce(force)

    # temperature, friction, error_tolerance and distance_tolerance are not
    # defined in this function; they are expected at module level.
    integrator = mm.LangevinIntegrator(temperature, friction, error_tolerance)
    simulation = Simulation(modeller.topology, system, integrator)
    simulation.context.setPositions(modeller.positions)

    # Get pre-minimization energy (scoring)
    state = simulation.context.getState(getEnergy=True, getForces=True)
    pre_energy = state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy before minimization {pre_energy}")

    # Setup CPU minimization
    integrator.setConstraintTolerance(distance_tolerance)
    simulation.minimizeEnergy()
    post_position = simulation.context.getState(
        getPositions=True).getPositions()
    post_state = simulation.context.getState(getEnergy=True, getForces=True)

    if cuda:
        # Second minimization on the GPU, seeded with the coordinates above.
        min_coords = simulation.context.getState(getPositions=True)
        platform = mm.Platform.getPlatformByName('CUDA')
        properties = {'CudaPrecision': 'mixed'}
        gpu_integrator = mm.VariableLangevinIntegrator(temperature, friction,
                                                       error_tolerance)
        gpu_integrator.setConstraintTolerance(distance_tolerance)
        gpu_min = Simulation(modeller.topology, system, gpu_integrator,
                             platform, properties)
        gpu_min.context.setPositions(min_coords.getPositions())
        gpu_min.minimizeEnergy()
        post_position = gpu_min.context.getState(
            getPositions=True).getPositions()
        post_state = gpu_min.context.getState(getEnergy=True, getForces=True)

    post_energy = post_state.getPotentialEnergy().in_units_of(
        unit.kilocalories_per_mole)
    logger.info(f"Energy after minimization {post_energy}")

    minimized_path = f"{output}/structure_minimized.pdb"
    # Close the file before it is read back below.
    with open(minimized_path, 'w') as handle:
        PDBFile.writeFile(modeller.topology, post_position, handle, keepIds=True)
    min_mol = Chimera(filename=minimized_path)

    if keep_output_files is False:
        shutil.rmtree(output)

    return post_energy, min_mol
def _mol_chimera_wrapper(molecule: Molecule, chimera: Chimera) -> Chimera:
    """
    Convert a Molecule into a Chimera by round-tripping through a pdb file.

    The molecule is written to a pdb file inside a private temporary
    directory, which is removed afterwards even if loading fails. This
    avoids collisions between concurrent runs on a shared fixed path.

    :param molecule: The Molecule object to convert.
    :param chimera: Unused; kept for backward compatibility with callers.
    :return: A new Chimera object loaded from the written pdb file.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_path = os.path.join(tmp_dir, "molecule.pdb")
        molecule.write(pdb_path)
        new_chimera = Chimera(filename=pdb_path)
    return new_chimera