def run(self, cavity=True):
    """run a fragment hotspot calculation from a protein file"""
    h = Runner()
    settings = Runner.Settings(sphere_maps=False)

    if self.args.prepare is True:
        self.prepare_protein()
    else:
        self.prot = Protein.from_file(self.args.prot_fname)

    if cavity is True:
        cavs = Cavity.from_pdb_file(self.args.prot_fname)
        print(cavs)
    else:
        cavs = None

    result = h.from_protein(protein=self.prot,
                            charged_probes=False,
                            buriedness_method=self.args.buriedness_method,
                            cavities=cavs,
                            nprocesses=5,
                            settings=settings)

    with HotspotWriter(path=self.in_dir, zip_results=self.args.zipped) as writer:
        writer.write(result)
def cavity_from_protein(prot):
    """
    Currently the Protein API doesn't support the generation of cavities directly from a
    Protein instance. This method handles the tedious writing / reading.

    :param `ccdc.protein.Protein` prot: protein
    :return: list of `ccdc.cavity.Cavity` instances
    """
    tfile = join(tempfile.mkdtemp(), "protein.pdb")
    with MoleculeWriter(tfile) as writer:
        writer.write(prot)

    return Cavity.from_pdb_file(tfile)
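# A minimal usage sketch for the helper above (not part of the original example). It assumes
# the same imports as the snippet (Protein, MoleculeWriter, Cavity, tempfile, join) and a
# hypothetical input file "protein.pdb". Cavity.from_pdb_file returns a list, so the largest
# cavity is picked here purely for illustration; cavity volumes are used the same way in the
# _get_cavities example further down.
def _example_largest_cavity(pdb_path="protein.pdb"):
    prot = Protein.from_file(pdb_path)
    prot.remove_all_waters()
    cavities = cavity_from_protein(prot)
    # return the cavity with the largest detected volume
    return max(cavities, key=lambda c: c.volume)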
def calc_hr(self):
    """
    runs the hotspot calculation and returns a `hotspots.calculation.Result`

    :return: `hotspots.calculation.Result`
    """
    h = calculation.Runner()
    settings = calculation.Runner.Settings(sphere_maps=True)
    cavs = Cavity.from_pdb_file(self._hs_fname)

    return h.from_protein(protein=self.protein,
                          charged_probes=True,
                          buriedness_method='ghecom',
                          cavities=cavs,
                          nprocesses=5,
                          settings=settings)
def from_pdb(self, pdb_code, charged_probes=False, probe_size=7, buriedness_method='ghecom',
             nprocesses=3, cavities=False, settings=None):
    """
    generates a result from a PDB code

    :param str pdb_code: PDB code
    :param bool charged_probes: if True, include positive and negative probes
    :param int probe_size: size of probe in number of heavy atoms (3-8 atoms)
    :param str buriedness_method: either 'ghecom' or 'ligsite'
    :param int nprocesses: number of CPUs used
    :param bool cavities: if True, detect cavities from the downloaded PDB file and restrict the calculation to them
    :param `hotspots.calculation.Runner.Settings` settings: holds the calculation settings
    :return: a :class:`hotspots.result.Result` instance

    >>> from hotspots.calculation import Runner
    >>> runner = Runner()
    >>> runner.from_pdb("1hcl")
    Result()

    """
    tmp = tempfile.mkdtemp()
    PDBResult(identifier=pdb_code).download(out_dir=tmp)
    fname = join(tmp, "{}.pdb".format(pdb_code))

    self.protein = Protein.from_file(fname)
    self._prepare_protein()
    self.charged_probes = charged_probes
    self.probe_size = probe_size
    self.buriedness_method = buriedness_method

    self.cavities = None
    if cavities is True:
        self.cavities = Cavity.from_pdb_file(fname)
    self.nprocesses = nprocesses

    if settings is None:
        self.sampler_settings = self.Settings()
    else:
        self.sampler_settings = settings

    self._calc_hotspots()
    self.super_grids = {p: g[0] for p, g in self.out_grids.items()}

    return Results(super_grids=self.super_grids,
                   protein=self.protein,
                   buriedness=self.buriedness)
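# A short follow-on sketch (not from the original source) showing one way the returned Results
# object could be written out, mirroring the HotspotWriter usage in the other examples here.
# `runner` is an instance of the Runner class this method belongs to, and "out" is a
# hypothetical output directory.
def _example_from_pdb(runner, out_dir="out"):
    result = runner.from_pdb("1hcl", buriedness_method='ghecom', nprocesses=3)
    with HotspotWriter(path=out_dir, zip_results=True) as writer:
        writer.write(result)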
def run(self):
    prot = Protein.from_file(self.input().path)
    cavs = Cavity.from_pdb_file(self.input().path)

    h = Runner()
    s = h.Settings()
    s.apolar_translation_threshold = 15
    s.polar_translation_threshold = 15
    s.polar_contributions = False
    s.nrotations = 1000

    hr = h.from_protein(prot,
                        buriedness_method='ghecom',
                        nprocesses=1,
                        settings=s,
                        cavities=cavs)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False

    w = HotspotWriter(os.path.dirname(self.output().path),
                      grid_extension=".grd",
                      zip_results=True,
                      settings=out_settings)
    w.write(hr)
def dock(self):
    """
    Setup and execution of a docking run with GOLD.

    NB: the docking Settings class is imported from the Hotspots API rather than the Docking
    API. This is essential for running hotspot-guided docking.

    :return: a :class:`ccdc.io.MoleculeReader`
    """
    docker = Docker()
    docker.settings = hs_docking.DockerSettings()

    # download and prepare the protein
    PDBResult(self.args.pdb).download(self.temp)
    protein = Protein.from_file(os.path.join(self.temp, self.args.pdb + ".pdb"))
    protein.remove_all_waters()
    protein.remove_all_metals()
    protein.add_hydrogens()
    for l in protein.ligands:
        protein.remove_ligand(l.identifier)

    f = os.path.join(self.temp, self.args.pdb + ".mol2")
    with MoleculeWriter(f) as w:
        w.write(protein)

    # setup
    docker.settings.add_protein_file(f)

    # create binding site from list of residues
    cavs = Cavity.from_pdb_file(os.path.join(self.temp, self.args.pdb + ".pdb"))
    cavs[0].to_pymol_file("test.py")

    # collect the residues of each cavity, keyed by residue count
    c = {}
    for i, cav in enumerate(cavs):
        cav.feats = []
        for f in cav.features:
            try:
                cav.feats.append(f.residue)
            except:
                # some features have no associated residue
                continue
        # cav.feats = [f.residue for f in cav.features]
        cav.len = len(cav.feats)
        c.update({cav.len: cav.feats})
        cav.to_pymol_file("{}.py".format(i))

    # select the cavity with the most residues
    selected_cavity = max(c.keys())

    docker.settings.binding_site = docker.settings.BindingSiteFromListOfResidues(
        protein=docker.settings.proteins[0],
        residues=c[selected_cavity])
    docker.settings.fitness_function = 'plp'
    docker.settings.autoscale = 100.
    docker.settings.output_directory = self.temp
    docker.settings.output_file = "docked_ligands.mol2"
    docker.settings.add_ligand_file(self.search_ligands, ndocks=25)

    # constraints
    if self.args.hotspot_guided is True:
        e_settings = result.Extractor.Settings()
        e_settings.mvon = True
        extractor = result.Extractor(self.hr, settings=e_settings)
        bv = extractor.extract_best_volume(volume=300)[0]
        f = hs_utilities.Helper.get_out_dir(os.path.join(self.args.path, "best_volume"))

        with hs_io.HotspotWriter(path=f) as hw:
            hw.write(bv)

        constraints = docker.settings.HotspotHBondConstraint.create(
            protein=docker.settings.proteins[0],
            hr=bv,
            weight=5,
            min_hbond_score=0.2,
            max_constraints=5)

        for constraint in constraints:
            docker.settings.add_constraint(constraint)
        docker.settings.generate_fitting_points(hr=bv)

        # write dummy atoms marking the constraint positions for inspection
        mol = Molecule(identifier="constraints")
        for constraint in constraints:
            for a in constraint.atoms:
                mol.add_atom(Atom(atomic_symbol="C",
                                  atomic_number=14,
                                  label="Du",
                                  coordinates=a.coordinates))

        with MoleculeWriter(os.path.join(self.args.path, "constraints.mol2")) as w:
            w.write(mol)

    docker.dock()
    results = docker.Results(docker.settings)

    return results.ligands
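# A short usage sketch (not from the original source). `runner` stands in for whatever object
# exposes the dock() method above, and "docked.mol2" is a hypothetical output path. It assumes
# the returned reader yields molecules, as the docstring above states, and only reuses
# MoleculeWriter, which already appears in these examples.
def _example_write_docked(runner, out_file="docked.mol2"):
    ligands = runner.dock()
    with MoleculeWriter(out_file) as w:
        for ligand in ligands:
            w.write(ligand)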
def main():
    data_dir = "/local/pcurran/leads_frag"
    # pdbs = [p for p in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, p))]
    pdbs = ["4P7X"]

    for pdb in pdbs:
        print(pdb)
        # download
        ftp_download(pdb, out_dir=os.path.join(data_dir, pdb))

        # prepare
        fpath = os.path.join(data_dir, pdb, f"{pdb}.pdb")
        prot = Protein.from_file(fpath)
        prot.remove_all_metals()
        prot.remove_all_waters()

        # cavity reader cannot handle incomplete residues
        for r in prot.residues:
            if r.is_incomplete():
                prot.remove_residue(r.identifier)

        # cofactors were removed in the original set
        for cof in prot.cofactors:
            prot.remove_cofactor(cof.identifier)

        for lig in prot.ligands:
            prot.remove_ligand(lig.identifier)

        with MoleculeWriter(fpath) as w:
            w.write(prot)

        # molecule centre of geometry
        mol_file = os.path.join(data_dir, pdb, f"{pdb}_ref.mol2")
        mol = MoleculeReader(mol_file)[0]
        mol_centroid = mol.centre_of_geometry()

        # detect cavities and select the one nearest the reference ligand
        cavities = Cavity.from_pdb_file(fpath)

        cavity_centroids = []
        for i, c in enumerate(cavities):
            coords = [f.coordinates for f in c.features]
            cavity_centroids.append(centroid(coords))

        index = np.argmin(distance.cdist(cavity_centroids, [mol_centroid]))
        print(index)
        cav = cavities[index]
        for f in cav.features:
            print(f)
            print(f.residue)

        # Cavity to GOLD cavity file
        residues = list({str(f.residue.identifier).split(":")[1] for f in cav.features})

        cav_str = format_cavity_file(residues)
        with open(os.path.join(data_dir, pdb, "cavity.txt"), "w") as w:
            w.write(cav_str)

        f = create_pymol_file(residues, pdb)
        f.write(os.path.join(data_dir, pdb, "cav_vis.py"))
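# A standalone sketch of the cavity-selection step above (not from the original source): given
# detected cavities and a reference point, return the index of the cavity whose feature
# centroid lies closest. Only numpy and scipy are assumed (both already used in main()), and
# the centroid is computed directly instead of via the `centroid` helper; feature coordinates
# are treated as (x, y, z) tuples, as in the snippet above. The helper name is hypothetical.
import numpy as np
from scipy.spatial import distance


def nearest_cavity_index(cavities, point):
    """Index of the cavity whose feature centroid is closest to `point`."""
    centroids = [np.mean([f.coordinates for f in cav.features], axis=0) for cav in cavities]
    return int(np.argmin(distance.cdist(np.asarray(centroids), np.asarray([point]))))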
def _get_cavities(self, min_vol):
    """
    detect cavities using Cavity API, generate new directory for each cavity

    :return: None
    """
    # inputs
    cavs = [c for c in Cavity.from_pdb_file(self.apo_prep) if c.volume > min_vol]

    # task
    for i in range(len(cavs)):
        create_directory(path=os.path.join(self.working_dir, 'cavity_{}'.format(i)))

    cav_dic = {os.path.join(self.working_dir, 'cavity_{}'.format(i)): Helper.cavity_centroid(c)
               for i, c in enumerate(cavs)}

    cav_volume_dic = {os.path.join(self.working_dir, 'cavity_{}'.format(i), "cavity.volume"): c.volume
                      for i, c in enumerate(cavs)}

    cav_bb = {os.path.join(self.working_dir, 'cavity_{}'.format(i), "bounding_box.pkl"): c.bounding_box
              for i, c in enumerate(cavs)}

    # output
    for path, origin in cav_dic.items():
        with open(os.path.join(path, "cavity_origin.pkl"), 'wb') as handle:
            pickle.dump(origin, handle)

    for path, vol in cav_volume_dic.items():
        with open(path, 'w') as f:
            f.write(str(vol))

    for path, bb in cav_bb.items():
        with open(path, 'wb') as h:
            pickle.dump(bb, h)

    # update attr
    self.runs += ["cavity_{}".format(i) for i in range(len(cavs))]

    self.cavities = {"cavity_{}".format(p): os.path.join(self.working_dir, 'cavity_{}'.format(p), "cavity_origin.pkl")
                     for p in range(len(cav_dic))}
    self.cavities_volumes = {"cavity_{}".format(p): os.path.join(self.working_dir, 'cavity_{}'.format(p), "cavity.volume")
                             for p in range(len(cav_dic))}
    self.cavity_score = {"cavity_{}".format(p): os.path.join(self.working_dir, "cavity_{}".format(p), "cavity.score")
                         for p in range(len(cav_dic))}
    self.bounding_box = {"cavity_{}".format(p): os.path.join(self.working_dir, 'cavity_{}'.format(p), "bounding_box.pkl")
                         for p in range(len(cav_dic))}

    self.superstar = {p: os.path.join(self.working_dir, p, "superstar") for p in self.runs}
    self.superstar_time = {k: os.path.join(v, "time.time") for k, v in self.superstar.items()}

    self.hotspot = {p: os.path.join(self.working_dir, p, "hotspot") for p in self.runs}
    self.hotspot_time = {k: os.path.join(v, "time.time") for k, v in self.hotspot.items()}

    self.bcv = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "volume_{}".format(k))
                          for k in self.ligand_id[j]}
                    for j, pid in enumerate(self.protein_id)}
                for i in self.runs}
    self.bcv_time = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "volume_{}".format(k), "time.time")
                               for k in self.ligand_id[j]}
                         for j, pid in enumerate(self.protein_id)}
                     for i in self.runs}
    self.bcv_threshold = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "volume_{}".format(k), "threshold.dat")
                                    for k in self.ligand_id[j]}
                              for j, pid in enumerate(self.protein_id)}
                          for i in self.runs}

    self.bcv_lig_overlaps = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "lig_overlap_{}.percentage".format(k))
                                       for k in self.ligand_id[j]}
                                 for j, pid in enumerate(self.protein_id)}
                             for i in self.runs}
    self.bcv_hot_overlaps = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "hot_overlap_{}.percentage".format(k))
                                       for k in self.ligand_id[j]}
                                 for j, pid in enumerate(self.protein_id)}
                             for i in self.runs}

    self.hot_lig_overlaps = {i: {pid: {k: os.path.join(self.working_dir, i, "hotspot", "lig_overlap_{}.percentage".format(k))
                                       for k in self.ligand_id[j]}
                                 for j, pid in enumerate(self.protein_id)}
                             for i in self.runs}
    self.hot_hot_overlaps = {i: {pid: {k: os.path.join(self.working_dir, i, "hotspot", "hot_overlap_{}.percentage".format(k))
                                       for k in self.ligand_id[j]}
                                 for j, pid in enumerate(self.protein_id)}
                             for i in self.runs}

    self.atomic_overlaps = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "atomic_overlap_{}.dat".format(k))
                                      for k in self.ligand_id[j]}
                                for j, pid in enumerate(self.protein_id)}
                            for i in self.runs}
    self.matched = {i: {pid: {k: os.path.join(self.working_dir, i, "bcv", "atom_match_{}.percentage".format(k))
                              for k in self.ligand_id[j]}
                        for j, pid in enumerate(self.protein_id)}
                    for i in self.runs}
        if mol.identifier.split(" ")[1] not in seen:
            seen.append(mol.identifier.split(" ")[1])
            new_dict[i].append(mol)
        else:
            print("seen")

    return new_dict


######################################################################################################################

target = "CDK2"
pdb = "1aq1"
in_dir = "Z:/fragment-hotspot-results/patel_set/{}/{}/gold_standard".format(target, pdb)
prot = join(dirname(in_dir), "{}.pdb".format(pdb))

######################################################################################################################

fnames, pdb_codes = get_fnames(join(in_dir, "output"))
cavities = Cavity.from_pdb_file(prot)
ligand_by_cavity_mix = extracted_ligands(fnames, pdb_codes, cavities)
# ligand_by_cavity = deduplicate(ligand_by_cavity_mix)
ligand_by_cavity = ligand_by_cavity_mix

for c, mols in ligand_by_cavity.items():
    with MoleculeWriter(join(in_dir, "cavity_mol_{}.sdf".format(c))) as w:
        for m in mols:
            w.write(m)

for k in range(len(cavities)):
    if len(ligand_by_cavity[k]) > 0:
        in_mols = join(in_dir, "cavity_mol_{}.sdf".format(k))
        ms = [x for x in Chem.ForwardSDMolSupplier(in_mols) if x is not None]
        # print(len(ms))