def run(self):
    # create pharmacophore
    ref = PharmacophoreModel.from_pdb(pdb_code=self.pdb,
                                      chain=self.chain,
                                      representatives=self.input().path,
                                      identifier=self.pdb)
    ref.rank_features(max_features=6, feature_threshold=5)

    # write pymol file
    ref.write(self.output()["pymol"].path)

    # write Results file
    temp = tempfile.mkdtemp()
    PDBResult(self.pdb).download(temp)
    result = Results(protein=Protein.from_file(os.path.join(temp, "{}.pdb".format(self.pdb))),
                     super_grids=ref.dic)

    out_settings = HotspotWriter.Settings()
    out_settings.charged = False
    with HotspotWriter(os.path.dirname(self.output()["grids"].path),
                       grid_extension=".grd",
                       zip_results=True,
                       settings=out_settings) as w:
        w.write(result)

    # write aligned molecules
    with MoleculeWriter(self.output()['aligned_mols'].path) as w:
        for ligand in ref.aligned_ligands:
            w.write(ligand)

    # points
    points = ref._comparision_dict()
    with open(self.output()['points'].path, 'wb') as w:
        pickle.dump(points, w)
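# A minimal sketch (path hypothetical) of consuming the comparison points
# pickled by run() above.
import pickle

with open("points.pkl", "rb") as f:
    points = pickle.load(f)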
def from_protein(self, protein, charged_probes=False, probe_size=7, buriedness_method='ghecom',
                 cavities=None, nprocesses=1, settings=None, buriedness_grid=None, clear_tmp=False):
    """
    generates a result from a protein

    :param protein: a :class:`ccdc.protein.Protein` instance
    :param bool charged_probes: if True, include positive and negative probes
    :param int probe_size: size of probe in number of heavy atoms (3-8 atoms)
    :param str buriedness_method: either 'ghecom' or 'ligsite'
    :param cavities: Coordinate or `ccdc.cavity.Cavity` or `ccdc.molecule.Molecule` or list
                     specifying the cavity or cavities on which the calculation should be run
    :param int nprocesses: number of CPUs used
    :param `hotspots.calculation.Runner.Settings` settings: holds the sampler settings
    :param `ccdc.utilities.Grid` buriedness_grid: pre-calculated buriedness grid
    :return: a :class:`hotspots.result.Results` instance

    >>> from ccdc.protein import Protein
    >>> from hotspots.calculation import Runner

    >>> protein = Protein.from_file(<path_to_protein>)

    >>> runner = Runner()
    >>> settings = Runner.Settings()
    >>> settings.nrotations = 1000  # fewer rotations increase speed at the expense of accuracy
    >>> runner.from_protein(protein, nprocesses=3, settings=settings)
    Result()

    """
    start = time.time()
    self.super_grids = {}
    self.buriedness = buriedness_grid
    self.protein = protein
    self.charged_probes = charged_probes
    self.probe_size = probe_size
    self.buriedness_method = buriedness_method
    self.cavities = cavities
    self.clear_tmp = clear_tmp
    self.nprocesses = nprocesses

    if settings is None:
        self.sampler_settings = self.Settings()
    else:
        self.sampler_settings = settings

    self._calc_hotspots()  # return probes = False by default
    self.super_grids = {p: g[0] for p, g in self.out_grids.items()}

    print("Runtime = {} seconds".format(time.time() - start))

    return Results(super_grids=self.super_grids,
                   protein=self.protein,
                   buriedness=self.buriedness)
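# A hedged sketch combining from_protein() with the HotspotWriter pattern used
# elsewhere in this codebase; the input and output paths are assumptions.
from ccdc.protein import Protein
from hotspots.calculation import Runner
from hotspots.hs_io import HotspotWriter

protein = Protein.from_file("protein.pdb")
result = Runner().from_protein(protein, nprocesses=3)
with HotspotWriter("out", zip_results=True) as w:
    w.write(result)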
def _generate_result(self, path):
    with PushDir(path):
        files = set(listdir(path))

        # fetch protein - this should always be protein.pdb
        prot_name = [f for f in files if f.split(".")[1] in self.supported_protein_extensions][0]
        prot = Protein.from_file(prot_name)
        files.remove(prot_name)

        # there should only be one grid extension in the directory; if there are more,
        # read the results in manually
        grid_extension = {f.split(".")[1] for f in files}.intersection(self.supported_grid_extensions)
        if len(grid_extension) > 1:
            raise IndexError("Too many grid types, create `hotspots.result.Results` manually")
        elif len(grid_extension) < 1:
            raise IndexError("No supported grid types found")
        elif list(grid_extension)[0] == "dat":
            raise NotImplementedError("Will put this in if requested")
        else:
            grid_extension = list(grid_extension)[0]

        # read hotspot grids
        stripped_files = {f.split(".")[0] for f in files}
        hotspot_grids = stripped_files.intersection(self.supported_interactions)
        super_grids = {p: Grid.from_file(f"{p}.{grid_extension}") for p in hotspot_grids}

        # read superstar grids
        if any(f.startswith("superstar") for f in files) and self.read_superstar:
            superstar_grids = {p: Grid.from_file(f"superstar_{p}.{grid_extension}") for p in hotspot_grids}
        else:
            superstar_grids = None

        # read weighted_superstar grids
        if any(f.startswith("weighted") for f in files) and self.read_weighted:
            weighted_grids = {p: Grid.from_file(f"weighted_{p}.{grid_extension}") for p in hotspot_grids}
        else:
            weighted_grids = None

        # fetch buriedness grid
        try:
            buriedness_name = [f for f in files if f.startswith("buriedness")][0]
        except IndexError:
            buriedness_name = None

        if buriedness_name and self.read_buriedness:
            buriedness = Grid.from_file(buriedness_name)
        else:
            buriedness = None

    return Results(super_grids=super_grids,
                   protein=prot,
                   buriedness=buriedness,
                   superstar=superstar_grids,
                   weighted_superstar=weighted_grids,
                   identifier=basename(path))
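# _generate_result expects a results directory laid out roughly as below, with a
# single grid extension throughout; this tree is an illustration, not an exhaustive spec:
#
#   <path>/
#       protein.pdb
#       apolar.grd / donor.grd / acceptor.grd    hotspot grids, one per probe
#       superstar_<probe>.grd                    optional, read when read_superstar is set
#       weighted_<probe>.grd                     optional, read when read_weighted is set
#       buriedness.grd                           optional, read when read_buriedness is set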
def read(self, identifier=None):
    """
    creates a single or list of :class:`hotspots.result.Result` instance(s)

    :param str identifier: for directories containing multiple Fragment Hotspot Map results,
                           identifier is the subdirectory for which a
                           :class:`hotspots.result.Result` is required
    :return: `hotspots.result.Result` a Fragment Hotspot Map result

    >>> from hotspots.hs_io import HotspotReader

    >>> path = "<path_to_results_directory>"
    >>> result = HotspotReader(path).read()

    """
    if len(self.hs_dir) == 0:
        self.grid_dic, self.buriedness = self._get_grids()
        shutil.rmtree(self._base)
        return Results(protein=self.protein,
                       super_grids=self.grid_dic,
                       buriedness=self.buriedness)

    if identifier:
        self.grid_dic, self.buriedness = self._get_grids(sub_dir=str(identifier))
        return Results(protein=self.protein,
                       super_grids=self.grid_dic,
                       buriedness=self.buriedness)

    hrs = []
    for sub_dir in self.hs_dir:
        self.grid_dic, self.buriedness = self._get_grids(sub_dir=sub_dir)
        hrs.append(Results(protein=self.protein,
                           super_grids=self.grid_dic,
                           buriedness=self.buriedness))
    shutil.rmtree(self._base)
    return hrs
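# HotspotReader also supports context-manager usage (see masked_hotspot below);
# a minimal sketch with a hypothetical path:
from hotspots.hs_io import HotspotReader

with HotspotReader("results/out.zip") as reader:
    result = reader.read()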
def from_superstar(self, protein, superstar_grids, buriedness, charged_probes=False,
                   probe_size=7, settings=None, clear_tmp=False):
    """
    calculate hotspot maps from precalculated SuperStar maps. This enables more effective
    parallelisation and reuse of objects such as the buriedness grids

    :param protein: a :class:`ccdc.protein.Protein` instance
    :param superstar_grids: a :class:`hotspots.atomic_hotspot_calculation._AtomicHotspotResult` instance
    :param buriedness: a :class:`hotspots.grid_extension.Grid` instance
    :param bool charged_probes: if True, include positive and negative probes
    :param int probe_size: size of probe in number of heavy atoms (3-8 atoms)
    :param `hotspots.calculation.Runner.Settings` settings: holds the sampler settings
    :param bool clear_tmp: if True, clear the temporary directory
    :return: a :class:`hotspots.result.Results` instance
    """
    start = time.time()
    self.super_grids = {}
    self.superstar_grids = superstar_grids
    self.probe_types = [p.identifier for p in self.superstar_grids]
    self.buriedness = buriedness
    self.protein = protein
    self.charged_probes = charged_probes
    self.probe_size = probe_size
    self.clear_tmp = clear_tmp

    if settings is None:
        self.sampler_settings = self.Settings()
    else:
        self.sampler_settings = settings

    self.weighted_grids = self._get_weighted_maps()

    print("Start sampling")
    grid_dict = {w.identifier: w.grid for w in self.weighted_grids}
    for probe in self.probe_types:
        self._get_out_maps(probe, grid_dict)

    self.super_grids = {p: g[0] for p, g in self.out_grids.items()}
    print("Sampling complete\n")
    print("Runtime = {} seconds".format(time.time() - start))

    return Results(super_grids=self.super_grids,
                   protein=self.protein,
                   buriedness=self.buriedness)
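# A hedged sketch of feeding from_superstar() with precalculated maps, mirroring
# the _AtomicHotspot usage in _get_superstar below; `protein` and `buriedness_grid`
# are assumed to be a prepared Protein and a precalculated buriedness Grid.
a = _AtomicHotspot()
a.settings.atomic_probes = {"apolar": "AROMATIC CH CARBON",
                            "donor": "UNCHARGED NH NITROGEN",
                            "acceptor": "CARBONYL OXYGEN"}
superstar_grids = a.calculate(protein, nthreads=None, cavity_origins=None)
result = Runner().from_superstar(protein, superstar_grids, buriedness_grid)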
def generate_fake(self, buriedness=False, weighted=False, superstar=True):
    """
    create a small set of grids for testing

    :param bool buriedness: if True, include a fake buriedness grid
    :param bool weighted: if True, include fake weighted SuperStar grids
    :param bool superstar: if True, include fake SuperStar grids
    :return: a :class:`hotspots.result.Results` instance
    """
    def populate_grid(template, num_spheres, radius=1, value=8, scaling='linear'):
        # place num_spheres spheres at random interior grid points
        h = template.copy_and_clear()
        for i in range(num_spheres):
            x, y, z = [np.random.randint(low=2, high=ax - 2, size=1) for ax in h.nsteps]
            h.set_sphere(point=h.indices_to_point(x, y, z),
                         radius=radius,
                         value=value,
                         scaling=scaling)
        return h

    protein = Protein.from_file("testdata/6y2g_A/binding_site.pdb")
    mol = MoleculeReader("testdata/6y2g_A/A_mol.mol2")[0]
    g = Grid.initalise_grid([a.coordinates for a in mol.atoms])

    if buriedness:
        buriedness_grid = Grid.from_molecule(mol)
    else:
        buriedness_grid = None

    interactions = ["apolar", "donor", "acceptor"]

    super_grids = {p: populate_grid(template=g, num_spheres=3) for p in interactions}

    if superstar:
        superstar_grids = {p: populate_grid(template=g, num_spheres=3) for p in interactions}
    else:
        superstar_grids = None

    if weighted:
        weighted_superstar_grids = {p: populate_grid(template=g, num_spheres=3) for p in interactions}
    else:
        weighted_superstar_grids = None

    return Results(super_grids=super_grids,
                   protein=protein,
                   buriedness=buriedness_grid,
                   superstar=superstar_grids,
                   weighted_superstar=weighted_superstar_grids)
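# An assumed companion test: round-trip the fake result through the writer,
# mirroring test_write_real_single below; the output path is hypothetical.
def test_write_fake(self):
    hr = self.generate_fake(buriedness=True, weighted=True, superstar=True)
    with HotspotWriter("testdata/hs_io/minimal_all_grids_fake") as w:
        w.write(hr)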
def test_write_real_single(self):
    base = "testdata/1hcl"
    interactions = ["donor", "acceptor", "apolar"]

    super_grids = {p: Grid.from_file(os.path.join(base, f"{p}.grd")) for p in interactions}
    superstar_grids = {p: Grid.from_file(os.path.join(base, f"superstar_{p}.grd")) for p in interactions}
    buriedness = Grid.from_file(os.path.join(base, "buriedness.grd"))
    prot = Protein.from_file(os.path.join(base, "protein.pdb"))

    hr = Results(super_grids=super_grids,
                 protein=prot,
                 buriedness=buriedness,
                 superstar=superstar_grids)

    settings = HotspotWriter.Settings()
    settings.output_superstar = True
    with HotspotWriter("testdata/hs_io/minimal_all_grids_real", settings=settings) as w:
        w.write(hr)
def test_write_pymol_isoslider(self):
    # read in manually
    path = "testdata/hs_io/minimal_all_grids/out.zip"
    base = tempfile.mkdtemp()
    with zipfile.ZipFile(path) as hs_zip:
        hs_zip.extractall(base)
    base = os.path.join(base, "hotspot")

    interactions = ["donor", "acceptor", "apolar"]
    super_grids = {p: Grid.from_file(os.path.join(base, f"{p}.grd")) for p in interactions}
    superstar_grids = {p: Grid.from_file(os.path.join(base, f"superstar_{p}.grd")) for p in interactions}
    prot = Protein.from_file(os.path.join(base, "protein.pdb"))

    hr = Results(super_grids=super_grids, protein=prot, superstar=superstar_grids)
    hr.identifier = "hotspot"

    settings = HotspotWriter.Settings()
    settings.output_superstar = True
    writer = HotspotWriter("testdata/hs_io/minimal_all_grids", settings=settings)  # we won't actually write

    writer.pymol_out.commands += writer._write_pymol_isosurfaces(hr.super_grids, "hotspot", "hotspot", "fhm")
    writer.pymol_out.commands += writer._write_pymol_isosurfaces(hr.superstar, "hotspot", "hotspot", "superstar")
    writer._write_pymol_isoslider(hr)

    writer.pymol_out.write("testdata/hs_io/minimal_all_grids/test_write_pymol_isoslider.py")
def masked_hotspot(base, pdb, hotspot_path):
    assert os.path.exists(hotspot_path)

    with HotspotReader(os.path.join(hotspot_path, "out.zip")) as r:
        hr = [h for h in r.read() if h.identifier == "hotspot"][0]

    b = (hr.buriedness > 3) * hr.buriedness

    crystal_lig = MoleculeReader(os.path.join(base, pdb, f"{pdb}_ref.mol2"))[0]

    g = hr.buriedness.copy_and_clear()
    for atm in crystal_lig.heavy_atoms:
        g.set_sphere(point=atm.coordinates,
                     radius=6,
                     value=1,
                     mode="replace",
                     scaling="None")

    mol_buried = (g & b) * b
    common_mol_buried = hr.super_grids["apolar"].common_boundaries(mol_buried)

    apolar = (common_mol_buried & hr.super_grids["apolar"]) * hr.super_grids["apolar"]
    donor = (common_mol_buried & hr.super_grids["donor"]) * hr.super_grids["donor"]
    acceptor = (common_mol_buried & hr.super_grids["acceptor"]) * hr.super_grids["acceptor"]

    return Results(super_grids={"apolar": apolar,
                                "donor": donor,
                                "acceptor": acceptor},
                   protein=hr.protein,
                   buriedness=common_mol_buried)
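# A hypothetical invocation; the directory layout (base/<pdb>/<pdb>_ref.mol2 and
# a hotspot results folder containing out.zip) follows the function above.
hr_masked = masked_hotspot(base="testdata", pdb="1hcl",
                           hotspot_path="testdata/1hcl/hotspot")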
def calc(args):
    prot_file, hotspot_file = args

    prot = Protein.from_file(prot_file)  # pre-prepared

    runner = Runner()
    settings = Runner.Settings()
    settings.apolar_translation_threshold = 8
    settings.polar_translation_threshold = 10

    hr = runner.from_protein(prot, nprocesses=3, settings=settings, probe_size=3)

    for p, g in hr.super_grids.items():
        hr.super_grids[p] = g.dilate_by_atom()

    try:
        e = Extractor(hr)
        bv = e.extract_volume(volume=250)
    except Exception:
        # fall back to the full hotspot maps if volume extraction fails
        bv = Results(protein=hr.protein.copy(),
                     super_grids={p: g.copy() for p, g in hr.super_grids.items()})

    hr.identifier = "hotspot"
    bv.identifier = "bcv"

    with HotspotWriter(hotspot_file) as w:
        w.write([hr, bv])
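# The (prot_file, hotspot_file) tuple argument suggests calc() is driven by a
# map-style loop over prepared inputs; a hedged sketch with hypothetical paths:
jobs = [("proteins/1hcl.pdb", "hotspots/1hcl"),
        ("proteins/2vta.pdb", "hotspots/2vta")]
for job in jobs:
    calc(job)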
def generate_pharmacophore(ligands, ref_pdb, out_dir):
    lig_pharms = []
    for ligand in ligands:
        ligand_pharmacophore = LigandPharmacophoreModel()
        ligand_pharmacophore.feature_definitions = ["ring",
                                                    "acceptor_projected",
                                                    "donor_projected"]
        ligand_pharmacophore.detect_from_ligand(ligand)
        for feat in ligand_pharmacophore.detected_features:
            ligand_pharmacophore.add_feature(feat)
        lig_pharms.append(ligand_pharmacophore)

    # consensus features must occur in at least 20% of the ligands
    cutoff = len(ligands) * 0.2
    feats, feat_point_grds = create_consensus(lig_pharms, cutoff=cutoff)

    # collapse ring features to a single sphere at the ring centre
    for feat in feats:
        if feat.identifier == "ring":
            p = feat.spheres[0].centre
            feat.spheres = (GeometricDescriptors.Sphere((p[0], p[1], p[2]), 2.0),)
            feat.point = feat.spheres[0]

    ensemble_pharm = LigandPharmacophoreModel()
    ensemble_pharm.detected_features = feats
    ensemble_pharm.feature_point_grids = feat_point_grds
    ensemble_pharm.ligands = ligands
    ensemble_pharm.detected_features = ensemble_pharm.top_features(num=6)

    pymol_o = os.path.join(out_dir, "pymol")
    if not os.path.exists(pymol_o):
        os.mkdir(pymol_o)
    ensemble_pharm.pymol_visulisation(pymol_o)

    # enable rescoring
    tmp = tempfile.mkdtemp()
    ftp_download([ref_pdb, tmp])
    hr = Results(super_grids={"apolar": feat_point_grds["ring"],
                              "donor": feat_point_grds["donor_projected"],
                              "acceptor": feat_point_grds["acceptor_projected"]},
                 protein=Protein.from_file(os.path.join(tmp, f"{ref_pdb}.pdb")))

    hr_out = os.path.join(out_dir, "hr")
    if not os.path.exists(hr_out):
        os.mkdir(hr_out)
    with HotspotWriter(hr_out) as w:
        w.write(hr)

    p_out = os.path.join(out_dir, "ligand_pharmacophores")
    if not os.path.exists(p_out):
        os.mkdir(p_out)

    for n in [6, 5, 4, 3]:
        lp = LigandPharmacophoreModel()
        lp.detected_features = feats
        lp.detected_features = lp.top_features(num=n)
        for feat in lp.detected_features:
            lp.add_feature(feat)
        lp.intra_only = True
        lp.write(os.path.join(p_out, f"{n}.cm"))
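# A hypothetical driver: load aligned ligands with MoleculeReader (as elsewhere
# in this codebase) and build the consensus pharmacophore; paths are assumptions.
ligands = list(MoleculeReader("aligned_ligands.mol2"))
generate_pharmacophore(ligands, ref_pdb="1hcl", out_dir="pharmacophore_out")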
print(target)
for pdb in pdbs:
    chain = chains[pdb]
    ligand_id = ligands[pdb]
    out_dir = os.path.join(base, target, pdb, "reference")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    try:
        p = PharmacophoreModel._from_siena(pdb, ligand_id, mode, target, out_dir=out_dir)
        p.write(os.path.join(out_dir, "reference_pharmacophore.py"))

        prot = hs_io.HotspotReader(os.path.join(base, target, pdb, "out.zip")).read().protein
        hs = Results(protein=prot, super_grids=p.dic)
        with hs_io.HotspotWriter(out_dir) as wf:
            wf.write(hs)

        with io.MoleculeWriter(os.path.join(out_dir, "aligned.mol2")) as w:
            for ligand in p.representatives:
                w.write(ligand)
    except RuntimeError:
        print("skipped {}".format(pdb))
def make_selectivity_maps(self):
    """
    Creates the selectivity maps for the polar and apolar probes.

    :return:
    """
    diff_maps = self.make_difference_maps()

    probes_list = ['donor', 'acceptor', 'apolar', 'positive', 'negative']
    polar_probes = ['donor', 'acceptor', 'positive', 'negative']
    apolar_probes = ['apolar']

    for probe in probes_list:
        try:
            dmap = diff_maps[probe]

            if probe in polar_probes:
                # find the percentile threshold, if specified
                perc = np.percentile(dmap[dmap > 0], self.settings.polar_percentile_threshold)

                # find clusters in the target and off-target maps
                clust_map_on = _GridEnsemble.HDBSCAN_cluster(dmap * (dmap > perc),
                                                             min_cluster_size=self.settings.min_points_cluster_polar)
                clust_map_off = _GridEnsemble.HDBSCAN_cluster(dmap * (dmap < -perc),
                                                              min_cluster_size=self.settings.min_points_cluster_polar)

            elif probe in apolar_probes:
                # find the percentile threshold, if specified
                perc = np.percentile(dmap[dmap > 0], self.settings.apolar_percentile_threshold)

                # find clusters in the target and off-target maps
                clust_map_on = _GridEnsemble.HDBSCAN_cluster(dmap * (dmap > perc),
                                                             min_cluster_size=self.settings.min_points_cluster_apolar,
                                                             allow_single_cluster=True)
                clust_map_off = _GridEnsemble.HDBSCAN_cluster(dmap * (dmap < -perc),
                                                              min_cluster_size=self.settings.min_points_cluster_apolar,
                                                              allow_single_cluster=True)

            else:
                print("Probe type {} not recognised as polar or apolar".format(probe))
                continue

            # get the center of mass coordinates for the target and off-target clusters
            coords = self.get_clusters_center_mass(dmap, clust_map_on)
            minus_coords = self.get_clusters_center_mass(dmap, clust_map_off)

            # remove cluster pairs that are too close together
            for k in coords.keys():
                for i in minus_coords.keys():
                    dist = self.get_distance(coords[k], minus_coords[i]) * 0.5
                    if dist < self.settings.cluster_distance_cutoff:
                        self.remove_cluster(clust_map_on, k)
                        self.remove_cluster(clust_map_off, i)

            # remove any clusters that don't make the median cutoff
            for c in set(clust_map_on[clust_map_on > 0]):
                med = np.median(dmap[clust_map_on == c])
                if med < self.settings.minimal_cluster_score:
                    self.remove_cluster(clust_map_on, c)

            for c in set(clust_map_off[clust_map_off > 0]):
                min_med = np.median(dmap[clust_map_off == c])
                if min_med > -self.settings.minimal_cluster_score:
                    self.remove_cluster(clust_map_off, c)

            ge = _GridEnsemble(dimensions=self.common_grid_dimensions,
                               shape=self.common_grid_nsteps)
            self.selectivity_maps[probe] = ge.as_grid((clust_map_on > 0) * dmap)

        # in case of no charged probes
        except KeyError:
            continue

    self.selectivity_result = Results(super_grids=self.selectivity_maps,
                                      protein=self.target.protein)
def make_ensemble_maps(self, save_grid_ensembles=True):
    """
    Creates summary maps for the ensemble based on the settings provided.

    :return:
    """
    probes_list = ['donor', 'acceptor', 'apolar']
    polar_probes = ['donor', 'acceptor']
    apolar_probes = ['apolar']

    for probe in probes_list:
        try:
            # don't need to create the ensemble array each time (eg if pickled GridEnsembles have been supplied)
            if probe in self.grid_ensembles.keys():
                ge = self.grid_ensembles[probe]
            else:
                # probe_grids = [hs.super_grids[probe].max_value_of_neighbours() for hs in self.hotspot_results]
                probe_grids = [hs.super_grids[probe] for hs in self.hotspot_results]
                ge = _GridEnsemble()
                ge.make_ensemble_array(probe_grids)

            if save_grid_ensembles:
                self.grid_ensembles[probe] = ge

            if probe in polar_probes:
                if self.settings.combine_mode == 'median':
                    ens_grid = ge.as_grid(ge.get_median_frequency_map(
                        threshold=self.settings.polar_frequency_threshold))
                # the mean and max modes don't currently take into account the frequency
                elif self.settings.combine_mode in ['mean', 'max']:
                    ens_grid = ge.make_summary_grid(mode=self.settings.combine_mode)
                else:
                    print('Unrecognised mode for combining grids in {} {}: {}'.format(
                        self.ensemble_id, probe, self.settings.combine_mode))
                    continue

            elif probe in apolar_probes:
                ens_grid = ge.make_summary_grid(mode=self.settings.combine_mode)

            else:
                print("Probe type {} in ensemble {} not recognised as polar or apolar".format(
                    probe, self.ensemble_id))
                continue

            print(probe, ens_grid.nsteps)
            self.ensemble_maps[probe] = ens_grid

        # in case of no charged probes
        except KeyError:
            continue

    try:
        self.ensemble_hotspot_result = Results(super_grids=self.ensemble_maps,
                                               protein=self.hotspot_results[0].protein,
                                               buriedness=None,
                                               pharmacophore=False)
    except TypeError:
        self.ensemble_hotspot_result = Results(super_grids=self.ensemble_maps,
                                               protein=None,
                                               buriedness=None,
                                               pharmacophore=False)
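# A hedged usage sketch (the `ensemble` instance and output path are assumptions):
# build the ensemble maps, then persist the summary result with the HotspotWriter
# pattern used elsewhere in this codebase.
ensemble.make_ensemble_maps()
with HotspotWriter("ensemble_out") as w:
    w.write(ensemble.ensemble_hotspot_result)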
def _get_superstar(self, cav_id=None):
    """
    calculate SuperStar for each cavity

    if the buriedness method is ligsite, write out the grid for later

    :param cav_id:
    :return:
    """
    # input
    prot = Protein.from_file(self.apo_prep)

    if cav_id == 'global':
        cavity_origin = None
    else:
        with open(self.cavities[cav_id], 'rb') as handle:
            cavity_origin = [pickle.load(handle)]

    # tasks
    start = time.time()
    a = _AtomicHotspot()
    a.settings.atomic_probes = {"apolar": "AROMATIC CH CARBON",
                                "donor": "UNCHARGED NH NITROGEN",
                                "acceptor": "CARBONYL OXYGEN"}

    self.superstar_grids = a.calculate(prot, nthreads=None, cavity_origins=cavity_origin)

    sr = Results(protein=prot,
                 super_grids={result.identifier: result.grid for result in self.superstar_grids})
    finish = time.time()

    # outputs
    if not os.path.exists(self.superstar[cav_id]):
        os.mkdir(self.superstar[cav_id])

    if cav_id != 'global':
        out = os.path.join(a.settings.temp_dir, str(0))
    else:
        out = a.settings.temp_dir

    for interaction in ["apolar", "acceptor", "donor"]:
        shutil.copyfile(os.path.join(out, "{}.cavity.mol2".format(interaction)),
                        os.path.join(self.superstar[cav_id], "{}.cavity.mol2".format(interaction)))

    shutil.make_archive(os.path.join(self.superstar[cav_id], "superstar"), 'zip', out)

    with HotspotWriter(path=self.superstar[cav_id], zip_results=True) as w:
        w.write(sr)

    with open(self.superstar_time[cav_id], 'w') as t:
        t.write(str(finish - start))

    shutil.rmtree(a.settings.temp_dir)

    if self.buriedness_method == 'ligsite':
        # only write if it doesn't exist i.e. the first cavity run
        if not os.path.exists(self.buriedness):
            for ss in self.superstar_grids:
                if ss.identifier == "apolar":
                    ss.buriedness.write(self.buriedness)
def from_pdb(self, pdb_code, charged_probes=False, probe_size=7, buriedness_method='ghecom',
             nprocesses=3, cavities=False, settings=None, clear_tmp=False):
    """
    generates a result from a pdb code

    :param str pdb_code: PDB code
    :param bool charged_probes: if True, include positive and negative probes
    :param int probe_size: size of probe in number of heavy atoms (3-8 atoms)
    :param str buriedness_method: either 'ghecom' or 'ligsite'
    :param int nprocesses: number of CPUs used
    :param bool cavities: if True, detect cavities and restrict the calculation to them
    :param `hotspots.calculation.Runner.Settings` settings: holds the calculation settings
    :param bool clear_tmp: if True, clear the temporary directory
    :return: a :class:`hotspots.result.Result` instance

    >>> from hotspots.calculation import Runner

    >>> runner = Runner()
    >>> runner.from_pdb("1hcl")
    Result()

    """
    protoss = False  # Protoss protonation is currently disabled

    tmp = tempfile.mkdtemp()
    PDBResult(identifier=pdb_code).download(out_dir=tmp)
    fname = join(tmp, "{}.pdb".format(pdb_code))
    self.protein = Protein.from_file(fname)
    self._prepare_protein(protoss)
    self.charged_probes = charged_probes
    self.probe_size = probe_size
    self.buriedness_method = buriedness_method
    self.clear_tmp = clear_tmp
    self.cavities = None

    if cavities:
        self.cavities = Cavity.from_pdb_file(fname)

    self.nprocesses = nprocesses

    if settings is None:
        self.sampler_settings = self.Settings()
    else:
        self.sampler_settings = settings

    if self.sampler_settings.return_probes:
        self._calc_hotspots(return_probes=True)
    else:
        self._calc_hotspots()

    self.super_grids = {p: g[0] for p, g in self.out_grids.items()}

    if clear_tmp:
        shutil.rmtree(tmp)

    return Results(super_grids=self.super_grids,
                   protein=self.protein,
                   buriedness=self.buriedness,
                   superstar={x.identifier: x.grid for x in self.superstar_grids},
                   weighted_superstar={x.identifier: x.grid for x in self.weighted_grids})
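# A hedged sketch: compute maps straight from a PDB code and persist them with
# the HotspotWriter pattern used elsewhere; the output directory is an assumption.
from hotspots.calculation import Runner
from hotspots.hs_io import HotspotWriter

runner = Runner()
result = runner.from_pdb("1hcl", nprocesses=3)
with HotspotWriter("1hcl_hotspot", zip_results=True) as w:
    w.write(result)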