def augmentation(hr, hits):
    """
    Prepare the hotspot grids held by `hr` for use with the molecules in `hits`.

    Every grid in `hr.super_grids` is dilated, put on common boundaries with a
    grid built around the protein heavy atoms, shrunk to a grid built around
    the heavy atoms of the hit molecules and finally passed through
    `Grid.get_single_grid`. `hr.set_background()` and `hr.normalize_to_max()`
    are called before returning.

    :param hr: result object exposing `super_grids`, `protein`,
        `set_background()` and `normalize_to_max()`; modified in place
    :param hits: object whose `hits` attribute iterates over items that carry
        a `molecule`
    :return: the same `hr` object
    """
    # blank grid able to contain every heavy atom of every hit
    hit_coords = {
        atom.coordinates
        for hit in hits.hits
        for atom in hit.molecule.heavy_atoms
    }
    hit_box = Grid.initalise_grid(coords=hit_coords, padding=3)

    # dilate each map (keys are snapshotted, values are replaced in place)
    for probe in list(hr.super_grids):
        hr.super_grids[probe] = hr.super_grids[probe].dilate_by_atom()

    # bring each map onto common boundaries with a protein-sized grid
    protein_box = Grid.initalise_grid(
        [atom.coordinates for atom in hr.protein.heavy_atoms], padding=1)
    for probe, grid in hr.super_grids.items():
        hr.super_grids[probe] = protein_box.common_boundaries(grid)

    # crop the maps to the hit region to save time
    cropped = {}
    for probe, grid in hr.super_grids.items():
        cropped[probe] = Grid.shrink(small=hit_box, big=grid)

    # only the first value returned by get_single_grid is used
    masked, _ = Grid.get_single_grid(cropped)
    hr.super_grids = masked

    hr.set_background()
    hr.normalize_to_max()
    return hr
def create_projection_grid(self):
    """
    create the projected spheres for the summary pharmacophore features

    Only features that carry a projection (a second sphere, `spheres[1]`)
    contribute to the grid. When a grid is created it is also stored on
    `self.projection_grid`.

    :return: `hotspots.grid_extension.Grid`, or None when no feature has a
        projection
    """
    # Do the features have projections ?
    projected = [f for f in self.features if len(f.spheres) > 1]
    if not projected:
        return None

    # Size the grid from the projected features only: indexing spheres[1] on
    # a feature without a projection would raise IndexError.
    g = Grid.initalise_grid(
        coords=[f.spheres[1].centre for f in projected],
        spacing=0.25,
        padding=2)

    for f in projected:
        centre = f.spheres[1].centre
        g.set_sphere(point=[centre[0], centre[1], centre[2]],
                     value=1,
                     radius=2,
                     scaling='linear')

    self.projection_grid = g
    return g
def shrink_to_mols(self, mols):
    """
    Reduces the grid size to be just large enough to contain all mol objects in mols

    Each grid in `self.super_grids` is replaced by the result of
    `Grid.shrink` against a grid initialised from the atom coordinates of
    every molecule in `mols`.

    :param mols: iterable of molecules exposing `atoms`
    :return: None
    """
    atom_coords = [atom.coordinates for mol in mols for atom in mol.atoms]
    bounding_grid = Grid.initalise_grid(atom_coords)

    # keys are snapshotted so the values can be swapped while looping
    for probe in list(self.super_grids):
        self.super_grids[probe] = Grid.shrink(bounding_grid,
                                              self.super_grids[probe])
def _molecule_as_grid(mol, g=None):
    """
    Produces a grid representation of a molecule split by interaction type

    One copy of the blank grid is made for each of "donor", "acceptor" and
    "apolar". For every atom whose `Helper.get_atom_type` equals the
    interaction name, a sphere of the atom's vdW radius and value 1 is
    written into the matching grid.

    :param mol: takes any ccdc molecule
    :type mol: `ccdc.molecule.Molecule`
    :param g: a blank grid; when not supplied one is initialised around the
        molecule's atoms with a padding of 3
    :type g: `hotspots.grid_extension.Grid`

    :return: a dictionary of grids by interaction type
    :rtype: dict
    """
    if not g:
        g = Grid.initalise_grid(
            coords=[atom.coordinates for atom in mol.atoms], padding=3)

    grid_dict = {name: g.copy() for name in ("donor", "acceptor", "apolar")}

    for interaction, grid in grid_dict.items():
        for atom in mol.atoms:
            if Helper.get_atom_type(atom) != interaction:
                continue
            grid.set_sphere(point=atom.coordinates,
                            radius=atom.vdw_radius,
                            value=1,
                            scaling='None')

    return grid_dict
def _from_molecule(self, mol, scaling=1):
    """
    generate a grid around a molecule with probe and vdW spheres stamped on
    its heavy atoms, cropped to the bounding box of `self.out_grid`

    For every key of `self.probe_selem_dict`, largest first, a sphere of
    radius `probe * scaling` and value `probe` is written on each heavy atom.
    Afterwards a sphere of the atom's vdW radius and value 100 is written on
    each heavy atom. All writes use mode='replace'.

    :param mol: `ccdc.molecule.Molecule`
    :param scaling: float, multiplier applied to each probe radius
    :return: `hotspots.grid_extension.Grid`
    """
    g = Grid.initalise_grid(coords=[atom.coordinates for atom in mol.atoms],
                            padding=15,
                            spacing=1)
    heavy_atoms = mol.heavy_atoms

    # largest probe first so smaller probes overwrite it closer to the atoms
    for probe in sorted(self.probe_selem_dict, reverse=True):
        for atom in heavy_atoms:
            g.set_sphere(point=atom.coordinates,
                         radius=probe * scaling,
                         value=probe,
                         mode='replace',
                         scaling='None')

    # the atoms themselves are written last, with the value 100
    for atom in heavy_atoms:
        g.set_sphere(point=atom.coordinates,
                     radius=atom.vdw_radius,
                     value=100,
                     mode='replace',
                     scaling='None')

    box = self.out_grid.bounding_box
    region = g.point_to_indices(box[0]) + g.point_to_indices(box[1])
    # NOTE(review): debug output kept as in the original implementation
    print(region)
    return g.sub_grid(region)
def _create_grids(pharmacophores):
    """
    Create one grid per feature identifier found in the pharmacophores.

    A template grid (spacing 0.5) is initialised from the centres of every
    sphere of every feature, and a copy of it is returned for each distinct
    `feature.identifier`.

    :param pharmacophores: pharmacophores exposing `features`; iterated twice
    :return: dict mapping feature identifier to a copy of the template grid
    """
    centres = []
    for pharmacophore in pharmacophores:
        for feature in pharmacophore.features:
            for sphere in feature.spheres:
                centres.append(sphere.centre)
    template = Grid.initalise_grid(coords=centres, spacing=0.5)

    identifiers = set()
    for pharmacophore in pharmacophores:
        for feature in pharmacophore.features:
            identifiers.add(feature.identifier)

    grids = {}
    for identifier in identifiers:
        grids[identifier] = template.copy()
    return grids
def test_features_to_grid(self):
    """
    Four acceptor features, two of them at the same point, should give a grid
    with three peaks, each with a value of 1.0.
    """
    model = PharmacophoreModel()
    model.feature_definitions = ["acceptor"]

    points = [[1.0, 1.0, 1.0],
              [1.0, 1.0, 1.0],
              [2.5, 2.5, 2.5],
              [3.5, 3.5, 3.5]]
    grid = Grid.initalise_grid(points, padding=3)

    features = []
    for point in points:
        sphere = GeometricDescriptors.Sphere(centre=point, radius=1)
        features.append(Feature(model.feature_definitions["acceptor"], sphere))
    for feature in features:
        feature.point = feature.spheres[0]

    h = _features_to_grid(features, grid)

    # should be 3 peaks, all with a value of 1.0
    self.assertEqual(3, len(h.get_peaks(min_distance=1, cutoff=0)))
    peak_values_ok = all(
        h.value_at_point(peak) == 1.0
        for peak in h.get_peaks(min_distance=1, cutoff=0))
    self.assertTrue(peak_values_ok)
def __init__(self, settings):
    """
    Store the settings, choose the working grid and call `update_grid()`.

    :param settings: object exposing `out_grid` and `protein`; when
        `out_grid` is falsy a grid is initialised around the protein atoms
        with a padding of 2
    """
    self.settings = settings

    grid = settings.out_grid
    if not grid:
        protein_coords = [atom.coordinates for atom in settings.protein.atoms]
        grid = Grid.initalise_grid(protein_coords, padding=2)
    self.grid = grid

    self.update_grid()
def main():
    """
    Score the docking poses against the hotspot maps and write them, ranked,
    to an SD file with the scores attached as molecule data.
    """
    # ---- input files ----
    mol_file = "data/gold_docking_poses.sdf"
    hotspot_files = "data/out.zip"
    output_file = "data/ranked.sdf"

    # ---- ranking keys ----
    # option 1: rank based on apolar score        -> ["apolar"]
    # option 2: rank based on donor and acceptor  -> ["donor", "acceptor"]
    # option 3: rank based on the simple score    -> ["simple_score"]
    sort_on = ["donor", "acceptor"]

    # molecules are held in a list so they can retain new attributes
    mols = list(MoleculeReader(mol_file))
    hr = HotspotReader(hotspot_files).read()

    for probe in list(hr.super_grids):
        hr.super_grids[probe] = hr.super_grids[probe].max_value_of_neighbours()

    # blank grid which can contain all docking poses
    pose_coords = {
        atom.coordinates for mol in mols for atom in mol.heavy_atoms
    }
    small_blank = Grid.initalise_grid(coords=pose_coords, padding=2)

    # protein atoms are written as -1 so clashes can be detected
    protein_grid = hr.super_grids["apolar"].copy_and_clear()
    for atom in hr.protein.atoms:
        protein_grid.set_sphere(point=atom.coordinates,
                                radius=atom.vdw_radius * 0.9,
                                value=-1,
                                scaling='None')
    protein_grid = _shrink(small=small_blank, big=protein_grid)

    # shrink hotspot maps to save time and add the protein mask
    sub_grids = {}
    for probe, grid in hr.super_grids.items():
        sub_grids[probe] = _shrink(small=small_blank, big=grid) + protein_grid

    # score the mols
    for mol in mols:
        mol.data = example_score(sub_grids, mol, small_blank)
        simple = simple_score(hr, mol)
        mol.data.update({"simple_score": simple})

    ranked_mols = ranked_molecules(mols, sort_on)

    # output ranked mols in sdf format with data attached
    _output_sdf(ranked_mols, output_file)
def multi_probes(mol, scaling=1):
    """
    Write spheres of several probe sizes onto a grid around a molecule.

    For each probe size in 10, 5, 4, 3 (in that order) a sphere of radius
    `probe * scaling` and value `probe` is written on every heavy atom.

    :param mol: molecule exposing `atoms` and `heavy_atoms`
    :param scaling: multiplier applied to each probe radius
    :return: the populated grid
    """
    grid = Grid.initalise_grid(
        coords=[atom.coordinates for atom in mol.atoms], padding=2)

    for probe in (10, 5, 4, 3):
        for atom in mol.heavy_atoms:
            grid.set_sphere(point=atom.coordinates,
                            radius=probe * scaling,
                            value=probe,
                            scaling='None')
    return grid
def get_ligand_grids(self, binding_sites=None):
    """
    Makes hotspot-like grids based on the types of atoms present in the bound
    ligands of the ensemble

    A sphere of radius 1 and value 1 is written for every ligand heavy atom:
    into the "acceptor" grid if the atom is an acceptor, into the "donor"
    grid if it is a donor (an atom may be both), otherwise into "apolar".

    :param binding_sites: binding sites exposing `ligands`; when falsy,
        `self.get_ensemble_binding_sites()` is used
    :return: dictionary of [probe]: ccdc.utilities.Grid objects
    """
    if not binding_sites:
        binding_sites = self.get_ensemble_binding_sites()

    # The outer loop must come first in the comprehension; the previous
    # clause order referenced the binding site before it was bound.
    ligands = [lig for site in binding_sites for lig in site.ligands]

    blank_grd = Grid.initalise_grid(
        [a.coordinates for lig in ligands for a in lig.atoms])
    feature_dic = {
        "apolar": blank_grd.copy(),
        "acceptor": blank_grd.copy(),
        "donor": blank_grd.copy()
    }

    for lig in ligands:
        for a in lig.heavy_atoms:
            probes = []
            if a.is_acceptor:
                probes.append('acceptor')
            if a.is_donor:
                probes.append('donor')
            if not probes:
                # Note that right now, all non-donors and acceptors are
                # being labelled as apolar. Problematic?
                probes.append('apolar')

            for probe in probes:
                feature_dic[probe].set_sphere(point=a.coordinates,
                                              radius=1,
                                              value=1,
                                              scaling='None')
    return feature_dic
def buriedness_grid(self):
    """
    Build the buriedness grid on a 0.5-spaced grid covering `self.out_grid`.

    `self.protein_grid` is passed through `_multiscale_closing`; the result
    is multiplied by its own `_open_grid(..., 2)` output. The array of that
    product is resized to the step counts of a grid initialised from the
    bounding box of `self.out_grid` and cast to int.

    :return: grid produced by `Grid.array_to_grid` on the resized array
    """
    closed = self._multiscale_closing(self.protein_grid)
    opened = self._open_grid(closed, 2)
    source_array = (opened * closed).get_array()

    target = Grid.initalise_grid(self.out_grid.bounding_box,
                                 padding=0,
                                 spacing=0.5)
    resized = resize(source_array, target.nsteps, anti_aliasing=False)

    # Future tweaking of the resized array would go here
    return Grid.array_to_grid(resized.astype(int), target)
def generate_fake(self, buriedness=False, weighted=False, superstar=True):
    """
    create a small set of grids for testing

    Grids are initialised around the atoms of the 6y2g_A test molecule and
    populated with randomly placed spheres for each of "apolar", "donor" and
    "acceptor".

    :param buriedness: if True, a buriedness grid is created with
        `Grid.from_molecule`; otherwise None is passed on
    :param weighted: if True, fake weighted superstar grids are created;
        otherwise None is passed on
    :param superstar: if True, fake superstar grids are created; otherwise
        None is passed on
    :return: `Results` built from the fake grids and the test protein
    """

    def populate_grid(template, num_spheres, radius=1, value=8,
                      scaling='linear'):
        """Return a cleared copy of `template` with `num_spheres` random spheres."""
        h = template.copy_and_clear()
        # range(num_spheres), not range(1, num_spheres): the latter placed
        # one sphere fewer than requested.
        for _ in range(num_spheres):
            # keep centres at least two steps away from every grid edge
            x, y, z = [
                np.random.randint(low=2, high=ax - 2, size=1)
                for ax in h.nsteps
            ]
            h.set_sphere(point=h.indices_to_point(x, y, z),
                         radius=radius,
                         value=value,
                         scaling=scaling)
        return h

    protein = Protein.from_file("testdata/6y2g_A/binding_site.pdb")
    mol = MoleculeReader("testdata/6y2g_A/A_mol.mol2")[0]
    g = Grid.initalise_grid([a.coordinates for a in mol.atoms])

    interactions = ["apolar", "donor", "acceptor"]

    def fake_grids():
        """Return one randomly populated grid per interaction type."""
        return {
            p: populate_grid(template=g, num_spheres=3) for p in interactions
        }

    buriedness_grid = Grid.from_molecule(mol) if buriedness else None
    super_grids = fake_grids()
    superstar_grids = fake_grids() if superstar else None
    weighted_superstar_grids = fake_grids() if weighted else None

    return Results(super_grids=super_grids,
                   protein=protein,
                   buriedness=buriedness_grid,
                   superstar=superstar_grids,
                   weighted_superstar=weighted_superstar_grids)
def augmentation(hr, entries):
    """
    Prepare the hotspot grids held by `hr` for use with docking `entries`.

    Heavy-atom coordinates are collected from the leading entries (the loop
    stops once the entry at index 101 has been added) and used to build a
    blank grid with a padding of 12. The grids in `hr.super_grids` are put on
    common boundaries with a protein-sized grid, shrunk to the blank grid and
    passed through `Grid.get_single_grid`. `hr.set_background()` and
    `hr.normalize_to_max()` are called before returning.

    :param hr: result object exposing `super_grids`, `protein`,
        `set_background()` and `normalize_to_max()`; modified in place
    :param entries: iterable of items that carry a `molecule`
    :return: the same `hr` object
    """
    # blank grid which can contain the sampled docking poses
    coords = set()
    for index, entry in enumerate(entries):
        coords.update(atom.coordinates
                      for atom in entry.molecule.heavy_atoms)
        if index > 100:
            break
    pose_box = Grid.initalise_grid(coords=coords, padding=12)

    # the dilation step (g.dilate_by_atom()) is disabled in this function

    # bring each map onto common boundaries with a protein-sized grid
    protein_box = Grid.initalise_grid(
        [atom.coordinates for atom in hr.protein.heavy_atoms], padding=1)
    for probe, grid in hr.super_grids.items():
        hr.super_grids[probe] = protein_box.common_boundaries(grid)

    # crop the maps to the pose region to save time
    cropped = {}
    for probe, grid in hr.super_grids.items():
        cropped[probe] = Grid.shrink(small=pose_box, big=grid)

    # only the first value returned by get_single_grid is used
    masked, _ = Grid.get_single_grid(cropped)
    hr.super_grids = masked

    hr.set_background()
    hr.normalize_to_max()
    return hr
def _get_buriedness_grid(self):
    """
    calculates the buriedness grid

    (if self.buriedness_method = ligsite, do nothing as we get this grid for
    free during the SuperStar Calc)

    The elapsed time is written to `self.buriedness_time` for every method.
    For methods other than 'ligsite' the grid is written to
    `self.buriedness`, creating the output directory first if needed.

    :raises TypeError: if `self.buriedness_method` is not one of 'ghecom',
        'ghecom_internal' or 'ligsite'
    :return: None
    """
    # inputs
    prot = Protein.from_file(self.apo_prep)
    method = self.buriedness_method

    # tasks
    start = time.time()
    coords = [a.coordinates for a in prot.atoms]
    out_grid = Grid.initalise_grid(coords=coords, padding=2, spacing=0.5)

    if method == 'ghecom':
        b = Buriedness(protein=prot, out_grid=out_grid)
        g = b.calculate().grid
        shutil.rmtree(b.settings.working_directory)
    elif method == 'ghecom_internal':
        b = ExpBuriedness(prot=prot, out_grid=out_grid)
        g = b.buriedness_grid()
    elif method == 'ligsite':
        g = None
    else:
        raise TypeError("Not a valid pocket detection method")
    finish = time.time()

    # outputs
    with open(self.buriedness_time, 'w') as t:
        t.write(str(finish - start))

    if method != 'ligsite':
        out_dir = os.path.dirname(self.buriedness)
        # dirname is '' for a bare file name, in which case there is nothing
        # to create; makedirs also copes with missing parent directories.
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)
        g.write(self.buriedness)
def random_grid(num_of_spheres, return_coords=False, radius=1, value=8,
                scaling='linear'):
    """
    Create a grid around the 6Y2G test ligand populated with random spheres.

    :param num_of_spheres: number of spheres to place
    :param return_coords: if True, also return the sphere centres
    :param radius: radius passed to `set_sphere`
    :param value: value passed to `set_sphere`
    :param scaling: scaling passed to `set_sphere`
    :return: the grid, or `(grid, coords)` when `return_coords` is True, where
        `coords` lists the points passed to `set_sphere` in placement order
    """
    # something in around the 6Y2G binding site (might be needed later)
    mol = MoleculeReader("testdata/6y2g_A/A_mol.mol2")[0]
    g = Grid.initalise_grid([a.coordinates for a in mol.atoms])

    coords = []
    for _ in range(num_of_spheres):
        # keep centres at least two steps away from every grid edge
        i, j, k = [
            np.random.randint(low=2, high=ax - 2, size=1) for ax in g.nsteps
        ]
        point = g.indices_to_point(i, j, k)
        g.set_sphere(point=point, radius=radius, value=value, scaling=scaling)
        coords.append(point)

    if return_coords:
        # previously `return g,` produced a 1-tuple without any coordinates
        return g, coords
    return g
def testscore_atoms_as_spheres(self):
    """
    Read the docking poses and hotspot maps from testdata/result/data,
    prepare the maps and print the extrema of the maps and the
    `score_atoms_as_spheres` result for the first pose.
    """
    with PushDir("testdata/result/data"):
        mols = list(MoleculeReader("gold_docking_poses.sdf"))

        # blank grid which can contain all docking poses
        pose_coords = {
            atom.coordinates for mol in mols for atom in mol.heavy_atoms
        }
        small_blank = Grid.initalise_grid(coords=pose_coords, padding=2)

        # read hotspot maps
        with HotspotReader(path="out.zip") as reader:
            self.result = reader.read()

        # dilate the grids
        for probe in list(self.result.super_grids):
            dilated = self.result.super_grids[probe].dilate_by_atom()
            self.result.super_grids[probe] = dilated

        # shrink hotspot maps to save time
        sub_grids = {}
        for probe, grid in self.result.super_grids.items():
            sub_grids[probe] = Grid.shrink(small=small_blank, big=grid)

        # only the first value returned by get_single_grid is used
        mask_dic, _ = Grid.get_single_grid(sub_grids)
        self.result.super_grids = mask_dic

        self.result.set_background()
        self.result.normalize_to_max()

        print([g.extrema for p, g in self.result.super_grids.items()])

        for mol in mols[:1]:
            score = self.result.score_atoms_as_spheres(mol, small_blank)
            print(score)
def from_ligands(ligands, identifier, protein=None, settings=None):
    """
    creates a Pharmacophore Model from a collection of overlaid ligands

    Features are detected on the ligands with the CrossMiner feature
    definitions, written as spheres into one grid per interaction type
    ("apolar", "acceptor", "donor"), and the peaks of those grids become the
    features of the model.

    :param `ccdc.molecule.Molecule` ligands: ligands from which the Model is created
    :param str identifier: identifier for the Pharmacophore Model
    :param `ccdc.protein.Protein` protein: target system that the model has been created for
    :param `hotspots.hs_pharmacophore.PharmacophoreModel.Settings` settings: Pharmacophore Model settings
    :raises ImportError: if the CrossMiner feature definitions cannot be read
    :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`

    >>> from ccdc.io import MoleculeReader
    >>> from hotspots.hs_pharmacophore import PharmacophoreModel

    >>> mols = MoleculeReader("ligand_overlay_model.mol2")
    >>> model = PharmacophoreModel.from_ligands(mols, "ligand_overlay_pharmacophore")
    >>> # write to .json and search in pharmit
    >>> model.write("model.json")
    """
    cm_dic = crossminer_features()
    blank_grd = Grid.initalise_grid(
        [a.coordinates for lig in ligands for a in lig.atoms])
    feature_dic = {
        "apolar": blank_grd.copy(),
        "acceptor": blank_grd.copy(),
        "donor": blank_grd.copy()
    }

    if not settings:
        settings = PharmacophoreModel.Settings()

    if isinstance(ligands[0], Molecule):
        # Molecules are round-tripped through a mol2 file and re-read with
        # CrystalReader before feature detection.
        # NOTE(review): the temporary directory is never removed.
        temp = tempfile.mkdtemp()
        with io.MoleculeWriter(join(temp, "ligs.mol2")) as w:
            for lig in ligands:
                w.write(lig)
        ligands = list(io.CrystalReader(join(temp, "ligs.mol2")))

    try:
        Pharmacophore.read_feature_definitions()
    except Exception:
        # `except Exception` rather than a bare except, so that
        # KeyboardInterrupt / SystemExit are not converted to ImportError
        raise ImportError("Crossminer is only available to CSD-Discovery")

    excluded = {
        'exit_vector', 'heavy_atom', 'hydrophobe', 'fluorine', 'bromine',
        'chlorine', 'iodine', 'halogen'
    }
    feature_definitions = [
        fd for fd in Pharmacophore.feature_definitions.values()
        if fd.identifier not in excluded
    ]

    for fd in feature_definitions:
        detected = [fd.detect_features(ligand) for ligand in ligands]
        all_feats = [f for feats in detected for f in feats]
        for f in all_feats:
            feature_dic[cm_dic[fd.identifier]].set_sphere(
                f.spheres[0].centre, f.spheres[0].radius, 1)

    features = []
    for feat, feature_grd in feature_dic.items():
        peaks = feature_grd.get_peaks(min_distance=4, cutoff=1)
        for p in peaks:
            coords = Coordinates(p[0], p[1], p[2])

            # projections are only looked up for polar features, and only
            # when a protein was supplied
            projected_coordinates = None
            if feat in ("donor", "acceptor") and protein:
                projected_coordinates = _PharmacophoreFeature.get_projected_coordinates(
                    feat, coords, protein, settings)

            features.append(
                _PharmacophoreFeature(
                    projected=None,
                    feature_type=feat,
                    feature_coordinates=coords,
                    projected_coordinates=projected_coordinates,
                    score_value=feature_grd.value_at_coordinate(
                        coords, position=False),
                    vector=None,
                    settings=settings))

    return PharmacophoreModel(settings,
                              identifier=identifier,
                              features=features,
                              protein=protein,
                              dic=feature_dic)
def setUp(self):
    """
    Load the 1hcl test protein, remove its waters, add hydrogens and build a
    template grid from the coordinates of all its atoms.
    """
    self.protein = protein = Protein.from_file("testdata/1hcl/protein.pdb")
    protein.remove_all_waters()
    protein.add_hydrogens()

    atom_coords = [atom.coordinates for atom in protein.atoms]
    self.template = Grid.initalise_grid(atom_coords)