def _get_volume_overlap(self, cav_id, other_id, lig_id):
    """
    Write volume-overlap percentages between a ligand, a BCV result and a hotspot result.

    The hotspot result is read from ``<self.hotspot[cav_id]>/out.zip`` and the BCV
    result from ``<self.bcv[cav_id][other_id][lig_id]>/out.zip``. If either archive
    is missing a message is printed and nothing is written.

    Four files are written (paths taken from the corresponding ``self`` lookups):

    * ``bcv_lig_overlaps``: BCV/ligand overlap as a percentage of the ligand volume
    * ``bcv_hot_overlaps``: BCV/ligand overlap as a percentage of the BCV volume
    * ``hot_lig_overlaps``: hotspot/ligand overlap as a percentage of the ligand
      volume, one comma separated value per threshold (10, 14, 17)
    * ``hot_hot_overlaps``: hotspot/ligand overlap as a percentage of the
      thresholded hotspot volume, one comma separated value per threshold

    "Volume" here is whatever ``count_grid()`` reports for the relevant grid.

    :param cav_id: key of the cavity whose hotspot result is used
    :param other_id: key of the structure the ligand was extracted from
    :param lig_id: key of the ligand
    :return: None
    """

    def nonzero(val):
        # a zero volume would raise ZeroDivisionError below; treat it as 1
        return 1 if val == 0 else val

    # inputs
    path1 = os.path.join(self.hotspot[cav_id], "out.zip")
    path2 = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
    thresholds = [10, 14, 17]

    if not (os.path.exists(path1) and os.path.exists(path2)):
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        return

    # the ligand is only read once both result archives are known to exist
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    bcv = HotspotReader(path2).read()
    hot = HotspotReader(path1).read()

    # tasks
    other = Grid.from_molecule(mol)

    bcv_sg = Grid.get_single_grid(bcv.super_grids, mask=False)
    bcv_overlap = bcv_sg._mutually_inclusive(other=other).count_grid()

    # every denominator goes through the same zero guard
    lig_vol = nonzero((other > 0).count_grid())
    bcv_vol = nonzero((bcv_sg > 0).count_grid())

    # the combined hotspot grid does not depend on the threshold: build it once
    hot_sg = Grid.get_single_grid(hot.super_grids, mask=False)
    hot_sgs = [hot_sg > t for t in thresholds]
    hot_vols = [nonzero(g.count_grid()) for g in hot_sgs]
    hot_overlap = [g._mutually_inclusive(other=other).count_grid() for g in hot_sgs]

    # output
    with open(self.bcv_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str((bcv_overlap / lig_vol) * 100))

    with open(self.bcv_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str((bcv_overlap / bcv_vol) * 100))

    with open(self.hot_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        hot_lig = [str((a / lig_vol) * 100) for a in hot_overlap]
        print(hot_lig)
        writer.write(",".join(hot_lig))

    with open(self.hot_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        hot_hot = [str((overlap / vol) * 100) for overlap, vol in zip(hot_overlap, hot_vols)]
        writer.write(",".join(hot_hot))
def __init__(self, hr, settings=None):
    """
    Set up the object from a hotspot result.

    :param hr: hotspot result; its ``super_grids`` mapping is modified in place
    :param settings: settings object, a fresh ``self.Settings()`` is used when None
    """
    self.settings = self.Settings() if settings is None else settings

    # attributes start empty; the two grid attributes are filled at the end
    self._single_grid = None
    self._masked_dic = None
    self.out_dir = None
    self.extracted_hotspots = None
    self.threshold = None

    # each charged map is deduplicated against its polar counterpart
    charged_vs_polar = (("negative", "acceptor"), ("positive", "donor"))
    try:
        for charged, polar in charged_vs_polar:
            hr.super_grids[charged] = hr.super_grids[charged].deduplicate(
                hr.super_grids[polar], threshold=10, tolerance=2)
    except KeyError:
        # a missing map stops the deduplication; remaining maps are left as they are
        pass

    self.hotspot_result = hr
    self._masked_dic, self._single_grid = Grid.get_single_grid(
        self.hotspot_result.super_grids)
def tractability_workflow(protein, tag):
    """
    Run a minimal tractability workflow for a single protein.

    A Fragment Hotspot calculation is run from the PDB code, a volume of 500 is
    extracted from the result, and the grid values of the combined map above a
    threshold of 5 are collected together with their median.

    :param str protein: PDB identification code
    :param str tag: Tractability tag: either 'druggable' or 'less-druggable'
    :return: `pandas.DataFrame` with columns 'scores', 'pdb', 'median' and
        'tractability', one row per collected grid value
    """
    # 1) Fragment Hotspot calculation
    hotspot_result = Runner().from_pdb(pdb_code=protein,
                                       nprocesses=1,
                                       buriedness_method='ghecom')

    # 2) Best Continuous Volume
    bcv = Extractor(hr=hotspot_result).extract_volume(volume=500)

    # 3) scores of the combined map and their median
    combined = Grid.get_single_grid(bcv.super_grids, mask=False)
    scores = combined.grid_values(threshold=5)
    median_score = np.median(scores)

    # 4) one row per score; the scalar values are repeated on every row
    n_rows = len(scores)
    return pd.DataFrame({
        'scores': scores,
        'pdb': [protein] * n_rows,
        'median': [median_score] * n_rows,
        'tractability': [tag] * n_rows,
    })
def augmentation(hr, hits):
    """
    Prepare the maps of a hotspot result for use with a set of hits.

    The maps are dilated, brought onto common boundaries with a grid built
    around the protein, cut down to a grid enclosing the heavy atoms of all
    hits, replaced by the masked maps from ``Grid.get_single_grid`` and
    finally background-set and normalised.

    :param hr: hotspot result, modified in place
    :param hits: object whose ``hits`` attribute iterates over items exposing ``molecule``
    :return: the modified ``hr``
    """
    # blank grid enclosing the heavy atoms of every hit
    hit_coords = {
        atom.coordinates
        for hit in hits.hits
        for atom in hit.molecule.heavy_atoms
    }
    small_blank = Grid.initalise_grid(coords=hit_coords, padding=3)

    # dilate the grids
    for probe in list(hr.super_grids):
        hr.super_grids[probe] = hr.super_grids[probe].dilate_by_atom()

    # inflate
    protein_grid = Grid.initalise_grid(
        [atom.coordinates for atom in hr.protein.heavy_atoms], padding=1)
    for probe in list(hr.super_grids):
        hr.super_grids[probe] = protein_grid.common_boundaries(hr.super_grids[probe])

    # shrink hotspot maps to save time
    sub_grids = {}
    for probe, grid in hr.super_grids.items():
        sub_grids[probe] = Grid.shrink(small=small_blank, big=grid)

    # only the masked per-probe maps are kept; the combined grid is not needed
    masked, _ = Grid.get_single_grid(sub_grids)
    hr.super_grids = masked

    # set the background (to 1, per the original author's note) and normalise
    hr.set_background()
    hr.normalize_to_max()
    return hr
def score_hotspot(self, threshold=5, percentile=50):
    """
    Score the hotspot from the values of its combined (unmasked) grid.

    With the default percentile of 50 the original author describes the score as
    the median of all points included in the hotspot.
    NB: grid points with value < 5 are omitted from fragment hotspot maps
    (hence the default threshold)

    :param threshold: passed through to ``grid_score``
    :param percentile: passed through to ``grid_score``
    :return: the value returned by ``grid_score``
    """
    return Grid.get_single_grid(
        self.hotspot_result.super_grids, mask=False
    ).grid_score(threshold=threshold, percentile=percentile)
def _calc_hotspots(self, return_probes=False):
    """
    Handle the organisation of the hotspot calculation.

    Steps, in order: atomic hotspot detection (stored in ``self.superstar_grids``),
    buriedness calculation (stored in ``self.buriedness`` unless one is already
    set), weighting (``self.weighted_grids``) and sampling of every probe type
    through ``self._get_out_maps``.

    :param return_probes: optional, bool indicating if probe molecules should be
        returned; when True the output of ``_get_out_maps`` is stored in
        ``self.sampled_probes`` under the probe name
    :return: None
    """
    print("Start atomic hotspot detection\n Processors: {}".format(self.nprocesses))
    a = AtomicHotspot()

    # Build the complete probe set first and assign it once, so the charged
    # probes are added to the standard ones rather than replacing them.
    atomic_probes = {"apolar": "AROMATIC CH CARBON",
                     "donor": "UNCHARGED NH NITROGEN",
                     "acceptor": "CARBONYL OXYGEN"}
    if self.charged_probes:
        atomic_probes.update({"negative": "CARBOXYLATE OXYGEN",
                              "positive": "CHARGED NH NITROGEN"})
    a.settings.atomic_probes = atomic_probes

    probe_types = a.settings.atomic_probes.keys()
    self.superstar_grids = a.calculate(protein=self.protein,
                                       nthreads=self.nprocesses,
                                       cavity_origins=self.cavities)

    print("Atomic hotspot detection complete\n")

    print("Start buriedness calcualtion")
    # a buriedness grid that is already set is never recomputed
    method = self.buriedness_method.lower()
    if method == 'ghecom' and self.buriedness is None:
        print(" method: Ghecom")
        out_grid = self.superstar_grids[0].buriedness.copy_and_clear()
        b = Buriedness(protein=self.protein,
                       out_grid=out_grid)
        self.buriedness = b.calculate().grid
    elif method == 'ligsite' and self.buriedness is None:
        print(" method: LIGSITE")
        self.buriedness = Grid.get_single_grid(
            grd_dict={s.identifier: s.buriedness for s in self.superstar_grids},
            mask=False)

    self.weighted_grids = self._get_weighted_maps()

    print("Buriedness calcualtion complete\n")

    print("Start sampling")
    grid_dict = {w.identifier: w.grid for w in self.weighted_grids}

    for probe in probe_types:
        if return_probes:
            # update() takes one mapping; the previous two-argument call raises
            # TypeError on a dict (assumes sampled_probes is dict-like - TODO confirm)
            self.sampled_probes.update({probe: self._get_out_maps(probe, grid_dict)})
        else:
            self._get_out_maps(probe, grid_dict)

    print("Sampling complete\n")
def percentage_matched_atoms(self, mol, threshold, match_atom_types=True):
    """
    For a given molecule, the 'percentage match' is the percentage of heavy atoms
    which overlap with the hotspot result (over a given overlap threshold).

    With ``match_atom_types`` each map in ``self.super_grids`` is only checked
    against the atoms whose type (from ``self.get_atom_type``) equals the map
    name. 'doneptor' atoms are checked against both the 'donor' and the
    'acceptor' map, so such an atom can be counted twice. Without it, all heavy
    atoms are checked against the combined (unmasked) grid.

    A molecule without heavy atoms gives a percentage of 0.0.

    :param mol: molecule exposing ``heavy_atoms``
    :param threshold: overlap threshold passed to ``matched_atoms``
    :param match_atom_types: bool, match atoms against the map of their own type
    :return: percentage rounded to one decimal place; when ``match_atom_types``
        is True a tuple of (percentage, dict of matched atoms by map name)
    """
    heavy_atoms = mol.heavy_atoms
    heavy_atom_count = len(heavy_atoms)

    def percentage(count):
        # guard against ZeroDivisionError for a molecule without heavy atoms
        if heavy_atom_count == 0:
            return 0.0
        return round((count / heavy_atom_count) * 100, 1)

    if not match_atom_types:
        sg = Grid.get_single_grid(self.super_grids, mask=False)
        matched = sg.matched_atoms(atoms=heavy_atoms, threshold=threshold)
        matched_atom_count = len(matched)
        print("heavy atoms matched: {}/{}".format(matched_atom_count, heavy_atom_count))
        return percentage(matched_atom_count)

    # type every atom once instead of once per map
    typed_atoms = [(atom, self.get_atom_type(atom)) for atom in heavy_atoms]

    matched_atom_count = 0
    atom_type_dic = {}
    for n, g in self.super_grids.items():
        # if an atom is a donor and acceptor consider overlap twice
        accepts_doneptor = n in ('donor', 'acceptor')
        atms = [atom for atom, atom_type in typed_atoms
                if atom_type == n or (accepts_doneptor and atom_type == 'doneptor')]

        if atms:
            matched = g.matched_atoms(atoms=atms, threshold=threshold)
            matched_atom_count += len(matched)
            atom_type_dic[n] = matched

    print("heavy atoms matched: {}/{}".format(matched_atom_count, heavy_atom_count))
    print("breakdown by atom type", str(atom_type_dic))
    return percentage(matched_atom_count), atom_type_dic
def __init__(self, hr, settings=None):
    """
    Set up the object from a hotspot result.

    The maps in ``hr.super_grids`` are pre-processed in place: optionally
    replaced by their ``max_value_of_neighbours()`` (when ``settings.mvon`` is
    True), charged maps deduplicated against their polar counterparts, and
    every map replaced by its ``minimal()`` form.

    :param hr: hotspot result; its ``super_grids`` mapping is modified in place
    :param settings: settings object, a fresh ``self.Settings()`` is used when None
    """
    self.settings = self.Settings() if settings is None else settings

    # attributes start empty; the two grid attributes are filled at the end
    self._single_grid = None
    self._masked_dic = None
    self.out_dir = None
    self.extracted_hotspots = None
    self.threshold = None

    if self.settings.mvon is True:
        neighbour_max = {}
        for probe, g in hr.super_grids.items():
            neighbour_max[probe] = g.max_value_of_neighbours()
        hr.super_grids.update(neighbour_max)
        #hr.super_grids.update({probe: g.dilate_by_atom() for probe, g in hr.super_grids.items()})

    # each charged map is deduplicated against its polar counterpart
    charged_vs_polar = (("negative", "acceptor"), ("positive", "donor"))
    try:
        for charged, polar in charged_vs_polar:
            hr.super_grids[charged] = hr.super_grids[charged].deduplicate(
                hr.super_grids[polar], threshold=10, tolerance=2)
    except KeyError:
        # a missing map stops the deduplication; remaining maps are left as they are
        pass

    try:
        # all minimal grids are built before any map is replaced, so a
        # RuntimeError leaves every map untouched
        minimal_grids = {probe: g.minimal() for probe, g in hr.super_grids.items()}
        hr.super_grids.update(minimal_grids)
    except RuntimeError:
        pass

    self.hotspot_result = hr
    self._masked_dic, self._single_grid = Grid.get_single_grid(
        self.hotspot_result.super_grids)
def _docking_fitting_pts(self, _best_island=None, threshold=17):
    """
    Build a molecule named "constraints" with one atom per grid point returned
    by ``grid_value_by_coordinates`` for the given threshold.

    Each atom carries the score of its grid point both as its label (two
    decimal places) and as its partial charge.

    :param _best_island: grid to take the points from; when falsy the combined
        (unmasked) grid of ``self.super_grids`` is used
    :param threshold: passed through to ``grid_value_by_coordinates``
    :return: the constructed molecule
    """
    source_grid = (_best_island if _best_island
                   else Grid.get_single_grid(self.super_grids, mask=False))
    points_by_score = source_grid.grid_value_by_coordinates(threshold=threshold)

    constraints = Molecule(identifier="constraints")
    for score, points in points_by_score.items():
        for point in points:
            # NOTE(review): symbol 'C' is paired with atomic_number=14 (carbon is 6);
            # kept as-is, confirm which of the two the Atom constructor honours
            atom = Atom(atomic_symbol='C',
                        atomic_number=14,
                        label='{:.2f}'.format(score),
                        coordinates=point)
            atom.partial_charge = score
            constraints.add_atom(atom=atom)
    return constraints
def testscore_atoms_as_spheres(self):
    """
    Exercise ``score_atoms_as_spheres`` on the first docking pose.

    The stored hotspot result is read, its maps dilated, cut down to a grid
    around the docking poses, masked, background-set and normalised; the map
    extrema and the score of the first pose are printed.
    """
    with PushDir("testdata/result/data"):
        poses = list(MoleculeReader("gold_docking_poses.sdf"))

        # blank grid enclosing the heavy atoms of every docking pose
        pose_coords = {
            atom.coordinates
            for pose in poses
            for atom in pose.heavy_atoms
        }
        small_blank = Grid.initalise_grid(coords=pose_coords, padding=2)

        # read hotspot maps
        with HotspotReader(path="out.zip") as reader:
            self.result = reader.read()

        # dilate the grids
        for probe in list(self.result.super_grids):
            self.result.super_grids[probe] = self.result.super_grids[probe].dilate_by_atom()

        # shrink hotspot maps to save time
        sub_grids = {}
        for probe, grid in self.result.super_grids.items():
            sub_grids[probe] = Grid.shrink(small=small_blank, big=grid)

        # only the masked per-probe maps are kept
        masked, _ = Grid.get_single_grid(sub_grids)
        self.result.super_grids = masked

        # set the background (to 1, per the original author's note) and normalise
        self.result.set_background()
        self.result.normalize_to_max()

        print([grid.extrema for grid in self.result.super_grids.values()])

        for pose in poses[:1]:
            score = self.result.score_atoms_as_spheres(pose, small_blank)
            print(score)
def augmentation(hr, entries):
    """
    Prepare the maps of a hotspot result for use with a set of docking entries.

    The maps are brought onto common boundaries with a grid built around the
    protein, cut down to a grid enclosing the heavy atoms of the leading
    entries, replaced by the masked maps from ``Grid.get_single_grid`` and
    finally background-set and normalised.

    :param hr: hotspot result, modified in place
    :param entries: iterable of items exposing ``molecule``; iteration stops
        after the entry at index 101 has been processed
    :return: the modified ``hr``
    """
    # blank grid enclosing the heavy atoms of the sampled docking poses
    coords = set()
    for index, entry in enumerate(entries):
        coords.update(atom.coordinates for atom in entry.molecule.heavy_atoms)
        if index > 100:
            break
    small_blank = Grid.initalise_grid(coords=coords, padding=12)

    # dilate the grids
    # for p, g in hr.super_grids.items():
    #     hr.super_grids[p] = g.dilate_by_atom()

    # inflate
    protein_grid = Grid.initalise_grid(
        [atom.coordinates for atom in hr.protein.heavy_atoms], padding=1)
    for probe in list(hr.super_grids):
        hr.super_grids[probe] = protein_grid.common_boundaries(hr.super_grids[probe])

    # shrink hotspot maps to save time
    sub_grids = {}
    for probe, grid in hr.super_grids.items():
        sub_grids[probe] = Grid.shrink(small=small_blank, big=grid)

    # only the masked per-probe maps are kept; the combined grid is not needed
    masked, _ = Grid.get_single_grid(sub_grids)
    hr.super_grids = masked

    # set the background (to 1, per the original author's note) and normalise
    hr.set_background()
    hr.normalize_to_max()
    return hr
def single_grid_result(self):
    """
    Replace ``self.super_grids`` with the maps obtained by combining the
    current maps into a single grid and inverting that grid again with the
    masked maps.

    :return: None
    """
    masked, combined = Grid.get_single_grid(self.super_grids)
    self.super_grids = combined.inverse_single_grid(masked)