def cavities(self, obj):
    """
    optional settings, cavities supplied to the calculation

    The supplied object is normalised and stored on ``self._cavities`` as
    either ``None`` or a list with one entry per cavity:

    - ``None`` is stored as ``None``
    - a single ``Coordinates`` is stored as ``[obj]``
    - a list/tuple of ``Coordinates`` is stored as a list of those coordinates
    - a ``Molecule`` (or list/tuple of them) is stored as the result of
      ``centre_of_geometry()`` for each molecule
    - a ``Cavity`` (or list/tuple of them) is stored as the result of
      ``Helper.cavity_centroid`` for each cavity
    - anything else (including an empty list/tuple) prints a warning and
      stores ``None``

    :param obj: cavity information provided
    :type obj: list or tuple or Coordinates or `ccdc.molecule.Molecule` or `ccdc.cavity.Cavity` or None
    :return: None
    """
    warning = "WARNING! Failed to detected cavity, Atomic Hotspot detection to run on whole protein"

    if obj is None:
        self._cavities = None
        return

    if isinstance(obj, (list, tuple)) and len(obj) == 0:
        # Nothing to inspect; indexing obj[0] below would raise IndexError.
        print(warning)
        self._cavities = None
    elif isinstance(obj, Coordinates):
        # Checked before the generic list/tuple branch.
        # NOTE(review): presumably Coordinates is a tuple subclass, which is
        # why the original tested it inside the list/tuple branch - confirm.
        self._cavities = [obj]
    elif isinstance(obj, (list, tuple)):
        # The container type is decided from its first element only.
        first = obj[0]
        if isinstance(first, Coordinates):
            self._cavities = list(obj)
        elif isinstance(first, Molecule):
            self._cavities = [m.centre_of_geometry() for m in obj]
        elif isinstance(first, Cavity):
            self._cavities = [Helper.cavity_centroid(c) for c in obj]
        else:
            print(warning)
            self._cavities = None
    elif isinstance(obj, Molecule):
        self._cavities = [obj.centre_of_geometry()]
    elif isinstance(obj, Cavity):
        self._cavities = [Helper.cavity_centroid(obj)]
    else:
        print(warning)
        self._cavities = None
def get_projected_coordinates(feature_type, feature_coordinates, protein, settings):
    """
    for a given polar feature, the nearest h-bonding partner on the protein is located.

    Candidate partners are the protein's acceptor atoms when ``feature_type``
    is ``'donor'`` and its donor atoms otherwise. Only atoms whose
    ``Helper.get_distance`` to ``feature_coordinates`` is below
    ``settings.max_hbond_dist`` are considered. On equal distances the atom
    encountered first wins.

    :param feature_type: interaction type of the feature ('donor' selects acceptor partners)
    :param feature_coordinates: coordinates of the feature
    :param protein: a :class:`ccdc.protein.Protein` instance
    :param settings: object providing ``max_hbond_dist``
    :return: coordinates of the closest hydrogen-bonding partner, or None if none is in range
    """
    if feature_type == 'donor':
        partners = [atom for atom in protein.atoms if atom.is_acceptor]
    else:
        partners = [atom for atom in protein.atoms if atom.is_donor]

    nearest_atom = None
    nearest_dist = None
    for atom in partners:
        separation = Helper.get_distance(atom.coordinates, feature_coordinates)
        if not separation < settings.max_hbond_dist:
            continue
        # strict '<' keeps the first atom seen at the minimum distance
        if nearest_dist is None or separation < nearest_dist:
            nearest_atom = atom
            nearest_dist = separation

    if nearest_atom is None:
        return None
    return nearest_atom.coordinates
def _molecule_as_grid(mol, g=None):
    """
    Produces a grid representation of a molecule split by interaction type

    One copy of the template grid is made for each of "donor", "acceptor" and
    "apolar". For every atom whose ``Helper.get_atom_type`` equals the
    interaction type, a sphere of value 1 with the atom's ``vdw_radius`` is
    set on the corresponding copy. Atoms of any other type are not drawn.

    :param mol: takes any ccdc molecule
    :type mol: `ccdc.molecule.Molecule`
    :param g: a blank grid; when falsy, one is initialised around the atom coordinates with padding 3
    :type g: `hotspots.grid_extension.Grid`

    :return: a dictionary of grids by interaction type
    :rtype: dict
    """
    template = g
    if not template:
        atom_coords = [atom.coordinates for atom in mol.atoms]
        template = Grid.initalise_grid(coords=atom_coords, padding=3)

    grids_by_type = {}
    for interaction in ("donor", "acceptor", "apolar"):
        grids_by_type[interaction] = template.copy()

    for interaction, type_grid in grids_by_type.items():
        matching = [atom for atom in mol.atoms if Helper.get_atom_type(atom) == interaction]
        for atom in matching:
            type_grid.set_sphere(point=atom.coordinates,
                                 radius=atom.vdw_radius,
                                 value=1,
                                 scaling='None')

    return grids_by_type
def neighbourhood(i, j, k, high, catchment=1):
    """
    find the neighbourhood of a given indice.

    Candidate indices are taken from the cube of half-width ``catchment``
    around (i, j, k), clipped per axis to ``0 <= value < high[axis]``. Of
    those, only candidates whose ``Helper.get_distance`` to (i, j, k) equals
    1 are returned.

    :param i: i indice
    :param j: j indice
    :param k: k indice
    :param high: exclusive upper bound for each axis
    :param catchment: number of steps from the centre
    :type i: int
    :type j: int
    :type k: int
    :type catchment: int

    :return: indices of the neighbourhood
    :rtype: list
    """
    centre = (i, j, k)

    # clip the candidate window on each axis to the valid index range
    windows = []
    for axis, mid in enumerate(centre):
        windows.append([
            value for value in range(mid - catchment, mid + catchment + 1)
            if 0 <= value < high[axis]
        ])
    i_values, j_values, k_values = windows

    neighbours = []
    for a in i_values:
        for b in j_values:
            for c in k_values:
                if Helper.get_distance([a, b, c], [i, j, k]) == 1:
                    neighbours.append([a, b, c])
    return neighbours
def _get_atomic_overlap(self, cav_id, other_id, lig_id):
    """
    compute the per-atom overlap between a ligand and a BCV result and write it to file

    The first molecule of ``self.extracted_ligands[other_id][lig_id]`` is
    scored with ``atomic_volume_overlap`` of the hotspot result read from
    ``out.zip`` in ``self.bcv[cav_id][other_id][lig_id]``. When that archive
    does not exist, every heavy atom is given an overlap of 0.0 under its
    atom type ("doneptor" atoms are recorded under both "donor" and
    "acceptor"). The resulting dictionary is written as text to
    ``self.atomic_overlaps[cav_id][other_id][lig_id]``.

    :param cav_id: key of the cavity run
    :param other_id: key of the protein the ligand belongs to
    :param lig_id: key of the ligand
    :return: None
    """
    # inputs
    ligand = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    archive = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

    # tasks
    if not os.path.exists(archive):
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        overlap = {"donor": {}, "acceptor": {}, "apolar": {}}
        for atom in ligand.heavy_atoms:
            atom_type = Helper.get_atom_type(atom)
            if atom_type == "doneptor":
                destinations = ("donor", "acceptor")
            else:
                destinations = (atom_type,)
            for key in destinations:
                overlap[key][atom.label] = 0.0
    else:
        overlap = HotspotReader(archive).read().atomic_volume_overlap(ligand)

    # output
    with open(self.atomic_overlaps[cav_id][other_id][lig_id], 'w') as handle:
        handle.write(str(overlap))
def deduplicate(self, major, threshold=12, tolerance=2):
    """
    method to deduplicate two grids, used for charged-polar deduplication

    Islands of this grid (at ``threshold``) are discarded when, for at least
    one island of ``major``, any of the following holds:

    - this island contains the major island's centroid (within ``tolerance``)
    - this island's ``count_grid()`` is 8 or fewer
    - the two centroids are less than 4 apart (``Helper.get_distance``)

    The remaining islands are combined with ``Grid.super_grid`` and returned
    on this grid's boundaries.

    :param `ccdc.utilities.Grid` major: overriding grid
    :param int threshold: values above this value
    :param int tolerance: search radius for determining feature overlap
    :return: grid holding only the retained islands; a cleared copy of ``major`` if none are retained
    """
    # Work on a local name instead of rebinding ``self``.
    minor = self
    if minor.bounding_box[0] != major.bounding_box[0] or minor.bounding_box[1] != major.bounding_box[1]:
        minor = major.common_boundaries(minor)

    all_islands = set(minor.islands(threshold=threshold))

    # Island detection on ``major`` does not depend on the island of ``minor``
    # being tested, so it is computed once rather than once per minor island.
    major_islands = list(major.islands(threshold=threshold)) if all_islands else []

    bin_islands = {
        jsland for jsland in all_islands
        if any(
            jsland.contains_point(island.centroid(), tolerance=tolerance)
            or jsland.count_grid() <= 8
            or Helper.get_distance(jsland.centroid(), island.centroid()) < 4
            for island in major_islands
        )
    }

    retained_jslands = list(all_islands - bin_islands)
    if len(retained_jslands) == 0:
        return major.copy_and_clear()

    temp = Grid.super_grid(0, *retained_jslands)
    blank = minor.copy_and_clear()
    return blank.common_boundaries(temp)
def _single_write(self, path, hr):
    """
    write a single result into its own sub-directory of ``path``

    The sub-directory is named after ``hr.identifier`` and stored on
    ``hr.out_dir``. Grids and protein are written there, then the PyMOL
    objects are written using the identifier as the relative path.

    :param path: directory that will contain the result's sub-directory
    :param hr: the result to write; its ``out_dir`` attribute is set here
    :return: None
    """
    result_dir = join(path, hr.identifier)
    hr.out_dir = Helper.get_out_dir(result_dir)

    self._write_grids(hr)
    self._write_protein(hr.out_dir, hr.protein)

    relpath = f'{hr.identifier}'
    self._write_pymol_objects(relpath, hr)
def _single_write(self, path, hr):
    """
    write a single result into its own sub-directory of ``path``

    The sub-directory is named after ``hr.identifier`` and stored on
    ``hr.out_dir``. Grids and protein are written there, then the PyMOL
    objects are written. The relative path handed to the PyMOL writer is the
    identifier, prefixed with the container name when the container is
    called 'out' and ``self.zip_results`` equals ``False``.

    :param path: directory that will contain the result's sub-directory
    :param hr: the result to write; its ``out_dir`` attribute is set here
    :return: None
    """
    result_dir = join(path, hr.identifier)
    hr.out_dir = Helper.get_out_dir(result_dir)

    self._write_grids(hr)
    self._write_protein(hr.out_dir, hr.protein)

    container = basename(path)
    # equality (not truthiness) is kept on purpose: None must not count as False
    prefix_with_container = container == 'out' and self.zip_results == False  # noqa: E712
    relpath = f'{hr.identifier}'
    if prefix_with_container:
        relpath = join(container, relpath)

    self._write_pymol_objects(relpath, hr)
def get_priority_atom(self, molecule):
    """
    Select priority atom. Select polar atom. If multiple polar atoms, select the one furthest from the centre of
    geometry. If no polar atoms, select atom furthest from centre of geometry

    The atom type is "negative"/"positive" for a non-zero formal charge,
    otherwise "acceptor", "donor" or "apolar" (acceptor is tested first).

    :param molecule: a :class: `ccdc.molecule.Molecule` instance
    :return: tuple of (priority atom, str atom type)
    :raises IndexError: if the molecule has no atoms
    """
    c = molecule.centre_of_geometry()
    polar_atoms = [a for a in molecule.atoms if a.is_donor or a.is_acceptor]
    candidates = polar_atoms if len(polar_atoms) > 0 else list(molecule.atoms)

    if not candidates:
        # same exception type the previous sorted(...)[0] lookup produced
        raise IndexError("cannot select a priority atom: molecule has no atoms")

    # Take the atom with the GREATEST distance, as documented. The previous
    # implementation sorted ascending and took element 0, i.e. the closest.
    # Iterating in reverse keeps the earlier tie-break (last atom wins).
    priority_atom = max(
        reversed(candidates),
        key=lambda a: Helper.get_distance(c, a.coordinates),
    )

    pa_type = None
    if priority_atom.formal_charge != 0:
        if priority_atom.formal_charge < 0:
            pa_type = "negative"
        elif priority_atom.formal_charge > 0:
            pa_type = "positive"
    else:
        if priority_atom.is_acceptor:
            pa_type = "acceptor"
        elif priority_atom.is_donor:
            pa_type = "donor"
        else:
            pa_type = "apolar"

    return priority_atom, pa_type
def update_grid(self):
    """
    reads the output file from the pocket detection and assigns values to a grid

    Every line starting with "HETATM" is parsed using the fixed PDB column
    layout (x: columns 31-38, y: 39-46, z: 47-54, temperature factor: 61-66,
    all 1-based). The value written to the grid is ``9.5 - rinacc`` where
    ``rinacc`` is the number read from the temperature-factor columns.

    :return: None
    """
    lines = Helper.get_lines_from_file(self.settings.out_name)
    nx, ny, nz = self.grid.nsteps

    for line in lines:
        if not line.startswith("HETATM"):
            continue

        # 1-based PDB columns converted to 0-based slices. The previous slices
        # started one character late and dropped the leading character of
        # each field (e.g. the sign of a coordinate <= -100.000).
        coordinates = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
        rinacc = float(line[60:66])

        i, j, k = self.grid.point_to_indices(coordinates)
        # NOTE(review): index 0 is excluded on every axis, as in the original;
        # confirm whether that is deliberate.
        if 0 < i < nx and 0 < j < ny and 0 < k < nz:
            self.grid.set_value(i, j, k, 9.5 - rinacc)
def to_grid(target, pdb, root="Z:/patel_set"):
    """
    build a reference pharmacophore from aligned ligands and write its grids

    Ligands are read from ``<root>/<target>/<pdb>/reference_pharmacophore/aligned_mols.mol2``
    and the protein from ``<root>/<target>/<pdb>/hs/<pdb>.pdb``. The result is
    written, zipped, to ``<root>/<target>/<pdb>/reference_pharmacophore/grids``
    with isosurface thresholds 2, 5 and 10.

    :param target: name of the target directory
    :param pdb: PDB identifier, used for both the directory and the protein file name
    :param root: directory containing the target directories; defaults to the previously hard-coded location
    :type target: str
    :type pdb: str
    :type root: str
    :return: None
    """
    out_dir = "{}/{}/{}".format(root, target, pdb)
    reference_dir = join(out_dir, "reference_pharmacophore")

    mols = MoleculeReader(join(reference_dir, "aligned_mols.mol2"))
    p = PharmacophoreModel.from_ligands(ligands=mols, identifier="test")
    protein = Protein.from_file(join(out_dir, "hs", "{}.pdb".format(pdb)))
    result = Results(super_grids=p.dic, protein=protein)

    out = Helper.get_out_dir(join(reference_dir, "grids"))

    settings = HotspotWriter.Settings()
    settings.isosurface_threshold = [2, 5, 10]

    with HotspotWriter(path=out, zip_results=True, settings=settings) as w:
        w.write(result)
def write(self, hr):
    """
    writes the Fragment Hotspot Maps result to the output directory and create the pymol visualisation file

    Each result is written to its own sub-directory (named after its
    identifier) of the container directory. For a list of results, all
    identifiers are replaced by ``hotspot-<index>`` when they are not unique
    or when any of them is missing. A single result without an identifier is
    named ``hotspot``.

    :param hr: a Fragment Hotspot Maps result or list of results
    :type hr: `hotspots.result.Result`

    >>> from hotspots.calculation import Runner
    >>> from hotspots.hs_io import HotspotWriter

    >>> r = Runner
    >>> result = r.from_pdb("1hcl")
    >>> out_dir = <path_to_out>
    >>> with HotspotWriter(out_dir) as w:
    >>>     w.write(result)

    """
    container = Helper.get_out_dir(join(self.path, self.settings.container))

    if isinstance(hr, list):
        identifiers = [h.identifier for h in hr]
        # Duplicate identifiers would share one output directory, and a
        # missing identifier cannot be used as a directory name.
        if len(set(identifiers)) != len(hr) or not all(identifiers):
            for i, h in enumerate(hr):
                h.identifier = f"hotspot-{i}"

        for h in hr:
            self._single_write(container, h)
    else:
        if not hr.identifier:
            hr.identifier = "hotspot"
        self._single_write(container, hr)

    self._write_pymol_isoslider(hr)
    self.pymol_out.commands += PyMOLCommands.background_color(self.settings.bg_color)
    self.pymol_out.commands += PyMOLCommands.push_to_wd()

    if self.zip_results:
        self.compress()

    self.pymol_out.write(join(self.path, "pymol_file.py"))
def pdb_to_grid(path, template):
    """
    converts pdb file to grid

    Every line starting with "HETATM" is parsed using the fixed PDB column
    layout (x: columns 31-38, y: 39-46, z: 47-54, temperature factor: 61-66,
    all 1-based). The value written to the grid is ``9.5 - rinacc`` where
    ``rinacc`` is the number read from the temperature-factor columns. The
    template is modified in place and returned.

    :param path: path to the input PDB
    :param template: empty grid, NB: must have same grid spec as superstar grids
    :type path: str
    :type template: `hotspots.grid_extension.Grid`

    :return: populated grid
    :rtype: `hotspots.grid_extension.Grid`
    """
    lines = Helper.get_lines_from_file(path)
    nx, ny, nz = template.nsteps

    for line in lines:
        if not line.startswith("HETATM"):
            continue

        # 1-based PDB columns converted to 0-based slices. The previous slices
        # started one character late and dropped the leading character of
        # each field (e.g. the sign of a coordinate <= -100.000).
        coordinates = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
        rinacc = float(line[60:66])

        i, j, k = template.point_to_indices(coordinates)
        # NOTE(review): index 0 is excluded on every axis, as in the original;
        # confirm whether that is deliberate.
        if 0 < i < nx and 0 < j < ny and 0 < k < nz:
            template.set_value(i, j, k, 9.5 - rinacc)

    return template
def _get_cavities(self, min_vol):
    """
    detect cavities using Cavity API, generate new directory for each cavity

    Cavities read from ``self.apo_prep`` with a volume above ``min_vol`` each
    get a ``cavity_<n>`` directory under ``self.working_dir`` holding the
    pickled centroid (``cavity_origin.pkl``), the volume as text
    (``cavity.volume``) and the pickled bounding box (``bounding_box.pkl``).
    The cavity names are appended to ``self.runs`` and the path lookup
    attributes are rebuilt for every run, protein id and ligand id.

    :param min_vol: cavities must have a volume greater than this to be kept
    :return: None
    """
    # inputs
    cavs = [c for c in Cavity.from_pdb_file(self.apo_prep) if c.volume > min_vol]
    cavity_names = ["cavity_{}".format(idx) for idx in range(len(cavs))]
    cavity_dirs = [os.path.join(self.working_dir, name) for name in cavity_names]

    # task
    for cavity_dir in cavity_dirs:
        create_directory(path=cavity_dir)

    origins = [Helper.cavity_centroid(c) for c in cavs]
    volumes = [c.volume for c in cavs]
    boxes = [c.bounding_box for c in cavs]

    # output
    for cavity_dir, origin in zip(cavity_dirs, origins):
        with open(os.path.join(cavity_dir, "cavity_origin.pkl"), 'wb') as handle:
            pickle.dump(origin, handle)

    for cavity_dir, vol in zip(cavity_dirs, volumes):
        with open(os.path.join(cavity_dir, "cavity.volume"), 'w') as f:
            f.write(str(vol))

    for cavity_dir, bb in zip(cavity_dirs, boxes):
        with open(os.path.join(cavity_dir, "bounding_box.pkl"), 'wb') as h:
            pickle.dump(bb, h)

    # update attr
    self.runs += ["cavity_{}".format(idx) for idx in range(len(cavs))]

    def cavity_file(filename):
        # {cavity name: <working_dir>/<cavity name>/<filename>}
        return {name: os.path.join(self.working_dir, name, filename) for name in cavity_names}

    def run_dir(dirname):
        # {run: <working_dir>/<run>/<dirname>} for every run
        return {run: os.path.join(self.working_dir, run, dirname) for run in self.runs}

    def timed(directories):
        # {key: <directory>/time.time}
        return {key: os.path.join(directory, "time.time") for key, directory in directories.items()}

    def per_ligand(subdir, pattern, *tail):
        # {run: {protein id: {ligand id: <working_dir>/<run>/<subdir>/<pattern % ligand>[/tail]}}}
        return {
            run: {
                pid: {
                    lig: os.path.join(self.working_dir, run, subdir, pattern.format(lig), *tail)
                    for lig in self.ligand_id[j]
                }
                for j, pid in enumerate(self.protein_id)
            }
            for run in self.runs
        }

    self.cavities = cavity_file("cavity_origin.pkl")
    self.cavities_volumes = cavity_file("cavity.volume")
    self.cavity_score = cavity_file("cavity.score")
    self.bounding_box = cavity_file("bounding_box.pkl")

    self.superstar = run_dir("superstar")
    self.superstar_time = timed(self.superstar)

    self.hotspot = run_dir("hotspot")
    self.hotspot_time = timed(self.hotspot)

    self.bcv = per_ligand("bcv", "volume_{}")
    self.bcv_time = per_ligand("bcv", "volume_{}", "time.time")
    self.bcv_threshold = per_ligand("bcv", "volume_{}", "threshold.dat")

    self.bcv_lig_overlaps = per_ligand("bcv", "lig_overlap_{}.percentage")
    self.bcv_hot_overlaps = per_ligand("bcv", "hot_overlap_{}.percentage")

    self.hot_lig_overlaps = per_ligand("hotspot", "lig_overlap_{}.percentage")
    self.hot_hot_overlaps = per_ligand("hotspot", "hot_overlap_{}.percentage")

    self.atomic_overlaps = per_ligand("bcv", "atomic_overlap_{}.dat")
    self.matched = per_ligand("bcv", "atom_match_{}.percentage")
def write(self, hr):
    """
    writes the Fragment Hotspot Maps result to the output directory and create the pymol visualisation file

    A list of results is written to the "hotspot_boundaries" container with
    one numbered sub-directory per result. A single result is written
    directly into the configured container. In both cases the container is
    compressed afterwards when ``self.zipped`` is set.

    Side effects visible in this method: ``self.settings.grids``,
    ``self.settings.container`` (list input only),
    ``self.settings.isosurface_threshold`` (list input only),
    ``self.settings.pharmacophore``, ``self.number_of_hotspots`` and
    ``self.out_dir`` are reassigned.

    :param `hotspots.result.Result` hr: a Fragment Hotspot Maps result or list of results

    >>> from hotspots.calculation import Runner
    >>> from hotspots.hs_io import HotspotWriter

    >>> r = Runner
    >>> result = r.from_pdb("1hcl")
    >>> out_dir = <path_to_out>
    >>> with HotspotWriter(out_dir) as w:
    >>>     w.write(result)

    """
    if isinstance(hr, list):
        # grid names and the pharmacophore flag are taken from the first result only
        self.settings.grids = list(hr[0].super_grids.keys())
        self.settings.container = "hotspot_boundaries"
        self.number_of_hotspots = len(hr)
        self.out_dir = Helper.get_out_dir(
            join(self.path, self.settings.container))
        self._write_protein(hr[0].protein)
        if hr[0].pharmacophore:
            self.settings.pharmacophore = True
        # hts = [h.hotspot_result for h in hr]
        self._write_pymol(hr, self.zipped)
        for i, hotspot in enumerate(hr):
            # each result gets its own sub-directory named after its list index
            self.out_dir = Helper.get_out_dir(
                join(self.path, self.settings.container, str(i)))
            self.settings.isosurface_threshold = [
                round(hotspot.threshold, 1)
            ]
            # boolean grid expression handed to _write_grids as ``mesh``
            bi = (Grid.super_grid(
                2, hotspot.best_island).max_value_of_neighbours() >
                  hotspot.threshold)
            self._write_grids(hotspot.super_grids, buriedness=None, mesh=bi)
            self._write_protein(hotspot.protein)
            if hotspot.pharmacophore:
                self._write_pharmacophore(hotspot.pharmacophore)
            self._write_pymol(hotspot, False)
        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; this step is assumed to run once after the loop, moving
        # out_dir from the last numbered directory back to the container.
        self.out_dir = dirname(self.out_dir)
        if self.zipped:
            self.compress(
                join(dirname(self.out_dir), self.settings.container))
    else:
        self.settings.grids = list(hr.super_grids.keys())
        # self.settings.container = "out"
        self.number_of_hotspots = 1
        self.out_dir = Helper.get_out_dir(
            join(self.path, self.settings.container))
        self._write_grids(hr.super_grids, buriedness=hr.buriedness)
        self._write_protein(hr.protein)
        if hr.pharmacophore:
            self.settings.pharmacophore = True
            self._write_pharmacophore(hr.pharmacophore)
        self._write_pymol(hr, self.zipped)
        if self.zipped:
            self.compress(
                join(dirname(self.out_dir), self.settings.container))
def _score_protein_cavity(self, prot):
    """
    (prefered option)
    score a protein's atoms, values stored as partial charge
    h_bond_distance between 1.5 - 2.5 A (2.0 A taken for simplicity)
    This method uses the cavity API to reduce the number of atoms to iterate over.

    For every cavity feature:

    - each atom of the feature's residue that is neither donor nor acceptor
      nor hydrogen receives the maximum apolar grid score near it (0 if none)
    - for "acceptor", "donor" and "doneptor" features that have an atom, a
      point ``h_bond_distance`` along the feature's protein vector is tested
      against the hotspot features of the complementary type. The best
      ``score_value`` (0 if none) is stored on the feature atom and on its
      hydrogen neighbours.

    :param prot: protein whose atoms receive the scores (modified in place)
    :return: :class:`ccdc.protein.Protein`
    """
    feats = set(self.hotspot_result.features)
    h_bond_distance = 2.0
    polar_types = ("acceptor", "donor", "doneptor")
    interaction_pairs = {
        "acceptor": "donor",
        "donor": "acceptor",
        "pi": "apolar",
        "aliphatic": "apolar",
        "aromatic": "apolar",
        "apolar": "apolar",
        "donor_acceptor": "doneptor",
        # The polar branch below admits "doneptor" features; without this
        # entry the lookup raised KeyError for them.
        # NOTE(review): "donor_acceptor" features are not admitted by the
        # polar branch - confirm which spelling the cavity API reports.
        "doneptor": "doneptor",
        "dummy": "dummy"
    }

    cavities = Helper.cavity_from_protein(self.object)
    for cavity in cavities:
        for feature in cavity.features:
            # all cavity residues
            for atm in feature.residue.atoms:
                if atm.is_donor is False and atm.is_acceptor is False and atm.atomic_number != 1:
                    near = self.hotspot_result.super_grids['apolar'].get_near_scores(atm.coordinates)
                    prot.atoms[atm.index].partial_charge = max(near) if len(near) > 0 else 0

            # polar cavity residues
            if feature.type not in polar_types or not feature.atom:
                continue

            # probe point: feature position moved h_bond_distance along the protein vector
            v = feature.protein_vector
            c = feature.coordinates
            coordinates = (c.x + h_bond_distance * v.x,
                           c.y + h_bond_distance * v.y,
                           c.z + h_bond_distance * v.z)

            partner_type = interaction_pairs[feature.type]
            matches = [f.score_value for f in feats
                       if f.feature_type == partner_type
                       and f.grid.contains_point(coordinates, tolerance=2)]
            score = max(matches) if len(matches) > 0 else 0

            prot.atoms[feature.atom.index].partial_charge = score

            # score hydrogen atoms (important for GOLD)
            hydrogens = [n.index for n in prot.atoms[feature.atom.index].neighbours
                         if int(n.atomic_number) == 1]
            for h_index in hydrogens:
                prot.atoms[h_index].partial_charge = score

    return prot
def get_polar_cluster_hits(hits_df, clusters_df, hits_dir):
    """
    flag, for every docked pose, which polar clusters it places a matching atom in

    For each row of ``hits_df`` the pose is read from
    ``<hits_dir>/<followup_id>/concat_ranked_docked_ligands_hs-scored.mol2``
    (the pose index is the integer after the last '_' of ``pose_id``). A
    cluster counts as hit (1) when a heavy atom of the matching kind
    (acceptor atoms for 'acceptor' clusters, donor atoms for 'donor'
    clusters) lies closer to the cluster centre than ``cluster_radius + 1``.
    Clusters of any other probe type are never hit.

    :param hits_df: one row per pose; uses columns 'followup_id', 'pose_id', 'followup_smiles', 'mean_hs_score'
    :param clusters_df: one row per cluster; uses columns 'cluster_id', 'probe_type', 'centre_of_mass', 'cluster_radius'
    :param hits_dir: directory containing one sub-directory per followup_id
    :return: DataFrame with 'followup_id' (filled with the pose id), 'followup_smiles', 'mean_hs_score',
             one 0/1 column per cluster id and 'number_hits' (row sum of the cluster columns)
    """
    clust_hitlist = {}
    fu_id_list = []
    fu_smiles_list = []
    mean_hs_scores = []

    for _, row in hits_df.iterrows():
        scored_mols = Path(hits_dir, row['followup_id'], 'concat_ranked_docked_ligands_hs-scored.mol2')
        pose = int(row['pose_id'].split('_')[-1])
        ccdc_lig = MoleculeReader(str(scored_mols))[pose]

        fu_id_list.append(row['pose_id'])
        fu_smiles_list.append(row['followup_smiles'])
        mean_hs_scores.append(row['mean_hs_score'])

        for _, rowc in clusters_df.iterrows():
            probe_type = rowc['probe_type']
            if probe_type == 'acceptor':
                tar_atoms = [a for a in ccdc_lig.heavy_atoms if a.is_acceptor]
            elif probe_type == 'donor':
                tar_atoms = [a for a in ccdc_lig.heavy_atoms if a.is_donor]
            else:
                # Previously the atoms of the preceding cluster were reused
                # (or the name was undefined on the first cluster).
                tar_atoms = []

            c_coords = rowc['centre_of_mass']
            if isinstance(c_coords, str):
                # centre stored as text containing 'x=..., y=..., z=...)'
                x_coord = float(c_coords.split('x=')[1].split(',')[0])
                y_coord = float(c_coords.split('y=')[1].split(',')[0])
                z_coord = float(c_coords.split('z=')[1].split(')')[0])
                c_coords = Coordinates(x=x_coord, y=y_coord, z=z_coord)

            dists = [Helper.get_distance(at.coordinates, c_coords) for at in tar_atoms]
            hit = 1 if dists and min(dists) < rowc['cluster_radius'] + 1 else 0
            clust_hitlist.setdefault(rowc['cluster_id'], []).append(hit)

    scored_df = pd.DataFrame()
    cols = clusters_df['cluster_id'].values
    scored_df['followup_id'] = fu_id_list
    scored_df['followup_smiles'] = fu_smiles_list
    scored_df['mean_hs_score'] = mean_hs_scores

    # With no hits no cluster was ever visited; fall back to an empty column
    # instead of raising KeyError.
    hit_columns = [clust_hitlist.get(cl, []) for cl in cols]
    for cl, flags in zip(cols, hit_columns):
        scored_df[cl] = flags

    scored_df['number_hits'] = [
        sum(flags[pos] for flags in hit_columns)
        for pos in range(len(fu_id_list))
    ]

    return scored_df