def create_feature_db(mol_files, outdir, dbname="test", n_threads=6):
    """Build a CrossMiner feature database from a set of molecule files.

    One structure database (``.csdsqlx``) is created per input file inside
    ``outdir`` and the combined feature database is written to
    ``<outdir>/<dbname>.feat``.

    :param mol_files: iterable of paths to the input molecule files
    :param str outdir: directory receiving the structure and feature
        databases; created (including parents) if it does not exist
    :param str dbname: stem of the feature database file name
    :param int n_threads: number of threads passed to the database creator
        settings
    :return: None
    """
    # CrossMiner is installed in a non-standard location, so the feature
    # definitions are resolved relative to the CSD directory.
    f_defs = os.path.join(
        os.path.dirname(os.path.dirname(io.csd_directory())),
        "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(f_defs)

    # Create the output directory once, up front: the final write below needs
    # it even when ``mol_files`` is empty.
    os.makedirs(outdir, exist_ok=True)

    sdbs = []
    for mol_file in mol_files:
        # DatabaseInfo is a named tuple (file name, num_strucs, colour)
        mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo(
            mol_file, 0, Colour(0, 255, 0, 255))

        # Create structure databases
        mol_sqlx = os.path.join(
            outdir,
            os.path.basename(mol_file).replace('.mol2', '.csdsqlx'))
        mol_sdb = Pharmacophore.FeatureDatabase.Creator.StructureDatabase(
            mol_struc,
            use_crystal_symmetry=False,
            structure_database_path=mol_sqlx)
        sdbs.append(mol_sdb)

    # Create Feature database
    settings = Pharmacophore.FeatureDatabase.Creator.Settings(
        feature_definition_directory=f_defs, n_threads=n_threads)
    creator = Pharmacophore.FeatureDatabase.Creator(settings=settings)
    db = creator.create(sdbs)
    db.write(os.path.join(outdir, f"{dbname}.feat"))
def setUp(self):
    """Load the reference ``.cm`` models and the CrossMiner feature definitions.

    The pharmacophore models are read first, then the feature definitions
    are loaded from the CrossMiner directory located relative to the CSD
    directory.
    """
    self.parent_dir = "testdata/pharmacophore_extension/PharmacophoreModel"
    self.fnames = [
        "1dmt_ligand.cm",
        "1r1h_ligand.cm",
        "1r1j_ligand.cm",
        "1y8j_ligand.cm",
    ]

    models = []
    for fname in self.fnames:
        model_path = os.path.join(self.parent_dir, fname)
        models.append(PharmacophoreModel.from_file(model_path))
    self.pharmacophores = models

    csd_dir = io.csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_dir))
    definitions_dir = os.path.join(self.cm_dir,
                                   "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(definitions_dir)
def detect_features(self, crystal):
    """Detect occurrences of this feature definition in a crystal.

    :param crystal: crystal whose underlying ``_crystal`` object is searched
    :return: tuple of :class:`Pharmacophore.Feature`, one per detected
        feature point, each labelled
        ``<crystal identifier>/<definition identifier>/<index>``
    """
    crystal_view = ChemistryLib.CrystalStructureView_instantiate(
        crystal._crystal)
    creator = MotifPharmacophoreLib.MotifPharmacophoreSearchStructureCreator()
    creator.register_components_from_feature_definitions(
        (self._feature_def, ))
    search_structure = creator.search_structure(crystal_view)
    deducer = self._feature_def.feature_deducer()
    motif_features = deducer.generate_motif_features(
        search_structure, self._feature_def.component_label())

    detected = []
    for index in range(motif_features.end_index()):
        _code, feature_points = motif_features.at(index)
        for feature_point in feature_points:
            coords = feature_point.points()
            spheres = tuple(
                GeometricDescriptors.Sphere((xyz[0], xyz[1], xyz[2]), 1.0)
                for xyz in coords)

            # Skip duplicates: a two-sphere feature whose spheres are the
            # swapped pair of one already collected.
            is_swapped_copy = len(spheres) == 2 and any(
                len(seen.spheres) == 2
                and seen.spheres[0] == spheres[1]
                and seen.spheres[1] == spheres[0]
                for seen in detected)
            if is_swapped_copy:
                continue

            feat = Pharmacophore.Feature(self._clone(), *spheres,
                                         crystal=crystal)
            feat.label = f"{crystal.identifier}/{self.identifier}/{index}"
            detected.append(feat)
    return tuple(detected)
def __init__(self):
    """Initialise an empty model and load the CrossMiner feature definitions.

    The feature definitions are read from the ``CSD_CrossMiner`` directory
    located relative to the CSD directory; more than one definition must be
    available. A temporary working directory is created in ``self.tmp``.
    """
    super().__init__()

    csd_root = csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_root))
    definitions_dir = os.path.join(self.cm_dir,
                                   "CSD_CrossMiner/feature_definitions")
    Pharmacophore.read_feature_definitions(directory=definitions_dir)

    # Snapshot of everything that was loaded; the active definitions start
    # out as the same mapping object.
    self.__feature_options = dict(Pharmacophore.feature_definitions.items())
    assert len(self.__feature_options) > 1
    self.__feature_definitions = self.__feature_options

    self.tmp = tempfile.mkdtemp()

    # Nothing has been attached or detected yet.
    self.__identifier = None
    self.__ligands = None
    self.__protein = None
    self.__detected_features = None
    self.__feature_point_grids = None
def __init__(self, features=None, _motif_pharmacophore=None):
    """Initialise the model and load the CrossMiner feature definitions.

    The definitions directory is taken from the
    ``CCDC_CROSSMINER_FEATURE_DEFINITIONS`` environment variable when set,
    otherwise from a path relative to the CSD directory. More than one
    definition must be available. A temporary working directory is created
    in ``self.tmp``.

    :param features: forwarded unchanged to the parent initialiser
    :param _motif_pharmacophore: forwarded unchanged to the parent initialiser
    """
    super().__init__(features=features,
                     _motif_pharmacophore=_motif_pharmacophore)

    csd_root = csd_directory()
    self.cm_dir = os.path.dirname(os.path.dirname(csd_root))
    fallback_dir = os.path.join(self.cm_dir,
                                "../CSD_CrossMiner/feature_definitions")
    feat_db = os.environ.get("CCDC_CROSSMINER_FEATURE_DEFINITIONS",
                             fallback_dir)
    Pharmacophore.read_feature_definitions(directory=feat_db)

    # Snapshot of everything that was loaded; the active definitions start
    # out as the same mapping object.
    self.__feature_options = dict(Pharmacophore.feature_definitions.items())
    assert len(self.__feature_options) > 1
    self.__feature_definitions = self.__feature_options

    self.tmp = tempfile.mkdtemp()

    # Nothing has been attached or detected yet.
    self.__identifier = None
    self.__ligands = None
    self.__protein = None
    self.__detected_features = None
    self.__feature_point_grids = None
def create_new_features(self):
    """
    Build pharmacophore features for this peak.

    Without projection peaks a single one-sphere feature is produced;
    otherwise one two-sphere feature (point + projection) is produced per
    projection peak.

    :return: list of `Pharmacophore.Feature`
    """
    origin = GeometricDescriptors.Sphere(centre=self.point, radius=1)

    if len(self.projection_peaks) == 0:
        lone = Pharmacophore.Feature(self.feature_def, origin)
        lone.point = origin
        lone.score = self.value
        return [lone]

    created = []
    for peak in self.projection_peaks:
        target = GeometricDescriptors.Sphere(centre=peak.point, radius=1)
        paired = Pharmacophore.Feature(self.feature_def, origin, target)
        paired.point = origin
        paired.score = self.value
        paired.projected = target
        paired.projected_value = peak.value
        created.append(paired)
    return created
def _get_crossminer_pharmacophore(self):
    """
    convert a PharmacophoreModel into a crossminer pharmacophore

    Charged ("negative"/"positive") features are reported and skipped. When
    ``self.settings.excluded_volume`` is set and a protein is available, one
    excluded volume sphere (radius 2) is added per binding-site residue,
    centred on the centre of geometry of its backbone atoms.

    :return: `Pharmacophore.Query` built from the model features
    :raises ImportError: if the CrossMiner feature definitions cannot be read
    """
    # TODO: UPDATE WITH CHARGED FEATURES
    supported_features = {
        "acceptor_projected": "acceptor",
        "donor_projected": "donor",
        "ring": "apolar",
    }

    try:
        Pharmacophore.read_feature_definitions()
    except Exception as err:
        # Only real errors are translated; KeyboardInterrupt/SystemExit now
        # propagate untouched and the original cause is kept on the chain.
        raise ImportError(
            "Crossminer is only available to CSD-Discovery") from err

    # Map hotspot feature types onto the matching CrossMiner definitions.
    feature_definitions = {
        supported_features[fd.identifier]: fd
        for fd in Pharmacophore.feature_definitions.values()
        if fd.identifier in supported_features
    }

    model_features = []
    for feat in self._features:
        if feat.feature_type in ("negative", "positive"):
            print("Charged feature not currently supported in CrossMiner: Its on the TODO list")
            continue

        definition = feature_definitions[feat.feature_type]
        sphere = GeometricDescriptors.Sphere(feat.feature_coordinates,
                                             self.settings.radius)
        if feat.projected_coordinates:
            projected = GeometricDescriptors.Sphere(
                feat.projected_coordinates, self.settings.radius)
            p = Pharmacophore.Feature(definition, sphere, projected)
        else:
            p = Pharmacophore.Feature(definition, sphere)
        model_features.append(p)

    if self.settings.excluded_volume:
        if not self.protein:
            print("Pharmacophore Model must have protein to calculate excluded volume")
        else:
            bs = self._get_binding_site_residues()
            for residue in bs.residues:
                # Collect the backbone atoms in a scratch molecule so that
                # a single sphere per residue can be placed at their centre.
                mol = Molecule(identifier="temp_residue")
                for a in residue.backbone_atoms:
                    mol.add_atom(a)

                centre = mol.centre_of_geometry()
                ev = Pharmacophore.ExcludedVolume(
                    GeometricDescriptors.Sphere(centre, 2))
                model_features.append(ev)

    return Pharmacophore.Query(model_features)
def search(feat_db, pharm):
    """Run a pharmacophore search and wrap the hits in a ``HitList``.

    Hits whose identifier contains ``"ZINC"`` are given activity 0, all
    others activity 1.

    :param feat_db: feature database passed to the searcher
    :param pharm: pharmacophore query to search with
    :return: ``HitList`` whose ``hits`` hold one ``Hit`` per search hit
    """
    settings = Pharmacophore.Search.Settings()
    search_options = (
        ("max_hit_structures", 50000),
        ("max_hits_per_structure", 1),
        ("max_hit_rmsd", 5),
        ("n_threads", 24),
    )
    for option, value in search_options:
        setattr(settings, option, value)

    searcher = Pharmacophore.Search(settings)
    raw_hits = searcher.search(pharm, database=feat_db, verbose=True)

    processed = HitList()
    for hit in raw_hits:
        activity = 0 if "ZINC" in hit.identifier else 1
        processed.hits.append(
            Hit(hit.molecule, hit.overlay_rmsd, hit.identifier, activity))
    return processed
def from_ligands(ligands, identifier, protein=None, settings=None):
    """
    creates a Pharmacophore Model from a collection of overlaid ligands

    :param `ccdc.molecule.Molecule` ligands: ligands from which the Model is created
    :param str identifier: identifier for the Pharmacophore Model
    :param `ccdc.protein.Protein` protein: target system that the model has been created for
    :param `hotspots.hs_pharmacophore.PharmacophoreModel.Settings` settings: Pharmacophore Model settings
    :return: :class:`hotspots.hs_pharmacophore.PharmacophoreModel`
    :raises ImportError: if the CrossMiner feature definitions cannot be read

    >>> from ccdc.io import MoleculeReader
    >>> from hotspots.hs_pharmacophore import PharmacophoreModel

    >>> mols = MoleculeReader("ligand_overlay_model.mol2")
    >>> model = PharmacophoreModel.from_ligands(mols, "ligand_overlay_pharmacophore")
    >>> # write to .json and search in pharmit
    >>> model.write("model.json")

    """
    cm_dic = crossminer_features()
    blank_grd = Grid.initalise_grid(
        [atom.coordinates for lig in ligands for atom in lig.atoms])
    feature_dic = {
        "apolar": blank_grd.copy(),
        "acceptor": blank_grd.copy(),
        "donor": blank_grd.copy(),
    }

    if not settings:
        settings = PharmacophoreModel.Settings()

    if isinstance(ligands[0], Molecule):
        # Feature detection below is run on what CrystalReader yields, so
        # molecules are round-tripped through a temporary mol2 file.
        temp = tempfile.mkdtemp()
        with io.MoleculeWriter(join(temp, "ligs.mol2")) as w:
            for lig in ligands:
                w.write(lig)
        ligands = list(io.CrystalReader(join(temp, "ligs.mol2")))

    try:
        Pharmacophore.read_feature_definitions()
    except Exception as err:
        # Only real errors are translated; KeyboardInterrupt/SystemExit now
        # propagate untouched and the original cause is kept on the chain.
        raise ImportError(
            "Crossminer is only available to CSD-Discovery") from err

    # Definitions that are not used to build the model.
    skipped_identifiers = {
        'exit_vector', 'heavy_atom', 'hydrophobe', 'fluorine', 'bromine',
        'chlorine', 'iodine', 'halogen',
    }
    feature_definitions = [
        fd for fd in Pharmacophore.feature_definitions.values()
        if fd.identifier not in skipped_identifiers
    ]

    # Paint every detected feature point onto the grid of its feature type.
    for fd in feature_definitions:
        detected = [fd.detect_features(ligand) for ligand in ligands]
        all_feats = [f for found in detected for f in found]
        if not all_feats:
            continue
        for f in all_feats:
            feature_dic[cm_dic[fd.identifier]].set_sphere(
                f.spheres[0].centre, f.spheres[0].radius, 1)

    # Each grid peak becomes one pharmacophore feature.
    features = []
    for feat, feature_grd in feature_dic.items():
        peaks = feature_grd.get_peaks(min_distance=4, cutoff=1)
        for p in peaks:
            coords = Coordinates(p[0], p[1], p[2])
            projected_coordinates = None
            # Projections are only computed for donors/acceptors, and only
            # when a protein has been supplied.
            if feat in ("donor", "acceptor") and protein:
                projected_coordinates = \
                    _PharmacophoreFeature.get_projected_coordinates(
                        feat, coords, protein, settings)
            features.append(
                _PharmacophoreFeature(
                    projected=None,
                    feature_type=feat,
                    feature_coordinates=coords,
                    projected_coordinates=projected_coordinates,
                    score_value=feature_grd.value_at_coordinate(
                        coords, position=False),
                    vector=None,
                    settings=settings))

    return PharmacophoreModel(settings,
                              identifier=identifier,
                              features=features,
                              protein=protein,
                              dic=feature_dic)
from ccdc.pharmacophore import Pharmacophore from ccdc import io import os from shutil import copyfile from ccdc.utilities import Colour, Timer if __name__ == "__main__": outdir = "/home/pcurran/github_packages/pharmacophores/testdata/search/feat_db" f_defs = os.path.join(os.path.dirname(os.path.dirname(io.csd_directory())), "CSD_CrossMiner/feature_definitions") Pharmacophore.read_feature_definitions(f_defs) base = "/local/pcurran/patel/CDK2/screening_files/conformers" mol_files = [ os.path.join(base, f) for f in ["actives_final_chunk0_conf.mol2", "decoys_final_chunk0_conf.mol2"] ] sdbs = [] for mol_file in mol_files: # DatabaseInfo is a named tupled (file name, num_strucs, colour) mol_struc = Pharmacophore.FeatureDatabase.DatabaseInfo( mol_file, 0, Colour(0, 255, 0, 255)) # Create structure databases mol_sqlx = os.path.join( outdir, os.path.basename(mol_file).replace('.mol2', '.csdsqlx')) if not os.path.exists(outdir): os.mkdir(outdir)
def detect_from_arpeggio(self, protein_path, hetid, chain):
    """
    creates a pharmacophore from protein-ligand interactions

    TODO: This could be cleaner but for time reasons this is good enough. For
    example the SMARTS definitions between Arpeggio and Crossminer are not
    identical. Also, the SMARTS "grouping" are subtly different.

    1. Create atomtypes using Crossminer
    2. Detect bonds using Arpeggio
    3. For features in ligand, if bond, create a feature

    The result is stored on the instance: ``detected_features``, ``protein``
    and ``ligands`` are all set.

    :param protein_path: path to the protein file (Arpeggio needs a file)
    :param hetid: three-letter ligand identifier, compared with the start of
        the second ``:``-separated field of each ligand identifier
    :param chain: chain identifier, compared with the first ``:``-separated
        field of each ligand identifier
    :return: None
    """
    # Arpeggio needs the protein in file, read the protein to get some information
    protein = Protein.from_file(protein_path)
    pdb_code = protein.identifier

    # assertion 1 (disabled): protein must be protonated for CrossMiner
    # assert("H" in {atom.atomic_symbol for atom in protein.atoms[:50]})

    # assertion 2: protein must contain the ligand of interest (exactly once)
    matching_ligands = []
    for candidate in protein.ligands:
        fields = candidate.identifier.split(":")
        if fields[0] == chain and fields[1][:3] == hetid:
            matching_ligands.append(candidate)
    assert len(matching_ligands) == 1
    lig = matching_ligands[0]

    # CrossMiner needs a `ccdc.crystal.Crystal`
    crystal_ligand = self._get_crystal(lig)

    # Run Arpeggio
    arpeggio = Arpeggio(pdb_code, hetid, chain, protein_path)
    arpeggio.run()
    atom_features, ring_features = arpeggio.create_feature_list()
    interaction_features = []
    interaction_features.extend(atom_features)
    interaction_features.extend(ring_features)

    # CrossMiner atom-typing
    new_features = []
    ipoints = np.array([
        to_array(interaction.point) for interaction in interaction_features
    ])

    for identifier, fd in self.feature_definitions.items():
        feats = fd.detect_features(crystal_ligand)
        fpoints = np.array(
            [to_array(feat.point[0].centre) for feat in feats])

        # cdist needs two 2-D arrays; an empty side means nothing can overlap.
        if ipoints.size == 0 or fpoints.size == 0:
            continue

        # find the overlapping points
        ipoint_index, fpoint_index = np.where(
            distance.cdist(ipoints, fpoints) < 0.01)

        for i, f in zip(ipoint_index, fpoint_index):
            # Value comparison: ``is`` against a string literal tests object
            # identity and is not a reliable equality check.
            if identifier == "ring":
                fd = Pharmacophore.feature_definitions[
                    "ring_planar_projected"]

            # sphere from CrossMiner
            point = GeometricDescriptors.Sphere(fpoints[f], 1)
            # sphere from Arpeggio
            projected = GeometricDescriptors.Sphere(
                to_array(interaction_features[i].projected), 1)
            print(point, projected)

            new = Pharmacophore.Feature(fd, point, projected)
            new.point = point
            new.point_identifier = interaction_features[i].point_identifier
            new.projected = projected
            new.projected_identifier = interaction_features[
                i].projected_identifier
            print(new.projected_identifier)
            new_features.append(new)

    print(len(new_features))

    self.detected_features = new_features
    self.protein = self._get_crystal(protein)
    self.ligands = [crystal_ligand]
def from_hotspot(self,
                 hr,
                 projections=True,
                 min_distance=2,
                 radius_dict=None,
                 sigma=1,
                 override=True):
    """Detect pharmacophore features from the peaks of a hotspot result.

    Each grid in ``hr.super_grids`` is smoothed and its peaks (cutoff 5)
    become feature points. For non-apolar probes, when ``projections`` is
    set, a projected partner is looked for among features detected on the
    protein residues around the peak. The resulting features are stored in
    ``self.detected_features``.

    :param hr: hotspot result providing ``super_grids`` and ``protein``
    :param bool projections: attempt to find projections for non-apolar peaks
    :param min_distance: minimum peak separation (apolar peaks use
        ``min_distance + 2``)
    :param dict radius_dict: sphere radius per probe type; probes that are
        not listed use 1.5. Defaults to ``{"apolar": 2.5}``
    :param sigma: gaussian smoothing (apolar grids use ``sigma + 0.5``)
    :param bool override: keep a donor peak even when no projection was found
    :return: None
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if radius_dict is None:
        radius_dict = {"apolar": 2.5}

    # Feature definitions detected on the protein side for each probe type.
    interaction_dict = {
        "donor": ["acceptor_projected"],
        "acceptor": ["donor_projected", "donor_ch_projected"]
        # "apolar": ["ring_planar_projected"]
    }

    hotspot_to_cm = {
        "projected": {
            "apolar": "ring_planar_projected",
            "donor": "donor_projected",
            "acceptor": "acceptor_projected"
        },
        "non-projected": {
            "apolar": "ring",
            "donor": "donor_projected",
            "acceptor": "acceptor"
        },
    }

    # get peaks
    features = []
    for p, g in hr.super_grids.items():
        # peak as a sphere
        # Keep consistent with vis
        if p == "apolar":
            smoothing, separation = sigma + 0.5, min_distance + 2
        else:
            smoothing, separation = sigma, min_distance
        h = g.max_value_of_neighbours()
        h = h.gaussian(sigma=smoothing)
        all_peaks = h.get_peaks(min_distance=separation, cutoff=5)

        radius = radius_dict.get(p, 1.5)

        for peak in all_peaks:
            point = GeometricDescriptors.Sphere(centre=peak, radius=radius)
            score = h.value_at_point(peak)

            projs = None
            if p != "apolar" and projections:
                # binding site from point: drop every residue that is not
                # part of the site found around the peak (distance=6)
                binding_site = hr.protein.copy()
                bs = Protein.BindingSiteFromPoint(hr.protein,
                                                  peak,
                                                  distance=6)
                outside = ({r.identifier for r in binding_site.residues} -
                           {r.identifier for r in bs.residues})
                for r in outside:
                    binding_site.remove_residue(r)

                # detect projected features
                pm = ProteinPharmacophoreModel()
                pm.feature_definitions = interaction_dict[p]
                pm.detect_from_prot(binding_site)
                feats = pm.detected_features

                # This returns multiple: ATM the user will then select which one (semi-automated)
                # TODO: implement method to pick the best projection
                projs = select_projections(feats,
                                           np.array([peak]),
                                           tolerance=4)

            if projs is None:
                # no projections
                if p == "donor":
                    print(
                        "warning! feature: donor projection used without projection. Will not work in CrossMiner"
                    )
                    # A donor without a projection is only kept on request.
                    if not override:
                        continue
                    print("here")

                f = Pharmacophore.Feature(
                    self.feature_definitions[
                        hotspot_to_cm["non-projected"][p]], point)
                f.point = point
                f.score = score
                features.append(f)
            else:
                # just picking the closest now
                centre = (projs.spheres[0].centre[0],
                          projs.spheres[0].centre[1],
                          projs.spheres[0].centre[2])
                s = GeometricDescriptors.Sphere(centre=centre, radius=radius)
                f = Pharmacophore.Feature(
                    self.feature_definitions[hotspot_to_cm["projected"][p]],
                    point, s)
                f.point = point
                f.projected = s
                f.projected_atom = projs.point_atom
                f.score = score
                features.append(f)

    self.detected_features = features
def create_feature(peak):
    """Turn a grid peak into a one-sphere pharmacophore feature.

    :param peak: object providing ``point``, ``feature_def`` and ``value``
    :return: `Pharmacophore.Feature` centred on the peak (sphere radius 1)
        with ``point`` and ``score`` attributes set
    """
    sphere = GeometricDescriptors.Sphere(centre=peak.point, radius=1)
    feature = Pharmacophore.Feature(peak.feature_def, sphere)
    feature.point = sphere
    feature.score = peak.value
    return feature
def create_consensus(pharmacophores, cutoff=2, max_distance=2.0):
    """Derive consensus features from several pharmacophores.

    For every feature type the features of all pharmacophores are added to a
    grid; grid peaks whose value reaches ``cutoff`` become consensus points.
    When the first feature of a type has more than one sphere, the features
    are assigned to their closest peak and only consensus features that
    received a projection are kept; otherwise one single-sphere feature is
    created per peak.

    :param pharmacophores: pharmacophore models providing ``features``
    :param cutoff: minimum grid value for a peak to be kept
    :param max_distance: passed to ``closest_peak_index`` when assigning
        features to peaks
    :return: tuple ``(new_features, feature_point_grids)``
    """
    consensus = []

    # initialise grids from all pharmacophores
    feature_point_grids = _create_grids(pharmacophores)

    # group the features of every pharmacophore by feature identifier
    features_by_type = {}
    for key in feature_point_grids.keys():
        features_by_type[key] = [
            feat for model in pharmacophores for feat in model.features
            if feat.identifier == key
        ]

    for identifier, members in features_by_type.items():
        # add spheres to grid
        grid = _features_to_grid(members, feature_point_grids[identifier])
        feature_point_grids[identifier] = grid

        # find peaks, keeping those that reach the cutoff
        kept_peaks = []
        for location in grid.get_peaks(min_distance=2, cutoff=0):
            height = grid.value_at_point(location)
            if height >= cutoff:
                kept_peaks.append(
                    GridPeak(
                        point=location,
                        value=height,
                        feature_def=Pharmacophore.
                        feature_definitions[identifier]))

        peak_coords = np.array([pk.point for pk in kept_peaks])
        if len(peak_coords) == 0:
            continue

        if len(members[0].spheres) > 1:
            # projected feature type: try for a consensus projection
            # assign features to closest peak
            for member in members:
                nearest = closest_peak_index(peak_coords, member,
                                             max_distance)
                if nearest is not None:
                    kept_peaks[int(nearest)].features.append(member)

            # create projections
            for pk in kept_peaks:
                pk.create_projection_grid()
                pk.find_projection_peaks()
                # for a projected feat, if no projection found, scrap
                consensus.extend(candidate
                                 for candidate in pk.create_new_features()
                                 if len(candidate.spheres) > 1)
        else:
            for pk in kept_peaks:
                sphere = GeometricDescriptors.Sphere(centre=pk.point,
                                                     radius=1)
                single = Pharmacophore.Feature(pk.feature_def, sphere)
                single.point = sphere
                single.score = pk.value
                consensus.append(single)

    return consensus, feature_point_grids
def run(self):
    """Build a consensus pharmacophore query from an overlay of crystals.

    Features are detected in every crystal of ``self.args.overlay_file``,
    accumulated on a grid per feature definition, and each grid island above
    ``self.args.threshold`` becomes one feature (or several, when more than
    one detected feature is equally close to the island centre). The query
    is written to ``<output_directory>/model.cm``.

    A non-positive ``self.args.threshold`` is replaced by half the number of
    crystals. ``self.crystals`` and ``self.feature_definitions`` are set as a
    side effect.
    """
    if not os.path.exists(self.args.output_directory):
        os.makedirs(self.args.output_directory)

    Pharmacophore.read_feature_definitions()
    self.crystals = list(io.CrystalReader(self.args.overlay_file))

    if self.args.threshold <= 0.0:
        self.args.threshold = (len(self.crystals)) / 2.0

    if self.args.feature_definitions:
        self.feature_definitions = [
            v for k, v in Pharmacophore.feature_definitions.items()
            if k in self.args.feature_definitions
        ]
    else:
        skipped_identifiers = ('exit_vector', 'heavy_atom', 'hydrophobe')
        self.feature_definitions = [
            fd for fd in Pharmacophore.feature_definitions.values()
            if fd.identifier not in skipped_identifiers
        ]

    complete_set_of_features = []
    for fd in self.feature_definitions:
        detected = [fd.detect_features(c) for c in self.crystals]
        all_feats = [f for found in detected for f in found]
        if not all_feats:
            continue

        # Grid spanning every first sphere centre, padded by 1 on each side.
        centres = [f.spheres[0].centre for f in all_feats]
        minx = min(c.x() for c in centres)
        miny = min(c.y() for c in centres)
        minz = min(c.z() for c in centres)
        maxx = max(c.x() for c in centres)
        maxy = max(c.y() for c in centres)
        maxz = max(c.z() for c in centres)
        g = utilities.Grid((minx - 1., miny - 1., minz - 1.),
                           (maxx + 1, maxy + 1, maxz + 1), 0.2)

        # A sphere that was already seen is added with value 0, so repeated
        # spheres only contribute once.
        spheres = []
        for f in all_feats:
            first = f.spheres[0]
            if first in spheres:
                g.set_sphere(first.centre, first.radius, 0)
            else:
                spheres.append(first)
                g.set_sphere(first.centre, first.radius, 1)

        islands = g.islands(self.args.threshold)
        print('Feature: %s, max value %.2f, n_features %d' %
              (fd.identifier, g.extrema[1], len(islands)))

        for island in islands:
            # how do I make a feature from an island? Location of highest value
            indices = island.indices_at_value(island.extrema[1])
            centre = indices[0]
            org = island.bounding_box[0]
            centre = tuple(org[i] + island.spacing * centre[i]
                           for i in range(3))
            radius = 1.0

            # Any other spheres?
            if len(all_feats[0].spheres) > 1:
                # Group the detected features by distance to the island
                # centre and reuse the second sphere of every feature at the
                # shortest distance. ``in``/``min`` replace ``has_key`` and
                # ``keys()[0]``, which are unavailable on Python 3.
                feat_dists = {}
                for f in all_feats:
                    dist = GeometricDescriptors.point_distance(
                        f.spheres[0].centre, centre)
                    feat_dists.setdefault(dist, []).append(f)
                shortest_distance = min(feat_dists)
                new_feat = [
                    Pharmacophore.Feature(
                        fd, GeometricDescriptors.Sphere(centre, radius),
                        closest.spheres[1])
                    for closest in feat_dists[shortest_distance]
                ]
            else:
                new_feat = [
                    Pharmacophore.Feature(
                        fd, GeometricDescriptors.Sphere(centre, radius))
                ]
            complete_set_of_features.extend(new_feat)

    model = Pharmacophore.Query(complete_set_of_features)
    model.write(os.path.join(self.args.output_directory, 'model.cm'))
PharmacophoreModel.from_file(os.path.join(wrk_dir, f)) for f in fnames ] feats = create_consensus(pm, cutoff=1) out = PharmacophoreModel() out.detected_features = feats for feat in feats: out.add_feature(feat) out.pymol_visulisation( "/home/pcurran/github_packages/pharmacophores/testdata/concensus") if __name__ == "__main__": cm_dir = os.path.dirname(os.path.dirname(io.csd_directory())) Pharmacophore.read_feature_definitions( os.path.join(cm_dir, "CSD_CrossMiner/feature_definitions")) wrkdir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment" paths = [ "1AQ1_aligned.pdb", "1B38_aligned.pdb", "1B39_aligned.pdb", "1CKP_aligned.pdb" ] hetids = ["STU", "ATP", "ATP", "PVB"] chains = ["A", "A", "A", "A"] for path, het, chain in zip(paths, hetids, chains): create_pharmacophore(path, het, chain, out_dir=wrkdir) wrk_dir = "/home/pcurran/github_packages/pharmacophores/testdata/alignment" fnames = ["1AQ1_STU.cm", "1B38_ATP.cm", "1B39_ATP.cm", "1CKP_PVB.cm"] create_concensus(fnames, wrk_dir)