def from_smiles(smiles, path, identifier=None, generate_initial_sites=True):
    """
    Build a :class:`ccdc.molecule.Molecule` from a SMILES string and write it
    to ``path``.

    When ``identifier`` is omitted, the SMILES string itself is used as the
    molecule identifier.

    :param str smiles: SMILES string describing the molecule
    :param str path: location handed to ``MoleculeWriter`` for the new molecule
    :param str identifier: identifier for the molecule (defaults to ``smiles``)
    :param bool generate_initial_sites: whether to include an initial guess at
        3D coordinates; when false the molecule is built from siteless atoms
    :return: the :class:`ccdc.molecule.Molecule` instance that was written
    """
    name = smiles if identifier is None else identifier

    if not generate_initial_sites:
        builder = ChemicalAnalysisLib.SMILESMoleculeMaker()
        molecule = Molecule(name, _molecule=builder.siteless_atoms(smiles))
    else:
        conformer_parameters = _CSDDatabaseLocator.get_conformer_parameter_file_location()
        builder = ConformerGeneratorLib.MoleculeTo3D(conformer_parameters)
        molecule = Molecule(name, builder.create_conformation(smiles))

    with MoleculeWriter(path) as writer:
        writer.write(molecule)
    return molecule
def get_label(input, threshold=None):
    """
    Create a pseudo-molecule whose atoms carry score labels.

    Two kinds of ``input`` are handled:

    * an object exposing ``_features``; every feature whose ``feature_type``
      is not ``"surface"`` becomes one atom placed at ``feature_coordinates``
      and labelled with ``score_value``. ``threshold`` is ignored here.
    * a dictionary of grids (key = probe identifier, value =
      `ccdc.utilities.Grid`); every island above ``threshold`` that is larger
      than the per-probe minimum size becomes one atom placed at the island
      centroid and labelled with the island's maximum grid value.

    :param input: feature-bearing object or dict of grids (the name shadows
        the builtin but is kept so keyword callers keep working)
    :param threshold: island cutoff, required for the grid-dictionary form
    :return `ccdc.molecule.Molecule`: pseudo-molecule which contains score labels
    :raises ValueError: if ``input`` has no usable ``_features`` and no
        ``threshold`` was supplied
    """
    min_size_dict = {"apolar": 40,
                     "donor": 6,
                     "acceptor": 9,
                     "negative": 9,
                     "positive": 9}

    atom_dic = {"apolar": 'C',
                "aromatic": 'C',
                "donor": 'N',
                "weak_donor": 'N',
                "acceptor": 'O',
                "weak_acceptor": 'O',
                "negative": 'S',
                "positive": 'H'}

    try:
        features = [feat for feat in input._features if feat.feature_type != "surface"]
        interaction_types = [atom_dic[feat.feature_type] for feat in features]
        coordinates = [feat.feature_coordinates for feat in features]
        scores = [feat.score_value for feat in features]
    except AttributeError:
        # Not a feature-bearing object: fall back to the grid-dictionary form.
        if threshold is None:
            # Previously this path fell through and failed later with an
            # unrelated UnboundLocalError.
            raise ValueError("a threshold is required when the input does not provide features")

        interaction_types = []
        coordinates = []
        scores = []
        try:
            for p, g in input.items():
                for island in g.islands(threshold=threshold):
                    if island.count_grid() > min_size_dict[p]:
                        interaction_types.append(atom_dic[p])
                        coordinates.append(island.centroid())
                        scores.append(max(island.grid_values(threshold=threshold)))
        except AttributeError:
            # Best effort, as before: report and return whatever was collected.
            print("object not supported")

    mol = Molecule(identifier="pharmacophore_model")
    for symbol, xyz, score in zip(interaction_types, coordinates, scores):
        mol.add_atom(Atom(atomic_symbol=symbol,
                          atomic_number=14,
                          coordinates=xyz,
                          label=str(score)))
    return mol
def to_molecule(self, protein=None):
    """
    Collect the scored protein atoms into a molecule named "constraints".

    Each key of ``self.index_by_score`` is used as an index into the protein's
    atoms; the atom's label is overwritten with the matching value (as a
    string) and the atom is added to the returned molecule.

    :param protein: protein to use when ``self.protein`` is None
    :return: molecule holding the relabelled atoms
    :raises AttributeError: if neither ``self.protein`` nor ``protein`` is set
    """
    # Fall back on the argument: it used to be validated but never used, so
    # the loop below failed on ``None.atoms`` whenever self.protein was unset.
    source = self.protein if self.protein is not None else protein
    if source is None:
        raise AttributeError("Give me a protein")

    atoms = source.atoms
    mol = Molecule(identifier="constraints")
    for index, score in self.index_by_score.items():
        atm = atoms[index]
        # NOTE: this relabels the atom object taken from the protein itself.
        atm.label = str(score)
        mol.add_atom(atm)
    return mol
def _get_crossminer_pharmacophore(self):
    """
    Convert a PharmacophoreModel into a CrossMiner pharmacophore query.

    Every feature in ``self._features`` becomes a sphere of radius
    ``self.settings.radius`` (plus a second sphere when the feature has
    projected coordinates). Features of type "negative" or "positive" are
    skipped with a printed notice. When ``self.settings.excluded_volume`` is
    set and a protein is available, one excluded-volume sphere of radius 2 is
    added per binding-site residue, centred on the centre of geometry of the
    residue's backbone atoms.

    :return: a ``Pharmacophore.Query`` built from the collected features
    :raises ImportError: if the CrossMiner feature definitions cannot be read
    :raises KeyError: if a feature type has no matching feature definition
    """
    # TODO: UPDATE WITH CHARGED FEATURES
    supported_features = {"acceptor_projected": "acceptor",
                          "donor_projected": "donor",
                          "ring": "apolar"}
    try:
        Pharmacophore.read_feature_definitions()
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # no longer converted into an ImportError.
        raise ImportError("Crossminer is only available to CSD-Discovery")

    feature_definitions = {supported_features[fd.identifier]: fd
                           for fd in Pharmacophore.feature_definitions.values()
                           if fd.identifier in supported_features}

    model_features = []
    for feat in self._features:
        if feat.feature_type in ("negative", "positive"):
            print("Charged feature not currently supported in CrossMiner: Its on the TODO list")
            continue

        sphere = GeometricDescriptors.Sphere(feat.feature_coordinates, self.settings.radius)
        if feat.projected_coordinates:
            projected = GeometricDescriptors.Sphere(feat.projected_coordinates, self.settings.radius)
            p = Pharmacophore.Feature(feature_definitions[feat.feature_type], sphere, projected)
        else:
            p = Pharmacophore.Feature(feature_definitions[feat.feature_type], sphere)
        model_features.append(p)

    if self.settings.excluded_volume:
        if not self.protein:
            print("Pharmacophore Model must have protein to calculate excluded volume")
        else:
            bs = self._get_binding_site_residues()
            for residue in bs.residues:
                # Temporary molecule, used only to obtain the centre of
                # geometry of the residue backbone.
                mol = Molecule(identifier="temp_residue")
                for a in residue.backbone_atoms:
                    mol.add_atom(a)
                centre = mol.centre_of_geometry()
                ev = Pharmacophore.ExcludedVolume(GeometricDescriptors.Sphere(centre, 2))
                model_features.append(ev)

    return Pharmacophore.Query(model_features)
def _output_feature_centroid(self):
    """
    Write the centroid of every feature grid to a mol2 file.

    One pseudo-atom is created per entry of ``self.features``, positioned at
    ``feat.grid.centroid()`` and labelled with the feature's index. The
    resulting molecule ("centroids") is written to "cenroid.mol2".

    :raises KeyError: if a feature type is not apolar, acceptor or donor
    """
    # donor -> N and acceptor -> O, matching the element mapping used by the
    # other pseudo-atom writers; the two were previously the other way round.
    dic = {"apolar": "C",
           "donor": "N",
           "acceptor": "O"}

    mol = Molecule(identifier="centroids")
    for i, feat in enumerate(self.features):
        coords = feat.grid.centroid()
        mol.add_atom(Atom(atomic_symbol=dic[feat.feature_type],
                          atomic_number=14,
                          coordinates=coords,
                          label=str(i)))

    from ccdc import io
    # File name (including its spelling) left unchanged in case anything
    # downstream looks for it.
    with io.MoleculeWriter("cenroid.mol2") as w:
        w.write(mol)
def rdkit_to_ccdc(mol):
    """
    Convert an RDKit mol to a CCDC mol via a MOL block.

    The RDKit ``_Name`` property, when present, is copied to the CCDC
    molecule's identifier. Molecules without that property keep whatever
    identifier ``Molecule.from_string`` assigned.

    :param mol: RDKit molecule
    :return: the converted CCDC molecule
    """
    ccdc_mol = Molecule.from_string(Chem.MolToMolBlock(mol), format='mol')
    # GetProp raises KeyError for an unset property, so check first.
    if mol.HasProp("_Name"):
        ccdc_mol.identifier = mol.GetProp("_Name")
    return ccdc_mol
def ccdc_mol_from_smiles(smiles, identifier=None, generate_initial_sites=True):
    """
    Pete's function for making a ccdc molecule from a smiles string,
    optionally with initial coordinates.

    :param smiles: SMILES string to build the molecule from
    :param identifier: molecule identifier; the SMILES string is used when None
    :param generate_initial_sites: if true, build the molecule from a generated
        conformation, otherwise from siteless atoms
    :return: the new molecule
    """
    if identifier is None:
        identifier = smiles

    if generate_initial_sites:
        maker = ConformerGeneratorLib.MoleculeTo3D(
            _CSDDatabaseLocator.get_conformer_parameter_file_location())
        return Molecule(identifier, maker.create_conformation(smiles))

    maker = ChemicalAnalysisLib.SMILESMoleculeMaker()
    return Molecule(identifier, _molecule=maker.siteless_atoms(smiles))
def _docking_fitting_pts(self, _best_island=None, threshold=17):
    """
    Build a "constraints" molecule of carbon pseudo-atoms from grid points.

    The grid is ``_best_island`` when one is given, otherwise the single grid
    derived from ``self.super_grids``. Its
    ``grid_value_by_coordinates(threshold=threshold)`` mapping is walked and
    one atom is added per coordinate, labelled with the score formatted to two
    decimals and carrying the score as its partial charge.

    :param _best_island: optional grid to use instead of the combined grid
    :param threshold: cutoff forwarded to ``grid_value_by_coordinates``
    :return: molecule containing one pseudo-atom per point
    """
    if not _best_island:
        grid = Grid.get_single_grid(self.super_grids, mask=False)
    else:
        grid = _best_island

    points_by_score = grid.grid_value_by_coordinates(threshold=threshold)

    fitting_points = Molecule(identifier="constraints")
    for score, point_list in points_by_score.items():
        for point in point_list:
            pseudo_atom = Atom(atomic_symbol='C',
                               atomic_number=14,
                               label='{:.2f}'.format(score),
                               coordinates=point)
            pseudo_atom.partial_charge = score
            fitting_points.add_atom(atom=pseudo_atom)
    return fitting_points
def from_smiles(self, smiles, identifier=None, generate_initial_sites=True):
    """
    Make a molecule from a SMILES string (function taken from Pete's code).

    :param smiles: SMILES string to convert
    :param identifier: identifier for the molecule; falls back to ``smiles``
    :param generate_initial_sites: build from a generated conformation when
        true, from siteless atoms otherwise
    :return: the resulting molecule
    """
    label = identifier if identifier is not None else smiles

    if generate_initial_sites:
        parameter_location = _CSDDatabaseLocator.get_conformer_parameter_file_location()
        three_d_maker = ConformerGeneratorLib.MoleculeTo3D(parameter_location)
        conformation = three_d_maker.create_conformation(smiles)
        result = Molecule(label, conformation)
    else:
        siteless = ChemicalAnalysisLib.SMILESMoleculeMaker().siteless_atoms(smiles)
        result = Molecule(label, _molecule=siteless)

    return result
def write(self, fname):
    """
    writes out pharmacophore. Supported formats:

    - ".cm" (*CrossMiner*),
    - ".json" (`Pharmit <http://pharmit.csb.pitt.edu/search.html/>`_),
    - ".py" (*PyMOL*),
    - ".csv",
    - ".mol2",
    - ".grd"

    The ".csv" output has one header row followed by one row per feature;
    missing projected coordinates or vectors are written as ``0,0,0``.
    The ".py" output also writes a ``label_threshold_<identifier>.mol2`` file
    next to ``fname``.

    Text files are opened in text mode (Python 3).

    :param str fname: path to output file
    :raises TypeError: if the file extension is not one of the supported formats
    """
    extension = splitext(fname)[1]

    if extension == ".cm":
        print("WARNING! Charged features not currently supported in CrossMiner!")
        pharmacophore = self._get_crossminer_pharmacophore()
        pharmacophore.write(fname)

    elif extension == ".csv":
        header = ["Identifier", "Feature_type", "x", "y", "z", "score",
                  "projected_x", "projected_y", "projected_z",
                  "vector_x", "vector_y", "vector_z"]
        # newline="" is what the csv module expects for files it writes to.
        with open(fname, "w", newline="") as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=",")
            csv_writer.writerow(header)
            for feature in self._features:
                # A fresh row per feature: the old code kept appending to one
                # string, so rows repeated the header and all earlier features.
                row = [self.identifier,
                       feature.feature_type,
                       feature.feature_coordinates.x,
                       feature.feature_coordinates.y,
                       feature.feature_coordinates.z,
                       feature.score_value]

                projected = feature.projected_coordinates
                if projected:
                    row.extend([projected.x, projected.y, projected.z])
                else:
                    row.extend([0, 0, 0])

                vector = feature.vector
                if vector:
                    row.extend([vector.x, vector.y, vector.z])
                else:
                    row.extend([0, 0, 0])

                csv_writer.writerow(row)

    elif extension == ".py":
        lfile = "label_threshold_{}.mol2".format(self.identifier)
        pymol_out = pymol_imports()
        pymol_out += pymol_arrow()
        pymol_out += self._get_pymol_pharmacophore(lfile)
        with open(fname, "w") as pymol_file:
            pymol_file.write(pymol_out)

        label = self.get_label(self)
        with io.MoleculeWriter(join(dirname(fname), lfile)) as writer:
            writer.write(label)

    elif extension == ".json":
        interaction_dic = {'apolar': 'Hydrophobic',
                           'donor': 'HydrogenDonor',
                           'acceptor': 'HydrogenAcceptor',
                           'negative': 'NegativeIon',
                           'positive': 'PositiveIon'}
        pts = []
        for feat in self._features:
            if feat.vector:
                svector = {"x": feat.vector.x,
                           "y": feat.vector.y,
                           "z": feat.vector.z}
            else:
                svector = {"x": 0, "y": 0, "z": 0}

            pts.append({"name": interaction_dic[feat.feature_type],
                        "hasvec": bool(feat.vector),
                        "x": feat.feature_coordinates.x,
                        "y": feat.feature_coordinates.y,
                        "z": feat.feature_coordinates.z,
                        "radius": feat.settings.radius,
                        "enabled": True,
                        "vector_on": feat.settings.vector_on,
                        "svector": svector,
                        "minsize": "",
                        "maxsize": "",
                        "selected": False})

        with open(fname, "w") as pharmit_file:
            pharmit_file.write(json.dumps({"points": pts}))

    elif extension == ".mol2":
        mol = Molecule(identifier="pharmacophore_model")
        # "positive" was misspelt, which raised KeyError for positive features.
        atom_dic = {"apolar": 'C',
                    "donor": 'N',
                    "acceptor": 'O',
                    "negative": 'S',
                    "positive": 'H'}

        for feat in self.features:
            mol.add_atom(Atom(atomic_symbol=atom_dic[feat.feature_type],
                              atomic_number=14,
                              coordinates=feat.feature_coordinates,
                              label=str(feat.score_value)))

        with io.MoleculeWriter(fname) as w:
            w.write(mol)

    elif extension == ".grd":
        g = self._as_grid()
        g.write(fname)

    else:
        raise TypeError('"{}" output file type is not currently supported.'.format(extension))
def dock(self):
    """
    Setup and execution of docking run with GOLD.

    NB: Docking Settings class is imported from the Hotspots API rather than
    Docking API. This is essential for running hotspot guided docking.

    Steps, in order: download the PDB entry named by ``self.args.pdb`` into
    ``self.temp``; strip waters, metals and ligands and add hydrogens; write
    the protein as mol2; pick the cavity with the most features that expose a
    residue and use those residues as the binding site; when
    ``self.args.hotspot_guided`` is True add hotspot hydrogen-bond constraints
    and fitting points from the best 300-volume hotspot; finally dock
    ``self.search_ligands``.

    Side effects: writes "test.py" and "<i>.py" PyMOL files to the current
    working directory, and "constaints.mol2" under ``self.args.path`` for
    guided runs.

    :return: a :class:`ccdc.io.MoleculeReader`
    """
    docker = Docker()
    docker.settings = hs_docking.DockerSettings()

    # download protein
    PDBResult(self.args.pdb).download(self.temp)
    pdb_file = os.path.join(self.temp, self.args.pdb + ".pdb")
    protein = Protein.from_file(pdb_file)
    protein.remove_all_waters()
    protein.remove_all_metals()
    protein.add_hydrogens()
    for l in protein.ligands:
        protein.remove_ligand(l.identifier)

    protein_file = os.path.join(self.temp, self.args.pdb + ".mol2")
    with MoleculeWriter(protein_file) as w:
        w.write(protein)

    # setup
    docker.settings.add_protein_file(protein_file)

    # create binding site from list of residues
    cavs = Cavity.from_pdb_file(pdb_file)
    cavs[0].to_pymol_file("test.py")
    c = {}
    for i, cav in enumerate(cavs):
        cav.feats = []
        for feature in cav.features:
            # Some features do not expose a residue; skip those. Narrowed
            # from a bare ``except`` so Ctrl-C is not swallowed.
            try:
                cav.feats.append(feature.residue)
            except Exception:
                continue
        cav.len = len(cav.feats)
        # Keyed by residue count: cavities of equal size overwrite each other.
        c[cav.len] = cav.feats
        cav.to_pymol_file("{}.py".format(i))

    selected_cavity = max(c)

    docker.settings.binding_site = docker.settings.BindingSiteFromListOfResidues(
        protein=docker.settings.proteins[0],
        residues=c[selected_cavity])
    docker.settings.fitness_function = 'plp'
    docker.settings.autoscale = 100.
    docker.settings.output_directory = self.temp
    docker.settings.output_file = "docked_ligands.mol2"
    docker.settings.add_ligand_file(self.search_ligands, ndocks=25)

    # constraints
    if self.args.hotspot_guided is True:
        e_settings = result.Extractor.Settings()
        e_settings.mvon = True
        extractor = result.Extractor(self.hr, settings=e_settings)
        bv = extractor.extract_best_volume(volume=300)[0]

        out_dir = hs_utilities.Helper.get_out_dir(
            os.path.join(self.args.path, "best_volume"))
        with hs_io.HotspotWriter(path=out_dir) as hw:
            hw.write(bv)

        constraints = docker.settings.HotspotHBondConstraint.create(
            protein=docker.settings.proteins[0],
            hr=bv,
            weight=5,
            min_hbond_score=0.2,
            max_constraints=5)
        for constraint in constraints:
            docker.settings.add_constraint(constraint)
        docker.settings.generate_fitting_points(hr=bv)

        # Dump the constrained atom positions as dummy atoms for inspection.
        mol = Molecule(identifier="constraints")
        for constraint in constraints:
            for a in constraint.atoms:
                mol.add_atom(Atom(atomic_symbol="C",
                                  atomic_number=14,
                                  label="Du",
                                  coordinates=a.coordinates))
        with MoleculeWriter(os.path.join(self.args.path, "constaints.mol2")) as w:
            w.write(mol)

    docker.dock()
    results = docker.Results(docker.settings)
    return results.ligands
def dock(self, number_poses=100):
    """
    Configure a ``Docker`` run for the prepared ligand and execute it.

    Optionally prepares the protein and ligand first, defines the binding
    site from the reference ligand (6.0 distance), adds either a scaffold
    match constraint (when ``self.substructure`` is set) or a template
    similarity constraint, and docks with the 'plp' fitness function.
    The result is stored on ``self.docking_result``.

    :param number_poses: passed as the second argument of
        ``add_ligand_file`` (how many poses are saved)
    :return: the value returned by ``docker.dock``
    """
    # Set up protein and ligand, in case they need to be
    if self.prepare_protein:
        self.prepare_protein_for_dock()
    if self.prepare_ligand:
        self.prepare_ligand_for_dock()

    reference_ligand = MoleculeReader(self.reference_ligand_path)[0]
    prepared_protein = Protein.from_file(self.prepared_protein_path)
    prepared_ligand = MoleculeReader(self.prepared_ligand_path)[0]

    # Build the substructure molecule from the reference and prepared ligands
    # and keep a copy of it in the results directory.
    if self.substructure:
        substr_string = make_substructure_molecule(
            template_mol_path=self.reference_ligand_path,
            query_mol_path=self.prepared_ligand_path)
        substructure = Molecule.from_string(substr_string, format='sdf')
        with MoleculeWriter(
                str(
                    Path(self.gold_results_directory,
                         f"{self.lig_name}_substructure.sdf"))) as sdfwr:
            sdfwr.write(substructure)

    # Set up the docking run
    docker = Docker()
    docker._conf_file_name = self.conf_file_location
    docker_settings = docker.settings

    # Prevent it from generating a ton of output ligand files - the ranked docks are in 'concat_ranked_docked_ligands.mol2'
    docker_settings._settings.set_delete_rank_files(True)
    docker_settings._settings.set_delete_empty_directories(True)
    docker_settings._settings.set_delete_all_initialised_ligands(True)
    docker_settings._settings.set_delete_all_solutions(True)
    docker_settings._settings.set_delete_redundant_log_files(True)
    docker_settings._settings.set_save_lone_pairs(False)

    # Set up the binding site. Since the sites are based on fragment hits, generate a binding site around the starting hit.
    docker_settings.reference_ligand_file = self.reference_ligand_path
    docker_settings.binding_site = docker_settings.BindingSiteFromLigand(
        prepared_protein, reference_ligand,
        6.0)  # Default distance around ligand is 6 A. Should be ok for small fragments.
    docker_settings.add_protein_file(self.prepared_protein_path)
    docker_settings.diverse_solutions = self.diverse_solutions

    # Try a template similarity restraint:
    # NOTE(review): the ``else`` below is taken to pair with this ``if`` (not
    # with the ``try``) - please confirm against the intended control flow.
    if self.substructure:
        try:
            docker_settings.add_constraint(
                docker_settings.ScaffoldMatchConstraint(substructure))
        except Exception as e:
            # Fall back to a template similarity constraint (weight 75.0) and
            # record the failure; write_text replaces any existing log content.
            docker_settings.add_constraint(
                docker_settings.TemplateSimilarityConstraint(
                    'all', reference_ligand, weight=75.0))
            txtstr = 'Substructure search failed. Using template similarity'
            log_file = Path(self.results_directory, 'pipeline_error.log')
            log_file.write_text(txtstr)
    else:
        docker_settings.add_constraint(
            docker_settings.TemplateSimilarityConstraint('all',
                                                         reference_ligand,
                                                         weight=150.0))

    # Choose the fitness function: options: ['goldscore', 'chemscore', 'asp', 'plp']. plp is the default.
    docker_settings.fitness_function = 'plp'
    docker_settings.autoscale = self.autoscale
    docker_settings.early_termination = False
    docker_settings.output_directory = self.gold_results_directory
    docker_settings.output_file = str(
        Path(self.results_directory, 'concat_ranked_docked_ligands.mol2'))

    # Add the ligand
    docker_settings.add_ligand_file(
        self.prepared_ligand_path,
        number_poses)  # Second argument determines how many poses are saved

    # Perform the docking:
    gold_result = docker.dock(file_name=self.conf_file_location)
    # pickle.dump(obj=gold_result,file=Path(self.results_directory, 'gold_result').open())
    self.docking_result = gold_result
    return gold_result