def generate_layer(self, iterations=2, thickness=10):
    """Build successive shell layers around the fragment and map each one.

    Layer 1 is the sphere grid of the first molecule in ``self.fragment``.
    Each later layer is the shell obtained by dilating the previous surface
    (``self.inner``) and subtracting that surface from the dilated grid.

    :param int iterations: number of layers to generate
    :param int thickness: number of dilation steps applied per shell
        (at least one dilation is always performed)
    :return: dict keyed by the layer number as a string ("1", "2", ...)
        holding the value returned by ``self._diff_to_map`` for that layer
    """
    protein = Protein.from_file(self.protein)
    single_grid = self.bcv_result._single_grid()
    layers = {}

    for layer_no in range(1, iterations + 1):
        if layer_no == 1:
            # Initial layer: the fragment itself
            template = self.bcv_result.super_grids["apolar"].copy()
            fragment = io.MoleculeReader(self.fragment)[0]
            shell = self._get_sphere_grid(template=template,
                                          molecule=fragment)
            self.inner = shell
        else:
            # One dilation up front, then thickness - 1 more
            self.outer = self.inner.dilate()
            for _ in range(thickness - 1):
                self.outer = self.outer.dilate()
            shell = self.outer - self.inner
            # The new outer surface is the inner surface of the next shell
            self.inner = self.outer

        layers[str(layer_no)] = self._diff_to_map(diff=shell,
                                                  map=single_grid,
                                                  prot=protein)
    return layers
def run_conformers(inputs):
    """Generate up to 25 conformers per molecule and write them to one file.

    :param inputs: ``(mol_file, outdir)`` pair; the output file is
        ``<outdir>/<name>_conf.mol2`` where ``<name>`` is the part of the
        input base name before its first "."
    :return: None
    """
    mol_file, outdir = inputs
    stem = os.path.basename(mol_file).split(".")[0]
    conf_file = os.path.join(outdir, stem + "_conf.mol2")

    generator = conformer.ConformerGenerator()
    generator.settings.max_conformers = 25

    with io.MoleculeReader(mol_file) as reader, \
            io.MoleculeWriter(conf_file) as writer:
        for parent in reader:
            hits = generator.generate(parent)
            # A failed generation yields None rather than an empty list;
            # iterating over None would crash, so skip this structure.
            if hits is None:
                print(
                    'WARNING: Conformer generation failed for structure %s in %s!'
                    % (parent.identifier, mol_file))
                continue

            for index, hit in enumerate(hits):
                conf_mol = hit.molecule
                # The conformer index is appended so every written
                # conformer carries a unique identifier.
                conf_mol.identifier = f"{conf_mol.identifier}_{index}"
                writer.write(conf_mol)
def _get_atomic_overlap(self, cav_id, other_id, lig_id):
    """
    Write the per-atom volume overlap between a ligand and its BCV result.

    When the BCV archive ("out.zip") is missing, every heavy atom is given
    an overlap of 0.0 under its atom type instead.

    :param cav_id: cavity key into ``self.bcv`` / ``self.atomic_overlaps``
    :param other_id: key of the 'other' protein
    :param lig_id: ligand key
    :return: None
    """
    # inputs
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    path = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

    # tasks
    if not os.path.exists(path):
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        out = {"donor": {}, "acceptor": {}, "apolar": {}}
        for atom in mol.heavy_atoms:
            kind = Helper.get_atom_type(atom)
            # A "doneptor" atom is recorded as both donor and acceptor
            keys = ("donor", "acceptor") if kind == "doneptor" else (kind,)
            for key in keys:
                out[key][atom.label] = 0.0
    else:
        out = HotspotReader(path).read().atomic_volume_overlap(mol)

    # output
    with open(self.atomic_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str(out))
def run(self):
    """Run a hotspot calculation on the input protein and write the result.

    The protein is read from ``self.input().path``. The first molecule of
    ``ligands/<self.pdb>.sdf`` is passed to the runner as ``cavities``.
    The result is written, zipped, into the directory that contains
    ``self.output().path``.
    """
    protein = Protein.from_file(self.input().path)
    ligand = io.MoleculeReader('ligands/{}.sdf'.format(self.pdb))[0]

    runner = Runner()

    # Calculation settings
    calc_settings = runner.Settings()
    calc_settings.apolar_translation_threshold = 15
    calc_settings.polar_translation_threshold = 15
    calc_settings.polar_contributions = False
    calc_settings.sphere_maps = True
    calc_settings.nrotations = 3000

    result = runner.from_protein(protein,
                                 buriedness_method='ghecom',
                                 nprocesses=1,
                                 settings=calc_settings,
                                 cavities=ligand)

    # Output settings
    writer_settings = HotspotWriter.Settings()
    writer_settings.charged = False

    out_dir = os.path.dirname(self.output().path)
    writer = HotspotWriter(out_dir,
                           grid_extension=".grd",
                           zip_results=True,
                           settings=writer_settings)
    writer.write(result)
def _get_volume_overlap(self, cav_id, other_id, lig_id):
    """
    Write volume-overlap percentages for a ligand, its BCV and the hotspot maps.

    Four files are written (paths taken from the matching ``self.*_overlaps``
    lookups):

    * BCV/ligand overlap as a percentage of the ligand volume
    * BCV/ligand overlap as a percentage of the BCV volume
    * hotspot/ligand overlap as a percentage of the ligand volume, one
      comma-separated value per threshold (10, 14, 17)
    * hotspot/ligand overlap as a percentage of the thresholded hotspot
      volume, one comma-separated value per threshold

    Nothing is written when either "out.zip" archive is missing.

    :param cav_id: cavity key
    :param other_id: key of the 'other' protein
    :param lig_id: ligand key
    :return: None
    """
    def nonzero(val):
        # Replace an empty volume by 1 so the percentage divisions below
        # cannot raise ZeroDivisionError (the overlap is 0 in that case).
        return 1 if val == 0 else val

    # inputs
    mol = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    path1 = os.path.join(self.hotspot[cav_id], "out.zip")
    path2 = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")
    thresholds = [10, 14, 17]

    if not (os.path.exists(path1) and os.path.exists(path2)):
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        return

    bcv = HotspotReader(path2).read()
    hot = HotspotReader(path1).read()

    # tasks
    other = Grid.from_molecule(mol)

    bcv_sg = Grid.get_single_grid(bcv.super_grids, mask=False)
    bcv_overlap = bcv_sg._mutually_inclusive(other=other).count_grid()

    # Guard every denominator, not only the hotspot volumes
    lig_vol = nonzero((other > 0).count_grid())
    bcv_vol = nonzero((bcv_sg > 0).count_grid())

    # The combined hotspot grid does not depend on the threshold: build once
    hot_sg = Grid.get_single_grid(hot.super_grids, mask=False)
    hot_sgs = [hot_sg > t for t in thresholds]
    hot_vols = [nonzero(sg.count_grid()) for sg in hot_sgs]
    hot_overlap = [sg._mutually_inclusive(other=other).count_grid()
                   for sg in hot_sgs]

    # output
    with open(self.bcv_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str((bcv_overlap / lig_vol) * 100))

    with open(self.bcv_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str((bcv_overlap / bcv_vol) * 100))

    with open(self.hot_lig_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        hot_lig = [str((a / lig_vol) * 100) for a in hot_overlap]
        print(hot_lig)
        writer.write(",".join(hot_lig))

    with open(self.hot_hot_overlaps[cav_id][other_id][lig_id], 'w') as writer:
        hot_hot = [str((overlap / vol) * 100)
                   for overlap, vol in zip(hot_overlap, hot_vols)]
        writer.write(",".join(hot_hot))
def run_docking(self):
    """Dock the virtual library with GOLD and return the docked poses.

    The binding site is a 10.0 radius region around the centre of geometry
    of the starting fragment (``<in_dir>/fragment.mol2``), which is also used
    as the template of a similarity constraint. Results are written to a
    fresh temporary directory and read back into memory.

    :return: list of docked molecules read from "docked_ligands.mol2"
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Run GOLD docking ...")

    docker = Docker()
    settings = docker.settings
    self.start_ligand = io.MoleculeReader(
        os.path.join(self.in_dir, "fragment.mol2"))[0]

    # NOTE: the temporary directory is not removed by this method.
    tempd = tempfile.mkdtemp()

    settings.add_protein_file(os.path.abspath(self.protein))
    settings.binding_site = settings.BindingSiteFromPoint(
        settings.proteins[0], self.start_ligand.centre_of_geometry(), 10.0)
    settings.fitness_function = 'plp'
    settings.autoscale = 10.
    settings.output_directory = tempd
    settings.output_file = "docked_ligands.mol2"
    settings.add_ligand_file(self.add_ligands, ndocks=10)

    # setup constraints
    settings.add_constraint(
        settings.TemplateSimilarityConstraint(type="all",
                                              template=self.start_ligand,
                                              weight=150))
    # NOTE(review): this call is made on a fresh ProteinFileInfo() whose
    # result is discarded -- confirm it has the intended effect.
    settings.ProteinFileInfo().fitting_points_file("fname.mol2")

    docker.dock()

    output_file = os.path.join(settings.output_directory,
                               settings.output_file)
    docked_molecules = list(
        io.MoleculeReader(os.path.join(tempd, output_file)))
    print(docked_molecules)
    return docked_molecules
def mogul_summary(filename, engine):
    """Print a one-line CSV geometry summary for the first molecule in a file.

    The printed fields are the file's base name without extension, followed
    by a (total, unusual) count pair for the analysed angles, bonds, rings
    and torsions, in that order.

    :param filename: path of the molecule file
    :param engine: object providing ``analyse_molecule(mol)``
    :return: None
    """
    molecule = io.MoleculeReader(filename)[0]
    analysed = engine.analyse_molecule(molecule)

    fields = [os.path.splitext(os.path.basename(filename))[0]]
    groups = (analysed.analysed_angles, analysed.analysed_bonds,
              analysed.analysed_rings, analysed.analysed_torsions)
    for group in groups:
        fields.append(len(group))
        fields.append(sum(1 for item in group if item.unusual))

    print(",".join(map(str, fields)))
def run(self):
    """Score every ligand against the hotspot result and dump a CSV.

    One row is written per heavy atom of each scored ligand:
    the ligand identifier, the atom label and the atom's ``partial_charge``
    on the scored molecule (written under the "score" column).
    """
    ligands = io.MoleculeReader(self.input()['ligands'].path)
    hotspot = HotspotReader(self.input()['hs_result'].path).read()

    with open(self.output().path, 'w') as csv_file:
        csv_file.write("mol_id,atom_id,score\n")
        for ligand in ligands:
            scored = hotspot.score(ligand)
            csv_file.writelines(
                "{},{},{}\n".format(ligand.identifier, atom.label,
                                    atom.partial_charge)
                for atom in scored.heavy_atoms)
def testdetect_from_ligand_ensemble(self):
    """Ensemble detection with only ring features yields two features."""
    data_dir = ("testdata/pharmacophore_extension/"
                "LigandPharmacophoreModel/from_ligand_ensemble")
    with PushDir(data_dir):
        overlay = io.MoleculeReader("test_overlay.mol2")

        model = LigandPharmacophoreModel()
        model.feature_definitions = ["ring_planar_projected"]
        model.detect_from_ligand_ensemble(ligands=overlay, cutoff=2)

        self.assertEqual(2, len(model.detected_features))
def main():
    """Write control fitting points for every PDB directory under the base.

    For each sub-directory ``<pdb>`` a grid is built from
    ``<pdb>_ref.mol2`` and passed to ``docking_fitting_pts`` with
    ``control.mol2`` in the same directory as the output name.
    """
    base = "/local/pcurran/leads_frag"
    pdbs = [
        entry for entry in os.listdir(base)
        if os.path.isdir(os.path.join(base, entry))
    ]

    for pdb in tqdm(pdbs):
        pdb_dir = os.path.join(base, pdb)
        reference = io.MoleculeReader(
            os.path.join(pdb_dir, f"{pdb}_ref.mol2"))[0]
        grid = Grid.from_molecule(reference,
                                  mode='replace',
                                  padding=10,
                                  scaling=0.5)
        docking_fitting_pts(grid,
                            fname=os.path.join(pdb_dir, "control.mol2"),
                            high=1)
def score_hitlist(self):
    """Score docked hits that still overlap the starting fragment.

    A hit is kept when the fraction of the fragment's sphere volume that it
    also occupies exceeds 0.85 and ``self.fudge_shift`` between the
    reference atoms of fragment and hit is below 2. Kept hits are stored in
    ``self.score_dict`` with their hotspot score.

    :return: ``OrderedDict`` of ``self.score_dict`` sorted by descending score
    :raises IndexError: if the fragment or a hit has no cyclic donor atom
        with atomic weight 14.0067
    """
    def reference_atom(mol):
        # placeholder atom rmsd needed: the first cyclic donor atom with
        # the atomic weight of nitrogen serves as the anchor
        return [
            at for at in mol.atoms
            if at.is_cyclic and at.is_donor and at.atomic_weight == 14.0067
        ][0]

    def sphere_grid(mol):
        # Blank copy of the apolar grid with a sphere of 1s on every atom
        grid = self.bcv_result.super_grids["apolar"].copy()
        grid *= 0
        for atom in mol.atoms:
            grid.set_sphere(atom.coordinates, atom.vdw_radius, 1)
        return grid

    # volume overlap between initial fragment and substitution
    overlap_cutoff = 0.85
    lig = io.MoleculeReader(os.path.join(self.in_dir, "fragment.mol2"))[0]
    ref = sphere_grid(lig)
    ref_a = reference_atom(lig)

    for i, h in enumerate(self.hit_list):
        # print() with one argument works on both Python 2 and 3; the bare
        # print statement is a SyntaxError on Python 3.
        print(i)
        clean = sphere_grid(h)
        overlap = (ref > 0) & (clean > 0)
        percentage_overlap = float(len(self.get_scores(overlap))) / float(
            len(self.get_scores(ref)))

        hit_b = reference_atom(h)
        fudge_shift = self.fudge_shift(ref_a, hit_b)

        if percentage_overlap > overlap_cutoff and fudge_shift < 2:
            hotspot_score = self.hotspot_result.score_ligand(h)
            self.score_dict.update({h: hotspot_score})

    return OrderedDict(
        sorted(self.score_dict.items(), key=itemgetter(1), reverse=True))
def _get_ligand_volume(self, other_id, lig_id):
    """
    Write the volume of a ligand, computed from its grid representation.

    The volume is the grid's ``count_grid()`` value multiplied by the cube
    of the grid spacing.

    :param other_id: key of the 'other' protein
    :param lig_id: ligand key
    :return: None
    """
    # inputs
    source = self.extracted_ligands[other_id][lig_id]
    molecule = io.MoleculeReader(source)[0]

    # tasks
    grid = Grid.from_molecule(molecule)
    point_volume = grid.spacing ** 3
    volume = grid.count_grid() * point_volume

    # output
    target = self.ligand_volume[other_id][lig_id]
    with open(target, 'w') as handle:
        handle.write(str(volume))
def testdetect_from_ligand_ensemble_cdk2(self):
    """Keeping the top four ensemble features gives a model of length four."""
    data_dir = ("testdata/pharmacophore_extension/"
                "LigandPharmacophoreModel/from_ligand_ensemble_big_all")
    with PushDir(data_dir):
        overlay = io.MoleculeReader("cdk2_ligands.mol2")

        model = LigandPharmacophoreModel()
        model.feature_definitions = [
            "ring_planar_projected",
            "donor_projected",
            "acceptor_projected",
        ]
        model.detect_from_ligand_ensemble(ligands=overlay, cutoff=2)

        feature_count = 4
        model.detected_features = model.top_features(num=feature_count)

        self.assertEqual(feature_count, len(model))
def delete_anion(self, path_anion):
    '''
    Remove anions, defined by mol2 files, from every entry with a 3D structure.

    A component is treated as an anion when a substructure hit of one of the
    anion definitions covers as many atoms as the whole component. Matching
    is done on a copy of the molecule from which metal atoms and atoms
    without bonds have been removed. ``self.entry_reader`` is replaced by
    the list of processed entries.

    :param path_anion: directory containing the anion definition ``*.mol2`` files
    :return: None
    :raises FileExistsError: if ``path_anion`` is not a directory
    '''
    if os.path.isdir(path_anion):
        # One substructure query per file, built from the first component
        # of the first molecule in that file
        anion_list = [
            search.MoleculeSubstructure(
                io.MoleculeReader(f)[0].components[0])
            for f in glob.glob(os.path.join(path_anion, '*.mol2'))
        ]
    else:
        raise FileExistsError('do not find the path!')
    list_crystals_remove_anion = []
    p_bar = tqdm(self.entry_reader)
    for entry in p_bar:
        if entry.has_3d_structure:
            # Ensure labels are unique
            mol = entry.molecule
            mol.normalise_labels()
            # Use a copy
            clone = mol.copy()
            # Remove all metal atoms (and atoms that have no bonds)
            clone.remove_atoms(a for a in clone.atoms
                               if a.is_metal or not a.bonds)
            for c in clone.components:
                for anion in anion_list:
                    ani_search = search.SubstructureSearch()
                    ani_search.add_substructure(anion)
                    hits = ani_search.search(c)
                    for hit in hits:
                        hit_atoms = hit.match_atoms()
                        # Only a hit spanning the entire component counts
                        if len(hit_atoms) == len(c.atoms):
                            # Atoms are looked up by label because the hit
                            # refers to atoms of the clone, not of mol
                            mol.remove_atoms(
                                mol.atom(a.label) for a in hit_atoms)
            # NOTE(review): the nesting of the next two statements under the
            # has_3d_structure check is assumed -- confirm whether entries
            # without a 3D structure should still be appended.
            entry.crystal.molecule = self.__delete_isolated_atoms(mol)
            list_crystals_remove_anion.append(entry)
        p_bar.set_description('Anions removing...')
    self.entry_reader = list_crystals_remove_anion
def chunk_files(mol_file, outdir, chunk_size=100):
    """Split a molecule file into mol2 files of at most ``chunk_size`` molecules.

    Each written molecule gets ``_<chunk index>_<index within chunk>``
    appended to its identifier so identifiers stay unique across chunks.

    :param str mol_file: path of the input molecule file
    :param str outdir: output directory; created (with parents) if missing
    :param int chunk_size: maximum number of molecules per output file
    :return: list of the output file paths, in chunk order
    :raises ValueError: if ``chunk_size`` is zero (from ``range``)
    """
    # makedirs also handles nested paths and avoids the check-then-create race
    os.makedirs(outdir, exist_ok=True)

    # Read everything up front and release the input file handle
    with io.MoleculeReader(mol_file) as reader:
        mols = list(reader)

    stem = os.path.basename(mol_file).split('.')[0]
    outfiles = []
    for i, start in enumerate(range(0, len(mols), chunk_size)):
        outfile = os.path.join(outdir, f"{stem}_chunk{i}.mol2")
        outfiles.append(outfile)
        with io.MoleculeWriter(outfile) as w:
            for j, mol in enumerate(mols[start:start + chunk_size]):
                mol.identifier = f"{mol.identifier}_{i}_{j}"
                w.write(mol)
    return outfiles
def get_all_function_groups(path_mols, path_con):
    """Count the occurrences of each defined functional group in *.mol2 files.

    :param path_mols: directory containing the ``*.mol2`` files to analyse
    :param path_con: directory containing the ``*.con`` files that define
        the functional groups
    :return: tuple ``(dict_result, list_con_names)``; ``dict_result`` maps
        each mol2 file name to a list of hit counts, one per functional
        group, and ``list_con_names`` holds the ``*.con`` file names in the
        same order as those counts
    """
    list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

    # Functional groups are defined through the *.con files. Names are taken
    # from the globbed paths (not os.listdir) so that they always line up
    # with the count columns, even if the directory holds other files.
    path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
    list_con_names = [os.path.basename(path) for path in path_conner_list]
    list_connser_substructure = [
        search.ConnserSubstructure(path) for path in path_conner_list
    ]

    dict_result = dict()
    pbar = tqdm(list_path_mols)
    for path_mol in pbar:
        mol_temp = io.MoleculeReader(path_mol)[0]

        # One entry per defined functional group: number of hits in this file
        list_temp = []
        for func_group in list_connser_substructure:
            substructure_search = search.SubstructureSearch()
            substructure_search.add_substructure(func_group)
            list_temp.append(len(substructure_search.search(mol_temp)))

        # Key by the file actually read; indexing os.listdir() output by a
        # counter mislabels results when non-mol2 files are present.
        dict_result[os.path.basename(path_mol)] = list_temp
        pbar.set_description('正在统计所有的指定基团:')
    return dict_result, list_con_names
def _get_matched_atoms(self, cav_id, other_id, lig_id):
    """
    Write the percentage of ligand atoms matched by the BCV result.

    The original author describes this as the ligand overlap implementation
    from the DoGSiter paper. The output file holds the overall percentage on
    the first line and the per-type dictionary on the second. Nothing is
    written when the BCV archive ("out.zip") is missing.

    :param cav_id: cavity key
    :param other_id: key of the 'other' protein
    :param lig_id: ligand key
    :return: None
    """
    # inputs
    ligand = io.MoleculeReader(self.extracted_ligands[other_id][lig_id])[0]
    archive = os.path.join(self.bcv[cav_id][other_id][lig_id], "out.zip")

    if not os.path.exists(archive):
        print("no BCV for cavity {}, BCV {}".format(cav_id, lig_id))
        return

    # tasks
    result = HotspotReader(archive).read()
    matched = result.percentage_matched_atoms(mol=ligand,
                                              threshold=0,
                                              match_atom_types=True)
    perc, type_dic = matched

    # output
    with open(self.matched[cav_id][other_id][lig_id], 'w') as writer:
        writer.write(str(perc) + "\n")
        writer.write(str(type_dic))
def __init__(self, in_dir, charged=False, library=True):
    """Set up paths, load or generate the library and run the precalculations.

    :param in_dir: directory expected to hold "fragment.mol2" and "protein.pdb"
    :param charged: stored on ``self.charged``
    :param library: when equal to ``True`` the library is read from
        "virtual_library.mol2" in the working directory; otherwise
        ``self.generate_library()`` is called
    """
    self.in_dir = in_dir
    self.fragment = os.path.join(self.in_dir, "fragment.mol2")

    # The comparison is deliberately an equality test rather than a
    # truthiness test, so truthy values that are not equal to True still
    # take the generate branch.
    if not (library == True):
        self.virtual_library = self.generate_library()
    else:
        self.virtual_library = list(
            io.MoleculeReader("virtual_library.mol2"))

    self.protein = os.path.join(self.in_dir, "protein.pdb")
    self.charged = charged

    # Each step below is run in this order
    self.hotspot_result = self.generate_hotspot()
    self.bcv_result = self.generate_BCV()
    self.layer_dict = self.generate_layer()
    self.constraints = self.growing_constraints(self.layer_dict["2"])

    # Filled later: maps a molecule to its score
    self.score_dict = {}
import os

from ccdc import io
from ccdc.descriptors import MolecularDescriptors

if __name__ == "__main__":
    # Scratch check: compare the heavy-atom labels of the ligand, the
    # reference and a docked pose, then compute an RMSD.
    pdb = "4G46"
    base = f"/local/pcurran/leads_frag/{pdb}"

    # test
    ligand_path = os.path.join(base, f"{pdb}_ligand.mol2")
    ref_path = os.path.join(base, f"{pdb}_ref.mol2")
    docked_path = os.path.join(
        base, "gold/goldscore/data/ranked_4G46_ligand_m1_1.mol2")

    mol1 = io.MoleculeReader(ligand_path)[0]
    mol2 = io.MoleculeReader(ref_path)[0]
    mol3 = io.MoleculeReader(docked_path)[0]

    # Drop the heavy atoms labelled "****" from the docked pose
    rm = [atm for atm in mol3.heavy_atoms if atm.label == "****"]
    mol3.remove_atoms(rm)

    for mol in (mol1, mol2, mol3):
        print([atm.label for atm in mol.heavy_atoms])

    a = MolecularDescriptors.rmsd(mol1, mol3)
from ccdc import io
from scipy.spatial import distance

# Print the smallest distance between the first atom of the fitting-point
# molecule and any of its other atoms.
mol = io.MoleculeReader(
    "/local/pcurran/leads_frag/3CHC/gold/goldscore/data/fit_pts.mol2")[0]

_coords = [[c.x, c.y, c.z] for c in (atom.coordinates for atom in mol.atoms)]

# First atom versus all remaining atoms
pt_1 = _coords[:1]
pt_all = _coords[1:]

ds = distance.cdist(pt_1, pt_all)
print(min(ds[0]))
action='store_true', default=False, help="Don't run minimisation on conformers (default: False)") parser.add_argument('--nocleanup', '-nc', action='store_true', default=False, help="Don't remove conformer search logs (default: False)") args = parser.parse_args() # Read molecule ------------------------------------------------------------------------------------ print('%s\nReading molecule: %s\n' % ('-' * 40, args.molecule)) mol_path = args.molecule mol_name = os.path.splitext(os.path.basename(mol_path))[0] mol_reader = io.MoleculeReader(mol_path) mol = mol_reader[0] mol_reader.close() # Generate conformers ------------------------------------------------------------------------------ if args.conformers > 0: print('Generating %i conformers...' % args.conformers) conformer_generator = conformer.ConformerGenerator( ) # Initialize conformer generator conformer_generator.settings.max_conformers = args.conformers # Set max number of conformers conformers = conformer_generator.generate(mol) # Run conformer generator else: print('Skipping conformer generation...') conformers = None if conformers is not None:
from ccdc import io import pandas as pd import numpy as np import time np.random.seed(901) csd_reader = io.MoleculeReader('CSD') class Mol(): """ A wrapper class for csd molecule objects. """ def __init__(self, index): self._molecule = self.get_mol(index) def __getattr__(self, attr): """Wraps this class object around a CSD molecule object.""" if attr in self.__dict__: return getattr(self, attr) return getattr(self._molecule, attr) def get_mol(self, index): """Acquires a molecule object from the CSD, using either the string label for the structure, or its numerical index.""" try: return csd_reader[index] except TypeError: return csd_reader.molecule(index) def remove_unlocated(self):
def delete_solvents(self, list_solvent_names=None):
    """Remove solvent components from every crystal entry.

    If no solvent list is given, all ``*.mol2`` files of the CCDC solvent
    library shipped with the CSD installation are used.

    :param list_solvent_names: names of the solvents to remove (each must
        have a ``<name>.mol2`` file in the solvent library), list or tuple
    :return: None
    :raises FileExistsError: if the solvent library directory does not exist
    """
    # Location of the solvent library inside the CSD installation
    solvent_file = os.path.join(os.path.dirname(io.csd_directory()),
                                'Mercury', 'molecular_libraries',
                                'ccdc_solvents')
    if not os.path.isdir(solvent_file):
        raise FileExistsError('路径不存在!')

    # Collect the SMILES strings of the solvents to consider
    if not list_solvent_names:
        solvent_paths = glob.glob(os.path.join(solvent_file, '*.mol2'))
    else:
        solvent_paths = [
            os.path.join(solvent_file, solvent + '.mol2')
            for solvent in list_solvent_names
        ]
    # Each path is opened on its own; the original named-solvent branch
    # indexed the path string and passed a generator to MoleculeReader.
    solvent_smiles = [
        io.MoleculeReader(path)[0].smiles for path in solvent_paths
    ]

    # Remove the solvents
    list_crystals_remove_solvents = []
    p_bar = tqdm(self.entry_reader)
    for entry in p_bar:
        try:
            if entry.has_3d_structure:
                # Ensure labels are unique
                mol = entry.molecule
                mol.normalise_labels()
                # Use a copy
                clone = mol.copy()
                # Remove all bonds containing a metal atom
                clone.remove_bonds(b for b in clone.bonds
                                   if any(a.is_metal for a in b.atoms))
                # Work out which components to remove
                to_remove = [
                    c for c in clone.components
                    if not self.has_metal(c) and
                    (not self.is_multidentate(c, mol) or
                     self.is_solvent(c, solvent_smiles))
                ]
                # Remove the atoms of selected components
                mol.remove_atoms(
                    mol.atom(a.label) for c in to_remove for a in c.atoms)
                entry.crystal.molecule = mol
        except Exception:
            # Best effort: an entry that cannot be processed is kept as is.
            # Exception (not BaseException) so Ctrl-C still stops the run.
            pass
        # Every entry is kept, whether or not it was modified
        list_crystals_remove_solvents.append(entry)
        p_bar.set_description('正在去除溶剂:')
    self.entry_reader = list_crystals_remove_solvents
    return None
def get_neighbor_function_groups(path_mols, path_con, query_atom):
    """Count functional groups bonded to a query atom in each mol2 file.

    For every component that contains at least one atom matching
    ``query_atom``, a functional-group hit is counted when any of its matched
    atoms is a neighbour of such an atom. Hits with the same string
    representation of their matched atoms are counted once per component.

    :param path_mols: directory containing the ``*.mol2`` files to analyse
    :param path_con: directory containing the ``*.con`` files that define
        the functional groups
    :param query_atom: argument passed to ``QueryAtom`` to find the centres
    :return: tuple ``(dict_result, list_con_names)``; ``dict_result`` maps
        each mol2 file name to a list of counts, one per functional group,
        and ``list_con_names`` holds the ``*.con`` file names in that order
    """
    list_path_mols = glob.glob(os.path.join(path_mols, '*.mol2'))

    # Functional groups are defined through the *.con files. Names come from
    # the globbed paths (not os.listdir) so they line up with the counts.
    path_conner_list = glob.glob(os.path.join(path_con, '*.con'))
    list_con_names = [os.path.basename(path) for path in path_conner_list]
    list_connser_substructure = [
        search.ConnserSubstructure(path) for path in path_conner_list
    ]

    dict_result = dict()
    for path_mol in tqdm(list_path_mols):
        # Components are taken before the labels are normalised, as before
        mol = io.MoleculeReader(path_mol)[0]
        list_components = mol.components
        mol.normalise_labels()

        # The query-atom search does not depend on the functional group, so
        # it is run once per component instead of once per (group, component)
        centres = []
        for component in list_components:
            centre_query = search.QuerySubstructure()
            centre_query.add_atom(QueryAtom(query_atom))
            centre_search = search.SubstructureSearch()
            centre_search.add_substructure(centre_query)
            neighbour_sets = [
                set(hit.match_atoms()[0].neighbours)
                for hit in centre_search.search(component)
            ]
            if neighbour_sets:
                centres.append((component, neighbour_sets))

        # Number of coordinating groups found, one entry per group definition
        list_temp = []
        for con in list_connser_substructure:
            count_temp = 0
            for component, neighbour_sets in centres:
                group_search = search.SubstructureSearch()
                group_search.add_substructure(con)

                # String forms of the matched groups, to de-duplicate hits
                set_temp = set()
                for hit in group_search.search(component):
                    hit_atoms = hit.match_atoms()
                    if any(not neighbours.isdisjoint(hit_atoms)
                           for neighbours in neighbour_sets):
                        set_temp.add(str(hit_atoms))
                count_temp += len(set_temp)
            list_temp.append(count_temp)

        # Key by the file actually read; indexing os.listdir() output by
        # position mislabels results when non-mol2 files are present.
        dict_result[os.path.basename(path_mol)] = list_temp
    return dict_result, list_con_names