def rmsd(self, ligand, reference):
    """
    Compute the RMSD of a ligand against a reference molecule.

    The ligand is first passed through ``self._clean_mol``; the reference
    is used exactly as supplied.

    :param ligand: molecule to clean and then compare
    :param reference: molecule the cleaned ligand is compared against
    :return: the value returned by ``MolecularDescriptors.rmsd``
    """
    cleaned_ligand = self._clean_mol(ligand)
    return MolecularDescriptors.rmsd(cleaned_ligand, reference)
def rdkit_rmsd(self, conformer, expected):
    """
    Compute the RMSD between a conformer and the expected molecule via RDKit.

    Both molecules are converted with ``etkdg_generator.ccdc_to_rdkit`` and
    compared with ``AllChem.GetBestRMS``. If any step of the RDKit path
    fails, the CCDC overlay is used instead.

    :param conformer: molecule of the generated conformer
    :param expected: molecule to compare the conformer against
    :return: ``(molecule, rmsd)`` from the RDKit path, otherwise the result of
        ``MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto``; in both
        cases the RMSD is at index 1
    """
    try:
        rd_conformer = etkdg_generator.ccdc_to_rdkit(conformer)
        rd_expected = etkdg_generator.ccdc_to_rdkit(expected)
        # NOTE(review): RDKit documents GetBestRMS(probe, ref) as moving the
        # probe; the probe here is rd_expected, so the molecule returned
        # below may not actually be the aligned one -- confirm intent.
        rmsd = AllChem.GetBestRMS(rd_expected, rd_conformer)
        aligned_conf = etkdg_generator.rdkit_conformer_to_ccdc(
            rd_conformer, -1)
        return (aligned_conf, rmsd)
    except Exception:
        # Best-effort fallback. ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        return MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            conformer, expected, rotate_torsions=False)
def docks_to_ref_rmsd(self):
    """
    Return the heavy-atom RMSD of every docked pose to the prepared ligand.

    The reference is the first entry read from ``self.prepared_ligand_path``.
    Each pose is compared using the atom selection returned by
    ``self.match_heavy_atoms`` for that pose.

    :return: list of RMSD values, one per entry of
        ``self.docking_result.ligands``, in the same order
    """
    # Only calculate for complete docking results!
    docked_molecules = [
        ligand.molecule for ligand in self.docking_result.ligands
    ]
    reference = MoleculeReader(self.prepared_ligand_path)[0]

    values = []
    for docked in docked_molecules:
        values.append(
            MolecularDescriptors.rmsd(
                reference,
                docked,
                exclude_hydrogens=True,
                atoms=self.match_heavy_atoms(reference, docked),
            )
        )
    return values
def generate_ccdc_start_points_from_expected(self, expected_folder,
                                             output_folder, results_stream,
                                             add_hydrogens,
                                             no_assign_bond_types):
    """
    Generate start-point molecules from every ``.mol2`` file in a folder.

    Each file is loaded with ``self._molecule``, optionally given hydrogens,
    rebuilt with ``Molecule.from_molecule`` and overlaid on a freshly loaded
    copy of the same file. One CSV row per file is written to
    ``results_stream``, preceded by a header before the first row.

    :param expected_folder: folder scanned for ``.mol2`` files
    :param output_folder: folder the rebuilt molecules are written to; it is
        created when missing, and nothing is written when it is ``None``
    :param results_stream: writable text stream receiving the CSV output
    :param add_hydrogens: when true, ``add_hydrogens()`` is called on each
        loaded molecule before it is rebuilt
    :param no_assign_bond_types: forwarded unchanged to ``self._molecule``
    :return: None
    """
    if output_folder is not None and not os.path.exists(output_folder):
        os.mkdir(output_folder)

    header_written = False
    for f in os.listdir(expected_folder):
        if os.path.splitext(f)[1] != ".mol2":
            continue

        expected_mol_filename = os.path.join(expected_folder, f)
        input_molecule = self._molecule(expected_mol_filename,
                                        no_assign_bond_types)
        if add_hydrogens:
            input_molecule.add_hydrogens()
        molid = input_molecule.identifier
        mol = Molecule.from_molecule(input_molecule)

        # Writing is optional, matching the None check above; joining a path
        # onto None would raise TypeError.
        if output_folder is not None:
            output_mol_filename = os.path.join(output_folder,
                                               os.path.basename(f))
            with MoleculeWriter(output_mol_filename) as mol_writer:
                mol_writer.write(mol)

        # Reload the file so the comparison target is untouched by
        # add_hydrogens() above.
        expected_molecule = self._molecule(expected_mol_filename,
                                           no_assign_bond_types)
        overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            mol, expected_molecule, rotate_torsions=True)

        if not header_written:
            results_stream.write(
                'id,rotated torsion rmsd,rotated torsion rmsd tanimoto\n')
            header_written = True
        # str.join works on Python 2 and 3; string.join is Python 2 only.
        results_stream.write(
            ','.join([molid, str(overlay[1]), str(overlay[2])]) + '\n')
def simpleRelabelling(stringIn, crystal):
    '''
    Print a relabelled copy of ``stringIn`` for every component of a crystal.

    Simple relabelling assumes that you can just add nAtomType to each label,
    i.e. with 2 O atoms per molecule, O1 becomes O3, O5, O7, ... in the other
    molecules.

    ``stringIn`` is split on whitespace and each token on ``_``; every piece
    is passed to ``labelIncludingNumberMols`` together with the per-element
    counts parsed from the formula of the first component. One line is
    printed per component, with tokens joined by ``" +"``.

    NOTE(review): ``labelIncludingNumberMols`` is not defined inside this
    function and must be available at module scope -- confirm.

    :param stringIn: whitespace-separated tokens of ``_``-joined atom labels
    :param crystal: object exposing ``molecule.components``, each with a
        ``formula`` string
    :return: None; the relabelled strings are only printed
    '''
    atomsPerMol = dict(
        re.split(r'(\d+)', x)[:2]
        for x in crystal.molecule.components[0].formula.split())

    for i2, _ in enumerate(crystal.molecule.components):
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print(" +".join([
            "_".join([
                labelIncludingNumberMols(y, atomsPerMol, i2)
                for y in x.split("_")
            ]) for x in stringIn.split()
        ]))
    return
def generate_ccdc_start_points(self, id_smilescode_pairs, expected_folder,
                               output_folder, results_stream,
                               no_assign_bond_types):
    """
    Build start-point molecules from SMILES and optionally score them.

    Some simple code to use our 1D->3D methods to generate start points.
    For every ``(id, smiles)`` pair a molecule is created with
    ``Molecule.from_smiles``. It is written to ``<output_folder>/<id>.mol2``
    when an output folder is given, and overlaid on
    ``<expected_folder>/<id>.mol2`` when an expected folder is given; each
    overlay adds one CSV row to ``results_stream``.

    :param id_smilescode_pairs: iterable of indexable pairs, identifier at
        index 0 and SMILES string at index 1
    :param expected_folder: folder with the expected ``.mol2`` files, or
        ``None`` to skip the comparison
    :param output_folder: folder for the generated molecules (created when
        missing), or ``None`` to skip writing
    :param results_stream: writable text stream receiving the CSV output
    :param no_assign_bond_types: forwarded unchanged to ``self._molecule``
    :return: None
    """
    if output_folder is not None and not os.path.exists(output_folder):
        os.mkdir(output_folder)

    header_written = False
    for pair in id_smilescode_pairs:
        molid = pair[0]
        smiles = pair[1]
        mol = Molecule.from_smiles(smiles, molid, True)

        if output_folder is not None:
            output_file = os.path.join(output_folder, '%s.mol2' % molid)
            with MoleculeWriter(output_file) as mol_writer:
                mol_writer.write(mol)

        if expected_folder is None:
            continue

        expected_mol_filename = os.path.join(expected_folder,
                                             "%s.mol2" % molid)
        expected_molecule = self._molecule(expected_mol_filename,
                                           no_assign_bond_types)
        overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
            mol, expected_molecule, rotate_torsions=True)

        if not header_written:
            results_stream.write(
                'id,rotated torsion rmsd,rotated torsion rmsd tanimoto\n')
            header_written = True
        # str.join works on Python 2 and 3; string.join is Python 2 only.
        results_stream.write(
            ','.join([molid, str(overlay[1]), str(overlay[2])]) + '\n')
def replacementDict(crystal1, crystal2):
    '''
    Map atom labels of ``crystal2`` onto offset labels derived from ``crystal1``.

    For every component of ``crystal2`` a maximum-common-substructure search
    is run against the single component of ``crystal1``. Each matched atom
    pair contributes one entry: the key is the ``crystal2`` atom label, the
    value is the ``crystal1`` label with its number increased by
    ``(atoms of that element per molecule) * (component index)``.

    Caveats carried over from the original author: the matching does not seem
    to consider 3D structure, so enantiomers (or quasi-enantiomers whose
    labelling makes them independent) may be mixed up.

    :param crystal1: crystal with exactly one component (Z' = 1)
    :param crystal2: crystal whose components each have as many atoms as
        ``crystal1.molecule``
    :return: dict of ``crystal2`` atom label -> relabelled ``crystal1`` label;
        later matches overwrite earlier ones with the same key
    :raises AssertionError: when either precondition above does not hold
    '''
    import re

    # only works with crystal1 being z'=1 and crystal2 being same molecule
    # but many times
    assert (len(crystal1.molecule.components) == 1)
    assert all(
        len(x.atoms) == len(crystal1.molecule.atoms)
        for x in crystal2.molecule.components)

    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    number_pattern = re.compile(r'(\d+)')

    def labelIncludingNumberMols(atom1Label, stoichiometry, molIndex):
        parts = number_pattern.split(atom1Label)
        if len(parts) > 1:
            element1, number1 = parts[:2]
        else:
            # It may be possible that something doesn't have a number - give
            # it '1' - watch here if other bugs arise
            element1, number1 = parts[0], u'1'
        return element1 + str(
            int(number1) + int(stoichiometry[element1]) * molIndex)

    atomsPerMol = dict(
        number_pattern.split(x)[:2]
        for x in crystal1.molecule.formula.split())

    return {
        pair[1].label:
        labelIncludingNumberMols(pair[0].label, atomsPerMol, i2)
        for mol1 in crystal1.molecule.components
        for i2, mol2 in enumerate(crystal2.molecule.components)
        for pair in MolecularDescriptors.MaximumCommonSubstructure().search(
            mol1, mol2)[0]
    }
import os

from ccdc import io
from ccdc.descriptors import MolecularDescriptors

if __name__ == "__main__":
    # Manual check: load the ligand, the reference and the top-ranked GOLD
    # pose for one PDB entry, strip placeholder atoms from the pose, print
    # the heavy-atom labels of all three and compute a ligand-vs-pose RMSD.
    pdb = "4G46"
    base = f"/local/pcurran/leads_frag/{pdb}"

    # test
    ligand_file = os.path.join(base, f"{pdb}_ligand.mol2")
    reference_file = os.path.join(base, f"{pdb}_ref.mol2")
    pose_file = os.path.join(
        base, "gold/goldscore/data/ranked_4G46_ligand_m1_1.mol2")

    mol1 = io.MoleculeReader(ligand_file)[0]
    mol2 = io.MoleculeReader(reference_file)[0]
    mol3 = io.MoleculeReader(pose_file)[0]

    # Heavy atoms labelled "****" are dropped from the docked pose.
    rm = [atm for atm in mol3.heavy_atoms if atm.label == "****"]
    mol3.remove_atoms(rm)

    for molecule in (mol1, mol2, mol3):
        print([atm.label for atm in molecule.heavy_atoms])

    a = MolecularDescriptors.rmsd(mol1, mol3)
conformers_mol = [mol] else: # Run minimisation print('Minimising molecular geometry using Tripos force field...') molecule_minimiser = conformer.MoleculeMinimiser( ) # Uses Tripos force field min_conformers = [] log_data = {} for conf_idx, conf in enumerate(conformers): score, rmsd = conf.normalised_score, conf.rmsd( ) # Conformer score and RMSD min_conf = molecule_minimiser.minimise( conf.molecule) # Minimise conformer min_conformers.append(min_conf) # Add to list min_rmsd = round( MolecularDescriptors.rmsd(mol, min_conf), 3 ) # Minimized RMSD (!!this method gives different results compared to conf.rmsd()) conf_name = '%s-%i' % (mol_name, conf_idx + 1) log_data[conf_name] = [score, rmsd, min_rmsd] conf_path = os.path.join(conformers_dir, '%s.%s' % (conf_name, args.format)) with io.MoleculeWriter(conf_path) as molecule_writer: molecule_writer.write(min_conf) conformers_mol = min_conformers if args.log: print('Saving log file (log.yml)...') with open('log.yml', 'w') as log: yaml.dump(log_data, log) print('Conformers saved in %s | format: %s\n' % (conformers_dir, args.format))
def do_dock(args):
    """
    Run GOLD for every docking/rescoring function pair and tabulate RMSDs.

    For each ordered pair drawn from the four scoring functions, a
    ``gold.conf`` is rendered with ``template`` and written into its own
    output folder, GOLD is launched on it, and the 30 ranked poses are
    compared against the reference ligand. When both functions of a pair are
    the same, no rescoring function is used (``None``). The per-run RMSDs are
    written to ``<data_dir>/<pdb>/<run_id>/results.csv``.

    :param args: 5-tuple ``(pdb, data_dir, fit_pts, fit_pts_path, run_id)``;
        ``fit_pts == "1"`` selects the fitting-points file at
        ``<data_dir>/<pdb>/<fit_pts_path>``, which must exist
    :return: None
    :raises AssertionError: when fitting points are requested but the file
        is missing
    """
    pdb, data_dir, fit_pts, fit_pts_path, run_id = args

    protein_path = os.path.join(data_dir, pdb, f"{pdb}_receptor.mol2")
    ligand_path = os.path.join(data_dir, pdb, f"{pdb}_ligand.mol2")
    ref_path = os.path.join(data_dir, pdb, f"{pdb}_ref.mol2")
    # Kept in its own variable: the original reassigned ``ref_path`` inside
    # the loop, so every GOLD run after the first was configured with this
    # PDB file instead of the ``_ref.mol2`` reference.
    rmsd_ref_path = os.path.join(data_dir, pdb, f"{pdb}_ref-ligand.pdb")

    if fit_pts == "1":
        fp_path = os.path.join(data_dir, pdb, fit_pts_path)
        assert os.path.exists(fp_path)
    else:
        fp_path = "fit_pts.mol2"

    scoring_funcs = ["goldscore", "chemscore", "asp", "plp"]
    auto_scale = 1
    gold_exe = "/local/pcurran/CCDC/Custom_GOLD/discovery-build-developer/bin/gold_auto"

    dock_func = []
    rescor_func = []
    rmsds = []

    for scor in scoring_funcs:
        for rescor in scoring_funcs:
            outdir = check_dir(os.path.join(data_dir, pdb, run_id))
            if scor == rescor:
                # Same function twice means "dock only, no rescoring".
                rescor = None
                outdir = check_dir(os.path.join(outdir, f"{scor}"))
            else:
                outdir = check_dir(os.path.join(outdir, f"{scor}_{rescor}"))
            dump_dir = check_dir(os.path.join(outdir, "data"))

            conf_file = template(auto_scale, ref_path, ligand_path,
                                 protein_path, scor, rescor,
                                 dump_dir=dump_dir, fit_pts=fp_path,
                                 fit=fit_pts)
            with open(os.path.join(outdir, "gold.conf"), "w") as w:
                w.write(conf_file)

            cmd = f"{gold_exe} {outdir}/gold.conf"
            os.system(cmd)

            ref = MoleculeReader(rmsd_ref_path)[0]
            docks = [
                remove_dummy_atoms(
                    MoleculeReader(
                        os.path.join(dump_dir,
                                     f"ranked_{pdb}_ligand_m1_{i}.mol2"))[0])
                for i in range(1, 31)
            ]
            r = [
                MolecularDescriptors.rmsd(ref, dock, exclude_hydrogens=True)
                for dock in docks
            ]

            dock_func.append(scor)
            rescor_func.append(rescor)
            rmsds.append(r)

    runs = len(scoring_funcs) ** 2
    # Transpose: one column per pose rank, one row per run.
    ranked_rmsds = zip(*rmsds)
    data = {
        "pdbs": [pdb] * runs,
        "run": [run_id] * runs,
        "dock_func": dock_func,
        "rescor_func": rescor_func,
    }
    data.update({f"r{i}": x for i, x in enumerate(ranked_rmsds)})

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(data_dir, pdb, run_id, "results.csv"))
def evaluate_molecule(self, molid, input_molecule, expected_molecule,
                      results_folder, cg_settings, save_best, save_all,
                      generator, rmsd_method):
    """
    Run a conformer generator on the input molecule and report how the
    resulting conformers compare with the expected molecule.

    :param molid: identifier used in progress messages and output filenames
    :param input_molecule: molecule handed to the conformer generator
    :param expected_molecule: molecule every conformer is compared against
    :param results_folder: folder for saved conformers, or ``None`` to save
        nothing
    :param cg_settings: generator settings; a default
        ``conformer_module.ConformerSettings()`` is used when ``None``
    :param save_best: write the best conformer to ``<molid>_best.mol2``
    :param save_all: write all conformers to ``<molid>_all.mol2``
    :param generator: ``'etkdg'`` selects ``etkdg_generator.EtkdgGenerator``;
        any other value selects ``conformer_module.ConformerGenerator``
    :param rmsd_method: ``"rdkit"`` selects ``self.rdkit_rmsd``; any other
        value selects the CCDC overlay without torsion rotation
    :return: dict with the keys ``first_overlay_result``,
        ``best_overlay_result``, ``rank_of_first_with_rmsd_less_than``
        (threshold -> 1-based rank, ``-1`` when never reached),
        ``ensemble_size``, ``gentime`` (seconds), ``probably_input``,
        ``without_inversion_input_rmsd``, ``best_input_rmsd``,
        ``rank_of_best`` (1-based) and ``degrees_of_freedom``
    :raises RuntimeError: when ``'etkdg'`` is requested but not available
    """
    # Assess Input RMSD
    input_overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
        input_molecule, expected_molecule, rotate_torsions=True)
    input_overlay_with_inversion = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
        input_molecule, expected_molecule, invert=True, rotate_torsions=True)
    without_inversion_input_rmsd = input_overlay[1]
    best_input_rmsd = min(without_inversion_input_rmsd,
                          input_overlay_with_inversion[1])

    if cg_settings is not None:
        settings = cg_settings
    else:
        settings = conformer_module.ConformerSettings()

    print('Generating', molid)
    start = time.time()
    if generator == 'etkdg':
        if not etkdg_available:
            # ``runtime_error`` is not a Python name; raising it would have
            # produced a NameError instead of the intended message.
            raise RuntimeError('etkdg is not available')
        conformer_generator = etkdg_generator.EtkdgGenerator(settings)
    else:
        conformer_generator = conformer_module.ConformerGenerator(settings)
    conformers = conformer_generator.generate(input_molecule)
    end = time.time()
    gentime = end - start
    print(molid, 'Took', gentime)

    rank_of_best = 0  # zero-based index into ``conformers``
    best_overlay_result = None
    best_overlay_molecule = None
    first_overlay_result = None
    rank_of_first_with_rmsd_less_than = {}
    for threshold in self.thresholds:
        rank_of_first_with_rmsd_less_than[threshold] = -1

    ensemble_size = len(conformers)
    for conformer_number, conformer in enumerate(conformers, 1):
        if rmsd_method == "rdkit":
            overlay = self.rdkit_rmsd(conformer.molecule, expected_molecule)
        else:
            overlay = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
                conformer.molecule, expected_molecule, rotate_torsions=False)

        if conformer_number == 1:
            first_overlay_result = overlay

        if best_overlay_result is None or overlay[1] < best_overlay_result[1]:
            best_overlay_result = overlay
            rank_of_best = conformer_number - 1
            best_overlay_molecule = conformer.molecule

        # Record the first conformer that beats each threshold. Only values
        # of existing keys are changed, which is safe while iterating.
        for key in rank_of_first_with_rmsd_less_than:
            if (rank_of_first_with_rmsd_less_than[key] == -1
                    and overlay[1] < key):
                rank_of_first_with_rmsd_less_than[key] = conformer_number

    if results_folder is not None:
        if save_best:
            filename = os.path.join(results_folder, "%s_best.mol2" % molid)
            with MoleculeWriter(filename) as mol_writer:
                mol_writer.write(conformers[rank_of_best].molecule)
        if save_all:
            filename = os.path.join(results_folder, "%s_all.mol2" % molid)
            with MoleculeWriter(filename) as mol_writer:
                for conf in conformers:
                    mol_writer.write(conf.molecule)

    # A best conformer within 0.001 of the input is flagged as the
    # generator probably having reproduced its input.
    input_with_best = MolecularDescriptors.overlay_rmsd_and_rmsd_tanimoto(
        best_overlay_molecule, input_molecule, rotate_torsions=False)
    probably_input = input_with_best[1] < 0.001

    degrees_of_freedom = (conformers.n_flexible_rings_in_molecule +
                          conformers.n_rotamers_in_molecule)

    return {
        'first_overlay_result': first_overlay_result,
        'best_overlay_result': best_overlay_result,
        'rank_of_first_with_rmsd_less_than': rank_of_first_with_rmsd_less_than,
        'ensemble_size': ensemble_size,
        'gentime': gentime,
        'probably_input': probably_input,
        'without_inversion_input_rmsd': without_inversion_input_rmsd,
        'best_input_rmsd': best_input_rmsd,
        'rank_of_best': rank_of_best + 1,
        'degrees_of_freedom': degrees_of_freedom
    }
print("Processed file --> " + file_name) crystal_reader = CrystalReader(file_path) crystal = crystal_reader[0] crystal.assign_bonds() packed_molecules = crystal.packing(box_dimensions=((0, 0, 0), (1, 1, 1)), inclusion='CentroidIncluded') packed_molecules.normalise_labels() adta_molecules = [] cent = [] cent_points = [] for comp in packed_molecules.components: if (len(comp.atoms) > 1): adta_molecules.append(comp) cent.append(MD.atom_centroid(*list(a for a in comp.atoms))) for c in cent: cent_points.append([round(c[0], 3), round(c[1], 3), round(c[2], 3)]) #--------------------------------------------------------------------- #FOR FILES start TO end IN THE INPUT DIRECTORY #EXPORT PAIRWISE COMPARISON TABLES FOR RMSD, POWDER AND LK #IF ALL SET TO TRUE. #DEFAULT SETTINGS RUN POWDER AND RMSD FOR ALL FILES IN DIRECTORY #--------------------------------------------------------------------- def linkcomp(start, end, pcomp=True, rcomp=True, lktest=False):