def test18GetBestRMSAndConjugatedGroups(self):
    """Check GetBestRMS with and without symmetrizeConjugatedTerminalGroups.

    Both molecules are a propanoate anion built from the same five
    coordinates; the two SMILES only differ in which terminal oxygen is
    written as the charged one.  With default arguments the expected RMSD
    is 0; with ``symmetrizeConjugatedTerminalGroups=False`` the expected
    RMSD is 0.747.
    """
    mol = Chem.MolFromSmiles(
        "CCC(=O)[O-] |(-1.11,0.08,-0.29;0.08,-0.18,0.58;1.34,0.03,-0.16;1.74,1.22,-0.32;2.06,-1.04,-0.66)|"
    )
    qry = Chem.MolFromSmiles(
        "CCC([O-])=O |(-1.11,0.08,-0.29;0.08,-0.18,0.58;1.34,0.03,-0.16;1.74,1.22,-0.32;2.06,-1.04,-0.66)|"
    )

    # Default call: expected to report a perfect match.
    rmsd = rdMolAlign.GetBestRMS(qry, mol)
    # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which no
    # longer exists in recent Python versions.
    self.assertAlmostEqual(rmsd, 0, 3)

    # Same pair with the symmetrization switched off.
    rmsd = rdMolAlign.GetBestRMS(qry, mol, symmetrizeConjugatedTerminalGroups=False)
    self.assertAlmostEqual(rmsd, 0.747, 3)
def rmsd_frag_mol(gen_mol, ref_mol, start_pt):
    """Return the best RMSD between the fragment parts of two molecules.

    The fragment SMILES ``start_pt`` has its ``*`` dummy atoms replaced by
    hydrogens (which are then removed) and the result is handed to
    ``get_frags`` together with each molecule.  The two molecules returned
    by ``get_frags`` are sanitized, aligned with O3A and compared with
    ``rdMolAlign.GetBestRMS``.

    Parameters
    ----------
    gen_mol : generated molecule passed to ``get_frags``.
    ref_mol : reference molecule passed to ``get_frags``.
    start_pt : str
        SMILES of the starting fragments; dummy atoms are written as ``*``.

    Returns
    -------
    float
        The value returned by ``GetBestRMS`` on success.
    None
        If ``get_frags`` returns ``None`` for either molecule.
    int
        ``100`` (a dummy RMSD) if any step raises an exception.
    """
    try:
        du = Chem.MolFromSmiles('*')

        # Delete linker - Gen mol
        clean_frag = Chem.RemoveHs(
            AllChem.ReplaceSubstructs(
                Chem.MolFromSmiles(start_pt), du, Chem.MolFromSmiles('[H]'), True
            )[0]
        )
        fragmented_mol = get_frags(gen_mol, clean_frag, start_pt)
        if fragmented_mol is None:
            return None

        # Delete linker - Ref mol (built afresh rather than reusing clean_frag)
        clean_frag_ref = Chem.RemoveHs(
            AllChem.ReplaceSubstructs(
                Chem.MolFromSmiles(start_pt), du, Chem.MolFromSmiles('[H]'), True
            )[0]
        )
        fragmented_mol_ref = get_frags(ref_mol, clean_frag_ref, start_pt)
        if fragmented_mol_ref is None:
            return None

        # Sanitize
        Chem.SanitizeMol(fragmented_mol)
        Chem.SanitizeMol(fragmented_mol_ref)

        # Align; the value returned by Align() is not needed here.
        rdMolAlign.GetO3A(fragmented_mol, fragmented_mol_ref).Align()
        return rdMolAlign.GetBestRMS(fragmented_mol, fragmented_mol_ref)  # score
    except Exception:
        # Best-effort scoring: any failure maps to the dummy RMSD, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return 100  # Dummy RMSD
def test17GetBestRMS(self):
    """Check GetBestRMS on the molecules at index 1 and 2 of probe_mol.sdf."""
    sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                       'probe_mol.sdf')
    molS = Chem.SDMolSupplier(sdf, True, False)
    mol1 = molS[1]
    mol2 = molS[2]

    # AlignMol() would return this for the rms: 2.50561
    # But the best rms is: 2.43449
    rmsd = rdMolAlign.GetBestRMS(mol1, mol2)
    # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which no
    # longer exists in recent Python versions.
    self.assertAlmostEqual(rmsd, 2.43449209)
def compare_conformers(reference, result, rmsd_cutoff):
    """
    Match every reference conformer to its closest conformer in ``result``.

    Conformers are grouped by ``molecule_index``.  Within a group, each
    reference row is compared with every row of ``result`` using
    ``rdMolAlign.GetBestRMS`` and the ``result`` row with the smallest RMSD
    is recorded.

    Parameters
    ----------
    reference : DataFrame
        Reference conformers.  Must provide a ``molecule_index`` column and
        a ``mol`` column whose entries have a ``to_rdkit()`` method.
    result : DataFrame
        Conformers to match against the reference; same columns as
        ``reference``.
    rmsd_cutoff : float
        Currently unused by this function; kept for interface
        compatibility.

    Returns
    -------
    conformer_match : DataFrame
        Copy of ``reference`` where, for each matched row, ``ff_mol_name``
        holds the index label of the closest row in ``result`` and ``rmsd``
        holds the corresponding RMSD.  Reference rows whose molecule has no
        rows in ``result`` are left without a match instead of raising.
    """
    conformer_match = reference.copy()

    for mid in tqdm(reference.molecule_index.unique(), desc='Matching conformers'):
        ref_confs = reference.loc[reference.molecule_index == mid]
        query_confs = result.loc[result.molecule_index == mid]

        for ref_idx, ref_row in ref_confs.iterrows():
            # RMSD of this reference conformer against every query conformer.
            rms_by_query = {
                query_idx: rdMolAlign.GetBestRMS(ref_row['mol'].to_rdkit(),
                                                 query_row['mol'].to_rdkit())
                for query_idx, query_row in query_confs.iterrows()
            }

            # min() raises on an empty mapping; nothing to match in that case.
            if not rms_by_query:
                continue

            best = min(rms_by_query, key=rms_by_query.get)
            conformer_match.loc[ref_idx, 'ff_mol_name'] = best
            conformer_match.loc[ref_idx, 'rmsd'] = rms_by_query[best]

    return conformer_match
def opt_conf(i, rmsd_cutoff):
    """
    Optimize the geometry of conformer ``i`` and record it if it is new.

    Relies on names from the enclosing scope (``conformers``, ``calc``,
    ``reference_mol``, ``return_dict``), so it is only usable from within
    its parent function.

    Variables:
    - i: key of the conformer in ``conformers`` (an int, or 'ref')
    - rmsd_cutoff (float): RMSD at or below which the optimized geometry is
        treated as a duplicate of one already stored in ``return_dict``

    Returns:
    - bool: False when a 'ref' species is not isomorphic with
        ``reference_mol`` after optimization (or cannot be converted);
        True otherwise
    """
    conformer = conformers[i]
    calculator = conformer.ase_molecule.get_calculator()

    # Atom-index pairs whose distances may be constrained during optimization.
    labels = [bond.atom_indices for bond in conformer.get_bonds()]

    is_ts = isinstance(conformer, TS)
    if is_ts:
        label = conformer.reaction_label
        ind1 = conformer.rmg_molecule.get_labeled_atoms("*1")[0].sorting_label
        ind2 = conformer.rmg_molecule.get_labeled_atoms("*3")[0].sorting_label
        labels.append([ind1, ind2])
    else:
        label = conformer.smiles

    if isinstance(calc, FileIOCalculator):
        # Give every conformer its own working directory and label.
        if calculator.directory:
            directory = calculator.directory
        else:
            directory = 'conformer_logs'
        calculator.label = "{}_{}".format(conformer.smiles, i)
        calculator.directory = os.path.join(
            directory, label, '{}_{}'.format(conformer.smiles, i))
        if not os.path.exists(calculator.directory):
            try:
                os.makedirs(calculator.directory)
            except OSError:
                logging.info(
                    "An error occured when creating {}".format(calculator.directory))
        calculator.atoms = conformer.ase_molecule

    conformer.ase_molecule.set_calculator(calculator)
    opt = BFGS(conformer.ase_molecule, logfile=None)

    if is_ts:
        conformer.ase_molecule.set_constraint(FixBondLengths(labels))
        try:
            opt.run(fmax=0.20, steps=1e6)
        except RuntimeError:
            logging.info("Optimization failed...we will use the unconverged geometry")
    else:
        # Species: only integer-keyed conformers get the bond constraints.
        if isinstance(i, int):
            conformer.ase_molecule.set_constraint(FixBondLengths(labels))
        try:
            opt.run(steps=1e6)
        except RuntimeError:
            logging.info("Optimization failed...we will use the unconverged geometry")

        if str(i) == 'ref':
            conformer.update_coords_from("ase")
            try:
                rmg_mol = Molecule()
                rmg_mol.from_xyz(
                    conformer.ase_molecule.arrays["numbers"],
                    conformer.ase_molecule.arrays["positions"]
                )
                if not rmg_mol.is_isomorphic(reference_mol):
                    logging.info(
                        "{}_{} is not isomorphic with reference mol".format(conformer, str(i)))
                    return False
            except AtomTypeError:
                logging.info("Could not create a RMG Molecule from optimized conformer coordinates...assuming not isomorphic")
                return False

    conformer.update_coords_from("ase")
    conformer.energy = get_energy(conformer)

    # Skip storing this geometry if it matches one that was already kept.
    if len(return_dict) > 0:
        probe = conformer.copy()
        for _key, stored_positions in return_dict.items():
            candidate = conformer.copy()
            candidate.ase_molecule.positions = stored_positions
            candidate.update_coords_from("ase")
            distance = rdMolAlign.GetBestRMS(probe.rdkit_molecule, candidate.rdkit_molecule)
            if distance <= rmsd_cutoff:
                return True

    if str(i) != 'ref':
        return_dict[i] = conformer.ase_molecule.get_positions()
    return True
def systematic_search(conformer,
                      delta=float(120),
                      energy_cutoff = 10.0, #kcal/mol
                      rmsd_cutoff = 0.5, #angstroms
                      cistrans = True,
                      chiral_centers = True,
                      multiplicity = False,
                      ):
    """
    Perfoms a systematic conformer analysis of a `Conformer` or a `TS` object

    Every combination returned by `find_all_combos` is turned into a starting
    geometry, each geometry is optimized in its own process, duplicates
    (by RMSD) and high-energy geometries are discarded, and the survivors
    are returned sorted by energy.

    Variables:
    - conformer (`Conformer` or `TS`): a `Conformer` or `TS` object of interest
    - delta (int or float): a number between 0 and 180 or how many conformers to generate per dihedral
    - energy_cutoff (float or str): energy window in kcal/mol above the lowest energy geometry;
        the strings 'high' (50.0), 'default' (10.0) and 'low' (5.0) are also accepted
    - rmsd_cutoff (float or str): RMSD in angstroms at or below which two geometries are
        considered identical; the strings 'loose' (1.0), 'default' (0.5) and 'tight' (0.1)
        are also accepted
    - cistrans (bool): indication of if one wants to consider cistrans bonds
    - chiral_centers (bool): indication of if one wants to consider chiral centers bonds
    - multiplicity (bool): if True, one copy of each surviving conformer is returned per
        spin multiplicity considered

    Returns:
    - confs (list): a list of unique `Conformer` objects within `energy_cutoff` of the lowest
        energy conformer determined. If no combinations are found, the input conformer is
        returned as the only element.

    Raises:
    - AssertionError: if `rmsd_cutoff` or `energy_cutoff` is a string that is not one of the
        accepted options
    """
    # `conformers` lives at module level; the nested `opt_conf` helper looks
    # it up by name when it runs in a child process.
    global conformers

    rmsd_cutoff_options = {
        'loose' : 1.0,
        'default': 0.5,
        'tight': 0.1
    }

    energy_cutoff_options = {
        'high' : 50.0,
        'default' : 10.0,
        'low' : 5.0
    }

    if isinstance(rmsd_cutoff,str):
        rmsd_cutoff = rmsd_cutoff.lower()
        assert rmsd_cutoff in rmsd_cutoff_options, 'rmsd_cutoff options are loose, default, and tight'
        rmsd_cutoff = rmsd_cutoff_options[rmsd_cutoff]

    if isinstance(energy_cutoff,str):
        energy_cutoff = energy_cutoff.lower()
        assert energy_cutoff in energy_cutoff_options, 'energy_cutoff options are low, default, and high'
        energy_cutoff = energy_cutoff_options[energy_cutoff]

    if not isinstance(conformer, TS):
        reference_mol = conformer.rmg_molecule.copy(deep=True)
        reference_mol = reference_mol.to_single_bonds()

    # Shared dict through which the worker processes hand back positions.
    manager = Manager()
    return_dict = manager.dict()

    def opt_conf(i, rmsd_cutoff):
        """
        A helper function to optimize the geometry of a conformer.
        Only for use within this parent function

        Returns False only when a 'ref' species is no longer isomorphic with
        `reference_mol` after optimization (or cannot be converted); returns
        True otherwise. Optimized positions are stored in `return_dict[i]`
        unless they are within `rmsd_cutoff` of an already stored geometry
        or `i` is 'ref'.
        """
        conformer = conformers[i]
        calculator = conformer.ase_molecule.get_calculator()

        labels = [bond.atom_indices for bond in conformer.get_bonds()]

        if isinstance(conformer, TS):
            label = conformer.reaction_label
            ind1 = conformer.rmg_molecule.get_labeled_atoms("*1")[0].sorting_label
            ind2 = conformer.rmg_molecule.get_labeled_atoms("*3")[0].sorting_label
            labels.append([ind1, ind2])
            kind = 'ts'
        else:
            label = conformer.smiles
            kind = 'species'

        if isinstance(calc, FileIOCalculator):
            # Give every conformer its own working directory and label.
            if calculator.directory:
                directory = calculator.directory
            else:
                directory = 'conformer_logs'
            calculator.label = "{}_{}".format(conformer.smiles, i)
            calculator.directory = os.path.join(directory, label,'{}_{}'.format(conformer.smiles, i))
            if not os.path.exists(calculator.directory):
                try:
                    os.makedirs(calculator.directory)
                except OSError:
                    logging.info("An error occured when creating {}".format(calculator.directory))
            calculator.atoms = conformer.ase_molecule

        conformer.ase_molecule.set_calculator(calculator)
        opt = BFGS(conformer.ase_molecule, logfile=None)

        if kind == 'species':
            if isinstance(i,int):
                c = FixBondLengths(labels)
                conformer.ase_molecule.set_constraint(c)
            try:
                opt.run(steps=1e6)
            except RuntimeError:
                logging.info("Optimization failed...we will use the unconverged geometry")
            if str(i) == 'ref':
                conformer.update_coords_from("ase")
                try:
                    rmg_mol = Molecule()
                    rmg_mol.from_xyz(
                        conformer.ase_molecule.arrays["numbers"],
                        conformer.ase_molecule.arrays["positions"]
                    )
                    if not rmg_mol.is_isomorphic(reference_mol):
                        logging.info("{}_{} is not isomorphic with reference mol".format(conformer,str(i)))
                        return False
                except AtomTypeError:
                    logging.info("Could not create a RMG Molecule from optimized conformer coordinates...assuming not isomorphic")
                    return False

        if kind == 'ts':
            c = FixBondLengths(labels)
            conformer.ase_molecule.set_constraint(c)
            try:
                opt.run(fmax=0.20, steps=1e6)
            except RuntimeError:
                logging.info("Optimization failed...we will use the unconverged geometry")

        conformer.update_coords_from("ase")
        energy = get_energy(conformer)
        conformer.energy = energy

        # Do not store this geometry if it duplicates one already kept.
        if len(return_dict)>0:
            conformer_copy = conformer.copy()
            for _key, post in return_dict.items():
                conf_copy = conformer.copy()
                conf_copy.ase_molecule.positions = post
                conf_copy.update_coords_from("ase")
                rmsd = rdMolAlign.GetBestRMS(conformer_copy.rdkit_molecule,conf_copy.rdkit_molecule)
                if rmsd <= rmsd_cutoff:
                    return True
        if str(i) != 'ref':
            return_dict[i] = conformer.ase_molecule.get_positions()
        return True

    #if not isinstance(conformer,TS):
    #    calc = conformer.ase_molecule.get_calculator()
    #    reference_conformer = conformer.copy()
    #    if opt_conf(reference_conformer, calc, 'ref', rmsd_cutoff):
    #        conformer = reference_conformer

    combos = find_all_combos(
        conformer,
        delta=delta,
        cistrans=cistrans,
        chiral_centers=chiral_centers)

    if len(combos) == 0:
        logging.info(
            "This species has no torsions, cistrans bonds, or chiral centers")
        logging.info("Returning origional conformer")
        return [conformer]

    # NOTE(review): the result of this call is never used below; the call is
    # retained in case it has side effects on `conformer` - confirm and remove.
    find_terminal_torsions(conformer)

    calc = conformer.ase_molecule.get_calculator()
    if isinstance(calc, FileIOCalculator):
        logging.info("The calculator generates input and output files.")

    conformers = {}
    logging.info("There are {} possible conformers to investigate...".format(len(combos)))
    for index, combo in enumerate(combos):
        combo_torsions, combo_cistrans, combo_chiral_centers = combo
        copy_conf = conformer.copy()

        for tor_index, torsion in enumerate(combo_torsions):
            tor = copy_conf.torsions[tor_index]
            a1, a2, a3, a4 = tor.atom_indices
            copy_conf.ase_molecule.set_dihedral(
                a1=a1,
                a2=a2,
                a3=a3,
                a4=a4,
                angle=torsion,
                mask=tor.mask
            )
            copy_conf.update_coords()

        for ct_index, e_z in enumerate(combo_cistrans):
            ct = copy_conf.cistrans[ct_index]
            copy_conf.set_cistrans(ct.index, e_z)

        for center_index, s_r in enumerate(combo_chiral_centers):
            center = copy_conf.chiral_centers[center_index]
            copy_conf.set_chirality(center.index, s_r)

        copy_conf.update_coords_from("ase")
        copy_conf.ase_molecule.set_calculator(calc)
        conformers[index] = copy_conf

    processes = [
        Process(target=opt_conf, args=(key, rmsd_cutoff))
        for key in list(conformers)
    ]

    # Run at most cpu_count() optimizations at a time.  join() with a short
    # timeout blocks instead of spinning while waiting for a free slot.
    max_active = multiprocessing.cpu_count()
    active_processes = []
    for process in processes:
        while len(active_processes) >= max_active:
            active_processes[0].join(0.05)
            active_processes = [p for p in active_processes if p.is_alive()]
        process.start()
        active_processes.append(process)

    # Wait for the remaining workers to finish.
    for process in active_processes:
        process.join()

    energies = []
    for positions in list(return_dict.values()):
        conf = conformer.copy()
        conf.ase_molecule.positions = positions
        conf.ase_molecule.set_calculator(calc)
        energy = conf.ase_molecule.get_potential_energy()
        conf.update_coords_from("ase")
        energies.append((conf,energy))

    df = pd.DataFrame(energies,columns=["conformer","energy"])
    # energy_cutoff is given in kcal/mol; convert it to the units of `energy`.
    df = df[df.energy < df.energy.min() + (energy_cutoff * units.kcal / units.mol / units.eV)].sort_values("energy").reset_index(drop=True)

    # Of every pair within rmsd_cutoff, drop the higher-energy (later) one.
    redundant = set()
    conformer_copies = [conf.copy() for conf in df.conformer]
    for i,j in itertools.combinations(range(len(df.conformer)),2):
        copy_1 = conformer_copies[i].rdkit_molecule
        copy_2 = conformer_copies[j].rdkit_molecule
        rmsd = rdMolAlign.GetBestRMS(copy_1,copy_2)
        if rmsd <= rmsd_cutoff:
            redundant.add(j)

    df.drop(df.index[sorted(redundant)], inplace=True)

    if multiplicity and conformer.rmg_molecule.multiplicity > 2:
        rads = conformer.rmg_molecule.get_radical_count()
        if rads % 2 == 0:
            multiplicities = range(1,rads+2,2)
        else:
            multiplicities = range(2,rads+2,2)
    else:
        multiplicities = [conformer.rmg_molecule.multiplicity]

    confs = []
    i = 0
    for conf in df.conformer:
        if multiplicity:
            for mult in multiplicities:
                conf_copy = conf.copy()
                conf_copy.index = i
                conf_copy.rmg_molecule.multiplicity = mult
                confs.append(conf_copy)
                i += 1
        else:
            conf.index = i
            confs.append(conf)
            i += 1

    logging.info("We have identified {} unique, low-energy conformers for {}".format(
        len(confs), conformer))

    return confs
def calc_rmsd(reference, result):
    """Write the best-alignment RMSD of each row pair into ``result['rmsd']``.

    For every index label in ``reference``, the ``mol`` entries of
    ``reference`` and ``result`` at that label are converted with
    ``to_rdkit()`` and compared with ``rdMolAlign.GetBestRMS``.  ``result``
    is modified in place; nothing is returned.
    """
    rows = tqdm(reference.iterrows(), desc='Calculating RMSD')
    for label, ref_row in rows:
        ref_mol = ref_row['mol'].to_rdkit()
        query_mol = result.loc[label, 'mol'].to_rdkit()
        best_rms = rdMolAlign.GetBestRMS(ref_mol, query_mol)
        result.loc[label, 'rmsd'] = best_rms