def get_canonical_smiles_from_mol2(mol2_dict):
    """Build a mapping from molecule name to canonical isomeric SMILES.

    Each mol2 file is loaded with `mol_from_mol2` and then standardised with
    `mol_to_standardised_mol`. Standardisation is best-effort: if it raises,
    the failure is logged and the SMILES is generated from the
    unstandardised molecule instead.

    Parameters
    ----------
    mol2_dict : dict
        Maps molecule name to the mol2 file handed to `mol_from_mol2`.

    Returns
    -------
    dict
        Maps each molecule name to the output of `MolToSmiles` called with
        ``isomericSmiles=True``. Empty if `mol2_dict` is empty.
    """
    # Imported locally so this function does not depend on the enclosing
    # module already importing `logging`.
    import logging

    canonical_smiles_dict = {}
    for mol_name, mol2_file in mol2_dict.items():
        mol = mol_from_mol2(mol2_file, mol_name)
        try:
            mol = mol_to_standardised_mol(mol)
        except Exception:
            # Keep going with the unstandardised mol, but leave a trace;
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logging.warning(
                "Standardisation failed for %s; using unstandardised mol.",
                mol_name,
                exc_info=True,
            )
        canonical_smiles_dict[mol_name] = MolToSmiles(
            mol, isomericSmiles=True
        )
    return canonical_smiles_dict
def test_standardisation(self):
    """Check the SMILES of a mol before and after standardisation."""
    from rdkit.Chem import MolToSmiles
    from e3fp.conformer.util import (
        mol_from_smiles,
        mol_to_standardised_mol,
    )

    input_smiles = "C[N-]c1cccc[n+]1C"
    expected_smiles = "CN=c1ccccn1C"

    # Before standardisation the mol must write out the SMILES it was
    # built from.
    parsed = mol_from_smiles(input_smiles, "tmp")
    self.assertEqual(MolToSmiles(parsed), input_smiles)

    # After standardisation the SMILES must match the expected form.
    standardised = mol_to_standardised_mol(parsed)
    self.assertEqual(MolToSmiles(standardised), expected_smiles)
def generate_conformers(
    input_mol,
    name=None,
    standardise=STANDARDISE_DEF,
    num_conf=NUM_CONF_DEF,
    first=FIRST_DEF,
    pool_multiplier=POOL_MULTIPLIER_DEF,
    rmsd_cutoff=RMSD_CUTOFF_DEF,
    max_energy_diff=MAX_ENERGY_DIFF_DEF,
    forcefield=FORCEFIELD_DEF,
    seed=SEED_DEF,
    out_file=None,
    out_dir=OUTDIR_DEF,
    save=False,
    compress=COMPRESS_DEF,
    overwrite=False,
):
    """Generate conformers for a molecule and optionally save them.

    Parameters
    ----------
    input_mol : RDKit Mol
        Mol with a single conformer from which to generate conformers.
    name : str, optional
        Name of molecule. If None, the mol's ``_Name`` property is used.
    standardise : bool, optional
        Standardise mol (via `mol_to_standardised_mol`) before generating
        conformers.
    num_conf : int, optional
        Target number of conformations. If -1, the number of conformations
        is chosen automatically based on number of rotatable bonds.
    first : int, optional
        Number of first conformers to return. Does not impact the conformer
        generation process, except that it may terminate generation early
        once this many conformers have been accepted.
    pool_multiplier : int, optional
        Factor to multiply by `num_conf`. The resulting number of
        conformations will be generated, then pruned to `num_conf`.
    rmsd_cutoff : float, optional
        RMSD threshold above which to accept two conformations as
        different.
    max_energy_diff : float, optional
        Maximum energy difference between lowest energy conformer and any
        accepted conformer.
    forcefield : {'uff', 'mmff94', 'mmff94s'}, optional
        Forcefield to use for minimization of conformers.
    seed : int, optional
        Random seed for conformer generation. If -1, the random number
        generator is unseeded.
    out_file : str, optional
        Filename to save output, if `save` is True. If None, the file is
        ``<name>.sdf`` inside `out_dir`, optionally with a compressed
        extension.
    out_dir : str, optional
        Directory for the default output file; only used when `save` is
        True and `out_file` is None.
    save : bool, optional
        Save conformers to `out_file`.
    compress : int, optional
        Selects the extension of the default output file: 0 for none,
        1 for ``.gz``, 2 for ``.bz2``. Any other value is treated as 0.
        Not consulted here when `out_file` is given explicitly.
    overwrite : bool, optional
        Overwrite the output file if it already exists.

    Returns
    -------
    bool
        False if the output file already exists and `overwrite` is False,
        or if conformer generation raised an exception.
    tuple
        Otherwise ``(mol, name, num_rotatable_bonds)`` followed by the
        values returned by the conformer generator. A failure while saving
        is only logged and does not change the return value.
    """
    if name is None:
        name = input_mol.GetProp("_Name")

    if standardise:
        input_mol = mol_to_standardised_mol(input_mol)

    if save:
        if out_file is None:
            # Index by compression mode; unrecognised modes mean "plain".
            suffix_by_mode = ("", ".gz", ".bz2")
            mode = compress if compress in (0, 1, 2) else 0
            default_name = "{}.sdf{}".format(name, suffix_by_mode[mode])
            out_file = os.path.join(out_dir, default_name)

        already_present = os.path.exists(out_file)
        if already_present and not overwrite:
            logging.warning("{} already exists. Skipping.".format(out_file))
            return False

    generator_options = dict(
        num_conf=num_conf,
        first=first,
        pool_multiplier=pool_multiplier,
        rmsd_cutoff=rmsd_cutoff,
        max_energy_diff=max_energy_diff,
        forcefield=forcefield,
        seed=seed,
        get_values=True,
    )

    logging.info("Generating conformers for {}.".format(name))
    try:
        generator = ConformerGenerator(**generator_options)
        mol, values = generator.generate_conformers(input_mol)
        conformer_count = mol.GetNumConformers()
        logging.info(
            "Generated {:d} conformers for {}.".format(conformer_count, name)
        )
    except Exception:
        logging.warning(
            "Problem generating conformers for {}.".format(name),
            exc_info=True,
        )
        return False

    if save:
        # Saving is best-effort: a failure is logged but the generated
        # conformers are still returned to the caller.
        try:
            mol_to_sdf(mol, out_file)
            logging.info(
                "Saved conformers for {} to {}.".format(name, out_file)
            )
        except Exception:
            logging.warning(
                "Problem saving conformers for {} to {}.".format(
                    name, out_file
                ),
                exc_info=True,
            )

    rotatable_bond_count = AllChem.CalcNumRotatableBonds(mol)
    return (mol, name, rotatable_bond_count) + values