Ejemplo n.º 1
0
def get_canonical_smiles_from_mol2(mol2_dict):
    """Build isomeric SMILES strings for the molecules in a set of mol2 files.

    Each molecule is loaded with `mol_from_mol2` and, when possible,
    standardised with `mol_to_standardised_mol` before being converted to
    SMILES. Standardisation is best-effort: if it fails, the unstandardised
    molecule is used instead and a warning is logged.

    Parameters
    ----------
    mol2_dict : dict
        Mapping of molecule name to the mol2 file passed to `mol_from_mol2`.

    Returns
    -------
    dict
        Mapping of molecule name to the string returned by
        `MolToSmiles(mol, isomericSmiles=True)`.
    """
    import logging

    canonical_smiles_dict = {}
    for mol_name, mol2_file in mol2_dict.items():
        mol = mol_from_mol2(mol2_file, mol_name)
        try:
            mol = mol_to_standardised_mol(mol)
        except Exception:
            # Deliberately non-fatal: fall back to the molecule as loaded.
            # `Exception` (rather than a bare `except`) lets KeyboardInterrupt
            # and SystemExit propagate so long batch runs stay interruptible.
            logging.warning(
                "Could not standardise %s; using unstandardised mol.",
                mol_name,
                exc_info=True,
            )
        canonical_smiles_dict[mol_name] = MolToSmiles(mol, isomericSmiles=True)
    return canonical_smiles_dict
Ejemplo n.º 2
0
    def test_standardisation(self):
        """Check that standardising a charge-separated mol changes its SMILES.

        The input SMILES must round-trip unchanged through `mol_from_smiles`,
        and after `mol_to_standardised_mol` the molecule must serialise to the
        expected uncharged SMILES.
        """
        from rdkit import Chem
        from e3fp.conformer.util import (
            mol_from_smiles,
            mol_to_standardised_mol,
        )

        charged_smiles = "C[N-]c1cccc[n+]1C"
        expected_smiles = "CN=c1ccccn1C"

        # Parsing alone must not alter the representation.
        parsed_mol = mol_from_smiles(charged_smiles, "tmp")
        self.assertEqual(Chem.MolToSmiles(parsed_mol), charged_smiles)

        # Standardisation is what produces the expected form.
        standardised_mol = mol_to_standardised_mol(parsed_mol)
        self.assertEqual(Chem.MolToSmiles(standardised_mol), expected_smiles)
Ejemplo n.º 3
0
def generate_conformers(
    input_mol,
    name=None,
    standardise=STANDARDISE_DEF,
    num_conf=NUM_CONF_DEF,
    first=FIRST_DEF,
    pool_multiplier=POOL_MULTIPLIER_DEF,
    rmsd_cutoff=RMSD_CUTOFF_DEF,
    max_energy_diff=MAX_ENERGY_DIFF_DEF,
    forcefield=FORCEFIELD_DEF,
    seed=SEED_DEF,
    out_file=None,
    out_dir=OUTDIR_DEF,
    save=False,
    compress=COMPRESS_DEF,
    overwrite=False,
):
    """Generate and optionally save conformers for a molecule.

    Parameters
    ----------
    input_mol : RDKit Mol
        Mol with a single conformer from which to generate conformers.
    name : str, optional
        Name of molecule. If None, the mol's "_Name" property is used.
    standardise : bool, optional
        Standardise mol before generating conformers.
    num_conf : int, optional
        If int, this is the target number of conformations. If -1, number
        of conformations is automatically chosen based on number of rotatable
        bonds.
    first : int, optional
        Number of first conformers to return. Does not impact conformer
        generator process, except may terminate conformer generation early when
        this many of conformers have been accepted.
    pool_multiplier : int, optional
        Factor to multiply by `num_conf`. The resulting number of conformations
        will be generated, then pruned to `num_conf`.
    rmsd_cutoff : float, optional
        RMSD threshold above which to accept two conformations as different
    max_energy_diff : float, optional
        Maximum energy difference between lowest energy conformer and any
        accepted conformer.
    forcefield : {'uff', 'mmff94', 'mmff94s'}, optional
        Forcefield to use for minimization of conformers.
    seed : int, optional
        Random seed for conformer generation. If -1, the random number
        generator is unseeded.
    out_file : str, optional
        Filename to save output, if `save` is True. If None, the file is
        `name`.sdf inside `out_dir`, optionally with a compressed extension.
        If given, it is used as-is and `out_dir` is not applied to it.
    out_dir : str, optional
        Directory used to build the default output filename when `save` is
        True and `out_file` is None.
    save : bool, optional
        Save conformers to disk.
    compress : int, optional
        Compression used when building the default output filename.
        0: File is not compressed.
        1: File is gzipped (.gz)
        2: File is bzipped (.bz2)
        Any other value (including None) is treated as 0 here.
    overwrite : bool, optional
        Overwrite output files if they already exist.

    Returns
    -------
    bool
        False if the output file already exists and `overwrite` is False, or
        if conformer generation raised an exception. Otherwise return below.
    tuple
        Tuple starting with the mol carrying the generated conformers, the
        molecule name, and the number of rotatable bonds, followed by the
        values returned by the conformer generator (numpy array of indices of
        final conformations, numpy array of energies of all conformations
        generated, and 2D numpy array of pairwise RMSDs between final
        conformations).

    Notes
    -----
    A failure while saving is logged as a warning but does not change the
    return value: the generated conformers are still returned.
    """
    if name is None:
        name = input_mol.GetProp("_Name")

    # Resolve the output path and bail out early if there is nothing to do,
    # before spending any time on standardisation or conformer generation.
    if save:
        if out_file is None:
            extensions = ("", ".gz", ".bz2")
            if compress not in (0, 1, 2):
                compress = 0
            out_file = os.path.join(
                out_dir, "{}.sdf{}".format(name, extensions[compress])
            )

        if os.path.exists(out_file) and not overwrite:
            logging.warning("%s already exists. Skipping.", out_file)
            return False

    if standardise:
        input_mol = mol_to_standardised_mol(input_mol)

    logging.info("Generating conformers for %s.", name)
    try:
        conf_gen = ConformerGenerator(
            num_conf=num_conf,
            first=first,
            pool_multiplier=pool_multiplier,
            rmsd_cutoff=rmsd_cutoff,
            max_energy_diff=max_energy_diff,
            forcefield=forcefield,
            seed=seed,
            get_values=True,
        )
        mol, values = conf_gen.generate_conformers(input_mol)
        logging.info(
            "Generated %d conformers for %s.", mol.GetNumConformers(), name
        )
    except Exception:
        logging.warning(
            "Problem generating conformers for %s.", name, exc_info=True
        )
        return False

    if save:
        try:
            mol_to_sdf(mol, out_file)
            logging.info("Saved conformers for %s to %s.", name, out_file)
        except Exception:
            # Saving is best-effort; the in-memory result is still returned.
            logging.warning(
                "Problem saving conformers for %s to %s.",
                name,
                out_file,
                exc_info=True,
            )
    return (mol, name, AllChem.CalcNumRotatableBonds(mol)) + values