Esempio n. 1
0
def rdkit_mmff94_xyz(smiles, **kwargs):
    """
    Returns the string of the XYZ file obtained performing the MMFF94 molecular mechanics optimization of the given
    SMILES using RDKit.
    Writing temporary files in $MM_WORKING_DIR if defined or otherwise in /tmp
    :param smiles: input_SMILES
    :param max_iterations: max number of iterations (default 500)
    :return : XYZ string of optimized geometry, success (whether the MM optimization was successful and the smiles has
    stayed identical after optimization). If the SMILES cannot be parsed or any step of the computation raises,
    (None, False) is returned.
    """

    working_dir = os.environ.get("MM_WORKING_DIR", "/tmp")

    # Honour the documented default: indexing kwargs directly raised KeyError when the caller omitted the
    # argument, which the broad handler below silently turned into a failed optimization.
    max_iterations = kwargs.get("max_iterations", 500)

    # Converting the molecule to RDKit object
    mol = MolFromSmiles(smiles)
    if mol is None:
        # Unparseable SMILES: report it through the success flag like every other failure
        return None, False
    smi_canon = MolToSmiles(mol)

    # Setting paths (the PID prefix keeps concurrent processes from sharing temporary files)
    filename_smiles = str(os.getpid()) + "_" + smi_to_filename(smi_canon)
    xyz_path = join(working_dir, filename_smiles + '.xyz')
    post_MM_smi_path = join(working_dir, filename_smiles + '.smi')

    # Computing geometry
    try:

        # Adding implicit hydrogens
        mol = AddHs(mol)

        # Generating initial 3D coordinates, then MM optimization
        EmbedMolecule(mol)

        value = MMFFOptimizeMolecule(mol, maxIters=max_iterations)

        # Success if returned value is null
        success_RDKIT_output = value == 0

        # Computing XYZ from optimized molecule
        xyz_str = MolToXYZBlock(mol)

        # Writing optimized XYZ to file
        with open(xyz_path, "w") as f:
            f.write(xyz_str)

        # Success if the optimization has converged and the post MM smiles is identical the pre MM smiles
        success = success_RDKIT_output and check_identical_geometries(
            xyz_path, smi_canon, post_MM_smi_path)

    except Exception:
        # Deliberate best effort: any failure is reported through the success flag
        success = False
        xyz_str = None
    finally:
        # Removing files
        remove_files([post_MM_smi_path, xyz_path])

    return xyz_str, success
Esempio n. 2
0
    def embed(self, mol):
        """Embed `mol` with EmbedMolecule and return the resulting molecule.

        When EmbedMolecule returns -1 the embedding is treated as failed:
        a RuntimeError is raised if `self.error_on_fail` is set, otherwise a
        warning is issued if `self.warn_on_fail` is set, and None is returned.

        On success, returns `mol.add_hs(add_coords=True)` when `self.add_hs`
        is truthy, else `mol` itself.
        """
        conformer_id = EmbedMolecule(mol)

        # Success path first: anything other than -1 counts as embedded.
        if conformer_id != -1:
            return mol.add_hs(add_coords=True) if self.add_hs else mol

        msg = 'Failed to Embed Molecule {}'.format(mol.name)
        if self.error_on_fail:
            raise RuntimeError(msg)
        if self.warn_on_fail:
            warnings.warn(msg)
        return None
Esempio n. 3
0
def gen3D(mol):
    """Add hydrogens to `mol`, embed it in 3D and optimize its geometry.

    MMFF is tried first; if `optimize` raises ValueError for it, UFF is
    used instead.

    Returns a `(uff, mol)` tuple, where `uff` is True when the UFF
    fallback was used and `mol` is the hydrogen-added molecule.

    Raises ValueError if EmbedMolecule does not return 0.
    """
    mol = Chem.AddHs(mol)

    embed_status = EmbedMolecule(mol)
    if embed_status != 0:
        raise ValueError("EmbedMolecule failed")

    try:
        optimize(mol, MMFFOptimizeMolecule)
    except ValueError:
        # Fall through to the UFF fallback below (kept outside the handler).
        pass
    else:
        return False, mol

    optimize(mol, UFFOptimizeMolecule)
    return True, mol
Esempio n. 4
0
# Input molecule (thiamethoxam)

smiles = 'CN1COCN(C1=N[N+](=O)[O-])CC2=CN=C(S2)Cl'

# Choose the log file name: "<SMILES>_<year><month><day>_<hour><minute>.log".
# NOTE(review): the date/time fields are not zero-padded, so different
# timestamps can map to the same name.
t = datetime.datetime.fromtimestamp(time.time())
psi4.set_output_file("{}_{}{}{}_{}{}.log".format(smiles, t.year, t.month,
                                                 t.day, t.hour, t.minute))

# Generate a 3D structure from the SMILES, then do a rough 3D geometry optimization
mol = Chem.MolFromSmiles(smiles)
mol = Chem.AddHs(mol)
params = ETKDGv3()
# Fixed seed for the embedding (presumably for reproducible coordinates)
params.randomSeed = 1
EmbedMolecule(mol, params)

# Optimize the geometry with MMFF (Merck Molecular Force Field)
MMFFOptimizeMolecule(mol)
# To optimize with UFF (Universal Force Field) instead, use:
#UFFOptimizeMolecule(mol)

conf = mol.GetConformer()

# Convert to a format that can be fed to Psi4.
# Set the charge and spin multiplicity (below: charge 0, spin multiplicity 1)
mol_input = "0 1"

#各々の原子の座標をXYZフォーマットで記述
for atom in mol.GetAtoms():
    mol_input += "\n " + atom.GetSymbol() + " " + str(conf.GetAtomPosition(atom.GetIdx()).x)\
Esempio n. 5
0
def EnumerateStereoisomers(m,
                           options=StereoEnumerationOptions(),
                           verbose=False):
    """ returns a generator that yields possible stereoisomers for a molecule

    Arguments:
      - m: the molecule to work with
      - options: parameters controlling the enumeration
      - verbose: toggles how verbose the output is

    If m has stereogroups, they will be expanded

    A small example with 3 chiral atoms and 1 chiral bond (16 theoretical stereoisomers):

    >>> from rdkit import Chem
    >>> from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
    >>> m = Chem.MolFromSmiles('BrC=CC1OC(C2)(F)C2(Cl)C1')
    >>> isomers = tuple(EnumerateStereoisomers(m))
    >>> len(isomers)
    16
    >>> for smi in sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2

    Because the molecule is constrained, not all of those isomers can
    actually exist. We can check that:

    >>> opts = StereoEnumerationOptions(tryEmbedding=True)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    8
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@H](/C=C\\Br)O2

    Or we can force the output to only give us unique isomers:

    >>> m = Chem.MolFromSmiles('FC(Cl)C=CC=CC(F)Cl')
    >>> opts = StereoEnumerationOptions(unique=True)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    10
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@H](Cl)/C=C/C=C/[C@@H](F)Cl
    F[C@@H](Cl)/C=C/C=C/[C@H](F)Cl
    F[C@@H](Cl)/C=C/C=C\\[C@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C/[C@@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C/[C@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C\\[C@@H](F)Cl
    F[C@@H](Cl)/C=C\\C=C\\[C@H](F)Cl
    F[C@H](Cl)/C=C/C=C/[C@H](F)Cl
    F[C@H](Cl)/C=C\\C=C/[C@H](F)Cl
    F[C@H](Cl)/C=C\\C=C\\[C@H](F)Cl

    By default the code only expands unspecified stereocenters:

    >>> m = Chem.MolFromSmiles('BrC=C[C@H]1OC(C2)(F)C2(Cl)C1')
    >>> isomers = tuple(EnumerateStereoisomers(m))
    >>> len(isomers)
    8
    >>> for smi in sorted(Chem.MolToSmiles(x,isomericSmiles=True) for x in isomers):
    ...     print(smi)
    ...
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@@]1(Cl)C[C@@H](/C=C\\Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C/Br)O2
    F[C@]12C[C@]1(Cl)C[C@@H](/C=C\\Br)O2

    But we can change that behavior:

    >>> opts = StereoEnumerationOptions(onlyUnassigned=False)
    >>> isomers = tuple(EnumerateStereoisomers(m, options=opts))
    >>> len(isomers)
    16

    Since the result is a generator, we can allow exploring at least parts of very
    large result sets:

    >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
    >>> opts = StereoEnumerationOptions(maxIsomers=0)
    >>> isomers = EnumerateStereoisomers(m, options=opts)
    >>> for x in range(5):
    ...   print(Chem.MolToSmiles(next(isomers),isomericSmiles=True))
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br

    Or randomly sample a small subset. Note that if we want that sampling to be consistent
    across python versions we need to provide a random number seed:

    >>> m = Chem.MolFromSmiles('Br' + '[CH](Cl)' * 20 + 'F')
    >>> opts = StereoEnumerationOptions(maxIsomers=3,rand=0xf00d)
    >>> isomers = EnumerateStereoisomers(m, options=opts)
    >>> for smi in isomers: #sorted(Chem.MolToSmiles(x, isomericSmiles=True) for x in isomers):
    ...     print(Chem.MolToSmiles(smi))
    F[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)Br
    F[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)Br
    F[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)Br

    """
    # Work on a copy so the caller's molecule is never modified.
    tm = Chem.Mol(m)
    for atom in tm.GetAtoms():
        atom.ClearProp("_CIPCode")
    flippers = _getFlippers(tm, options)
    nCenters = len(flippers)
    if not nCenters:
        # Nothing to enumerate: the copy itself is the only "isomer".
        yield tm
        return

    if (options.maxIsomers == 0 or 2**nCenters <= options.maxIsomers):
        # Every combination is allowed: walk through all bit patterns in order.
        bitsource = _RangeBitsGenerator(nCenters)
    else:
        # More combinations than requested isomers: sample bit patterns randomly.
        if options.rand is None:
            # deterministic random seed invariant to input atom order
            seed = hash(
                tuple(
                    sorted([(a.GetDegree(), a.GetAtomicNum())
                            for a in tm.GetAtoms()])))
            rand = random.Random(seed)
        elif isinstance(options.rand, random.Random):
            # other implementations of Python random number generators
            # can inherit from this class to pick up utility methods
            rand = options.rand
        else:
            rand = random.Random(options.rand)

        bitsource = _UniqueRandomBitsGenerator(nCenters, options.maxIsomers,
                                               rand)

    isomersSeen = set()
    numIsomers = 0
    for bitflag in bitsource:
        # Bit i of bitflag selects the orientation of the i-th flipper.
        for i, flipper in enumerate(flippers):
            flipper.flip(bool(bitflag & (1 << i)))
        isomer = Chem.Mol(tm)
        Chem.SetDoubleBondNeighborDirections(isomer)
        isomer.ClearComputedProps()

        Chem.AssignStereochemistry(isomer,
                                   cleanIt=True,
                                   force=True,
                                   flagPossibleStereoCenters=True)
        if options.unique:
            cansmi = Chem.MolToSmiles(isomer, isomericSmiles=True)
            if cansmi in isomersSeen:
                continue

            isomersSeen.add(cansmi)

        if options.tryEmbedding:
            ntm = Chem.AddHs(isomer)
            # bitflag is an unbounded Python int (one bit per stereo centre);
            # mask it so the seed always fits in a C++ int, otherwise molecules
            # with more than 31 centres make the EmbedMolecule call fail.
            cid = EmbedMolecule(ntm, randomSeed=(bitflag & 0x7fffffff))
            if cid >= 0:
                # Copy the heavy-atom coordinates back onto the H-less isomer.
                embedded = ntm.GetConformer()
                conf = Chem.Conformer(isomer.GetNumAtoms())
                for aid in range(isomer.GetNumAtoms()):
                    conf.SetAtomPosition(aid, embedded.GetAtomPosition(aid))
                isomer.AddConformer(conf)
        else:
            # Embedding not requested: treat every isomer as acceptable.
            cid = 1
        if cid >= 0:
            yield isomer
            numIsomers += 1
            if options.maxIsomers != 0 and numIsomers >= options.maxIsomers:
                break
        elif verbose:
            print("%s    failed to embed" %
                  (Chem.MolToSmiles(isomer, isomericSmiles=True)))
Esempio n. 6
0
def build_molecules(starting_mol,
                    atom_additions=None,
                    stereoisomers=True,
                    sa_score_threshold=3.,
                    tryEmbedding=True):
    """Return an iterator of molecules that result from a single manipulation
    (i.e., atom / bond addition) to the starting molecule

    Arguments:
        starting_mol: rdkit.Mol
            The starting molecule as an rdkit Mol object

        atom_additions: list of elements
            Types of atoms that can be added. Defaults to ('C', 'N', 'O')

        stereoisomers: bool
            Whether to iterate over potential stereoisomers of the given molecule
            as separate molecules

        sa_score_threshold: float or None
            Whether to calculate the sa_score of the given molecule, and withhold
            molecules that have a sa_score greater than or equal to the threshold.

        tryEmbedding: bool
            whether to try an rdkit 3D embedding of the molecule

    Yields:
        rdkit.Mol, corresponding to modified input

    """
    if atom_additions is None:
        atom_additions = ('C', 'N', 'O')

    def get_valid_partners(atom):
        """ For a given atom, return other atoms it can be connected to """
        # Indices >= GetNumAtoms() stand for "add a new atom of type
        # atom_additions[idx - GetNumAtoms()]".
        return list(
            set(range(starting_mol.GetNumAtoms())) -
            set((neighbor.GetIdx() for neighbor in atom.GetNeighbors())) -
            set(range(
                atom.GetIdx())) -  # Prevent duplicates by only bonding forward
            set((atom.GetIdx(), ))
            | set(np.arange(len(atom_additions)) + starting_mol.GetNumAtoms()))

    def get_valid_bonds(atom1_idx, atom2_idx):
        """ Compare free valences of two atoms to calculate valid bonds """
        free_valence_1 = get_free_valence(
            starting_mol.GetAtomWithIdx(atom1_idx))
        if atom2_idx < starting_mol.GetNumAtoms():
            free_valence_2 = get_free_valence(
                starting_mol.GetAtomWithIdx(int(atom2_idx)))
        else:
            free_valence_2 = pt.GetDefaultValence(
                atom_additions[atom2_idx - starting_mol.GetNumAtoms()])

        # At most three bond types are considered (indices 0..2 into bond_orders)
        return range(min(free_valence_1, free_valence_2, 3))

    def add_bond(atom1_idx, atom2_idx, bond_type):
        """ Given two atoms and a bond type, execute the addition using rdkit """
        num_atom = starting_mol.GetNumAtoms()
        rw_mol = Chem.RWMol(starting_mol)

        if atom2_idx < num_atom:
            rw_mol.AddBond(atom1_idx, atom2_idx, bond_orders[bond_type])

        else:
            # The new atom is appended, so it receives index num_atom
            rw_mol.AddAtom(Chem.Atom(atom_additions[atom2_idx - num_atom]))
            rw_mol.AddBond(atom1_idx, num_atom, bond_orders[bond_type])

        return rw_mol

    def enumerate_stereoisomers(mol, to_use):
        """ We likely want to distinguish between stereoisomers, so we do that here """

        if not to_use:
            # Give an easy way to pass through this function if this feature isn't used
            return (mol, )

        opts = StereoEnumerationOptions(unique=True)
        return tuple(EnumerateStereoisomers(mol, options=opts))

    # SMILES of every molecule already considered, used for de-duplication
    generated_smiles = set()

    # Construct the generator
    for i, atom in enumerate(starting_mol.GetAtoms()):
        for partner in get_valid_partners(atom):
            for bond_order in get_valid_bonds(i, partner):
                mol = add_bond(i, partner, bond_order)

                Chem.SanitizeMol(mol)
                for isomer in enumerate_stereoisomers(mol, stereoisomers):
                    # Key on the isomer, not on the parent `mol`: keying on
                    # `mol` gave every stereoisomer the same SMILES, so all
                    # but the first were discarded as duplicates.
                    smiles = Chem.MolToSmiles(isomer)
                    if smiles in generated_smiles:
                        continue
                    generated_smiles.add(smiles)

                    if (sa_score_threshold is not None
                            and sascorer.calculateScore(isomer) >=
                            sa_score_threshold):
                        continue

                    if tryEmbedding:
                        ntm = Chem.AddHs(isomer)

                        # Explicit check instead of `assert`, which would be
                        # stripped when running under `python -O`.
                        try:
                            embedded = EmbedMolecule(ntm) >= 0
                        except RuntimeError:
                            embedded = False

                        if not embedded:
                            # Failed a 3D embedding
                            continue

                    yield isomer