Beispiel #1
0
    def _standardize_smiles(smiles):
        """Return a standardized form of a SMILES pattern via the `cmiles` library.

        The pattern is parsed with the RDKit backend and written back out
        without explicit hydrogens or atom-map indices. An isomeric SMILES is
        attempted first; if `cmiles` raises a ``ValueError`` the non-isomeric
        form is produced instead.

        Parameters
        ----------
        smiles: str
            The SMILES pattern to standardize.

        Returns
        -------
        The standardized SMILES pattern.
        """
        from cmiles.utils import load_molecule, mol_to_smiles

        parsed = load_molecule(smiles, toolkit="rdkit")
        # Options shared by both the isomeric attempt and the fall-back.
        shared_flags = {"explicit_hydrogen": False, "mapped": False}

        try:
            # Preferred: keep stereochemistry in the output.
            return mol_to_smiles(parsed, isomeric=True, **shared_flags)
        except ValueError:
            # Fall back to a non-isomeric pattern when the isomeric one fails.
            return mol_to_smiles(parsed, isomeric=False, **shared_flags)
def test_add_substituent():
    """Check that `_add_next_substituent` grows the (3, 5) fragment back to the parent chain."""
    target = (3, 5)
    parent = chemi.smiles_to_oemol('CCCCCC')
    engine = fragmenter.fragment.WBOFragmenter(parent)
    engine.fragment()

    # The fragment built around bond (3, 5) starts out one carbon short.
    assert mol_to_smiles(engine.fragments[target],
                         mapped=False,
                         explicit_hydrogen=False) == 'CCCCC'

    fragment_mol = engine.fragments[target]

    # Collect map indices of heavy atoms and of bonds between heavy atoms.
    heavy_atoms = {
        atom.GetMapIdx()
        for atom in fragment_mol.GetAtoms()
        if not atom.IsHydrogen()
    }
    heavy_bonds = set()
    for bond in fragment_mol.GetBonds():
        begin, end = bond.GetBgn(), bond.GetEnd()
        if not (begin.IsHydrogen() or end.IsHydrogen()):
            heavy_bonds.add((begin.GetMapIdx(), end.GetMapIdx()))

    grown = engine._add_next_substituent(heavy_atoms, heavy_bonds, target_bond=target)

    assert mol_to_smiles(grown, mapped=False,
                         explicit_hydrogen=False) == 'CCCCCC'
Beispiel #3
0
    def test_expand_protonation_states(self):
        """Compare the states from `_expand_states` against a fixed reference set of SMILES."""
        parent = chemi.smiles_to_oemol(
            'C5=C(C1=CN=CC=C1)N=C(NC2=C(C=CC(=C2)NC(C3=CC=C(C=C3)CN4CCN(CC4)C)=O)C)N=C5')
        enumerated = fragmenter.fragment._expand_states(parent)

        # Reference SMILES the enumeration is expected to reproduce exactly.
        expected = {
            'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
            'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
            'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C'
        }

        # Serialise every enumerated state the same way the references were written.
        observed = {
            mol_to_smiles(state,
                          mapped=False,
                          explicit_hydrogen=False,
                          isomeric=True)
            for state in enumerated
        }

        # Both sets must coincide with their intersection, i.e. be equal.
        shared = expected & observed
        self.assertEqual(len(shared), len(expected))
        self.assertEqual(len(shared), len(observed))
Beispiel #4
0
def test_map_order_geometry(permute, toolkit, toolkit_name):
    """Check that map-ordered coordinates agree with the input geometry through the atom map."""
    hooh = {
        'symbols': ['H', 'O', 'O', 'H'],
        'geometry': [
            1.84719633, 1.47046223, 0.80987166, 1.3126021, -0.13023157,
            -0.0513322, -1.31320906, 0.13130216, -0.05020593, -1.83756335,
            -1.48745318, 0.80161212
        ],
        'name':
        'HOOH',
        'connectivity': [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
    }
    tolerance = 0.0000001

    mol = utils.load_molecule(hooh, toolkit=toolkit_name, permute_xyz=permute)
    mapped_smiles = utils.mol_to_smiles(mol,
                                        isomeric=True,
                                        explicit_hydrogen=True,
                                        mapped=True)
    atom_map = utils.get_atom_map(mol, mapped_smiles)
    symbols, geometry = toolkit.get_map_ordered_geometry(mol, atom_map)

    # View both flat coordinate sequences as (n_atoms, 3) arrays.
    n_atoms = int(len(geometry) / 3)
    json_geom = np.asarray(hooh['geometry']).reshape(n_atoms, 3)
    geometry_array = np.asarray(geometry).reshape(n_atoms, 3)

    # Map indices are one-based, hence the `- 1` when indexing the ordered geometry.
    for map_index in atom_map:
        input_row = json_geom[atom_map[map_index]]
        ordered_row = geometry_array[map_index - 1]
        for axis in range(3):
            assert input_row[axis] == pytest.approx(ordered_row[axis], tolerance)

    if not permute:
        assert hooh['geometry'] == pytest.approx(geometry, tolerance)
Beispiel #5
0
    def request_callback(request, context):
        """Fake response handler: verify the posted SMILES and return a canned reply.

        Sets the response status to 200, pulls the value following the
        ``"smiData"`` form field out of the request body, canonicalizes it with
        cmiles (RDKit backend) and asserts that it is ``"C"``.
        """
        context.status_code = 200

        field_match = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text)
        submitted = field_match.group(1)

        canonical = mol_to_smiles(load_molecule(submitted, toolkit="rdkit"),
                                  isomeric=False,
                                  explicit_hydrogen=False,
                                  mapped=False)

        assert canonical == "C"
        return 'value="/tmp/0000.xml"'
Beispiel #6
0
    def enumerate_states(self, molecule, title='', json_filename=None):
        """
        Enumerate states for a molecule using the options of this workflow.

        The options are read from ``self.off_workflow`` and the provenance is
        built from ``self.workflow_id``. The molecule is standardized, its
        canonical isomeric SMILES is recorded in the provenance, and the states
        are produced by ``fragment.expand_states``.

        Parameters
        ----------
        molecule: input accepted by ``chemi.standardize_molecule``
            Molecule to enumerate states for.
        title: str, optional, default empty string
            Title forwarded to ``chemi.standardize_molecule``.
        json_filename: str, optional, default None
            If given, the result is written to this file as JSON. In that case
            the ``'states'`` entry of the returned dictionary is a list.

        Returns
        -------
        json_dict: dict
            Dictionary with the keys ``'provenance'`` and ``'states'``.

        """
        routine = 'enumerate_states'
        options = self.off_workflow.get_options(routine)['options']
        provenance = _get_provenance(workflow_id=self.workflow_id,
                                     routine=routine)

        parent = chemi.standardize_molecule(molecule, title=title)
        parent_smiles = mol_to_smiles(parent,
                                      isomeric=True,
                                      mapped=False,
                                      explicit_hydrogen=False)
        states = fragment.expand_states(parent, **options)

        # Record which molecule the states were derived from.
        routine_record = provenance['routine'][routine]
        routine_record['parent_molecule'] = parent_smiles
        routine_record['parent_molecule_name'] = parent.GetTitle()

        json_dict = {'provenance': provenance, 'states': states}

        if json_filename:
            # Convert to a list before dumping so the states can be serialised.
            json_dict['states'] = list(states)
            with open(json_filename, 'w') as handle:
                json.dump(json_dict, handle, indent=2, sort_keys=True)

        return json_dict
Beispiel #7
0
    def test_tagged_smiles(self):
        """Mapped SMILES written for a mol2 input must match the expected index tags."""
        from openeye import oechem

        stream = oechem.oemolistream(get_fn('ethylmethylidyneamonium.mol2'))
        parsed = oechem.OEMol()
        oechem.OEReadMolecule(stream, parsed)

        observed = mol_to_smiles(parsed,
                                 isomeric=True,
                                 mapped=True,
                                 explicit_hydrogen=True)

        # Tags should always be the same as mol2 molecule ordering
        expected = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
        self.assertEqual(observed, expected)
Beispiel #8
0
def frag_to_smiles(frags, mol):
    """
    Group fragments (AtomBondSet) by their isomeric, explicit-hydrogen SMILES.

    Each fragment is cut out of ``mol`` as its own molecule (with hydrogen
    counts adjusted), chirality is perceived, and the unmapped SMILES of that
    sub-molecule is used as the grouping key.

    Parameters
    ----------
    frags: list
        Fragments to convert; each must provide ``GetAtoms()`` and ``GetBonds()``.
    mol: OEMol
        Molecule the fragments were taken from.

    Returns
    -------
    smiles: dict
        Maps each SMILES string to the list of fragments that produced it.

    Raises
    ------
    Warning
        If a fragment contains an atom without any bond.

    """
    grouped = {}
    for frag in frags:
        atom_pred = oechem.OEIsAtomMember(frag.GetAtoms())
        bond_pred = oechem.OEIsBondMember(frag.GetBonds())

        # Copy only the selected atoms/bonds; True asks OESubsetMol to adjust H counts.
        sub_mol = oechem.OEMol()
        oechem.OESubsetMol(sub_mol, mol, atom_pred, bond_pred, True)
        oechem.OEPerceiveChiral(sub_mol)

        # Sanity check: every atom of the fragment has to take part in a bond.
        for atom in sub_mol.GetAtoms():
            if len(list(atom.GetBonds())) == 0:
                raise Warning("Yikes!!! An atom that is not bonded to any other atom in the fragment. "
                              "You probably ran into a bug. Please report the input molecule to the issue tracker")

        key = mol_to_smiles(sub_mol, mapped=False, explicit_hydrogen=True, isomeric=True)
        grouped.setdefault(key, []).append(frag)

    return grouped
Beispiel #9
0
    def test_expand_enantiomers(self):
        """Compare enumerated stereoisomers against a fixed reference set of SMILES."""
        parent = chemi.smiles_to_oemol(
            'CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4')
        enumerated = fragmenter.fragment._expand_states(
            parent, enumerate='stereoisomers')

        # Reference SMILES: both double-bond geometries times both configurations of the stereocenter.
        expected = {
            'CN(C)C/C=C/C(=O)Nc1cc2c(cc1O[C@@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
            'CN(C)C/C=C/C(=O)Nc1cc2c(cc1O[C@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
            'CN(C)C/C=C\\C(=O)Nc1cc2c(cc1O[C@@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
            'CN(C)C/C=C\\C(=O)Nc1cc2c(cc1O[C@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F'
        }

        observed = {
            mol_to_smiles(state,
                          mapped=False,
                          explicit_hydrogen=False,
                          isomeric=True)
            for state in enumerated
        }

        shared = expected & observed
        self.assertEqual(len(shared), len(expected))
        self.assertEqual(len(shared), len(observed))
        self.assertEqual(len(expected), len(observed))
Beispiel #10
0
def find_torsions(molecule, restricted=True, terminal=True):
    # ToDo: Get rid of equivalent torsions. Ex H-C-C-C and C-C-C-H.
    """
    Find the torsions of an OEMol that need to be driven, as zero-based map indices.

    If the molecule carries no atom map, a mapped SMILES is generated, the
    molecule is rebuilt from it, and warnings are logged because the ordering
    may differ from an existing tagged pattern.

    Parameters
    ----------
    molecule : OEMol
        The atoms in the molecule need to be tagged with map indices
    restricted: bool, optional, default True
        If True, also collect torsions matching the restricted-torsion SMARTS
        (central carbon-carbon bond that is double or in a ring).
    terminal: bool, optional, default True
        If True, also collect torsions ending in a terminal hydrogen.

    Returns
    -------
    needed_torsion_scans: dict
        Dictionary with the keys 'internal', 'terminal' and 'restricted'. Each
        maps 'torsion_<i>' to a 4-tuple of map indices minus one.

    Raises
    ------
    Warning
        If the same torsion shows up in both the internal and terminal groups.

    """
    from openeye import oechem

    def _store(category, torsions):
        # Keep one torsion per rotatable bond and record it as (map index - 1) for its four atoms.
        for position, torsion in enumerate(one_torsion_per_rotatable_bond(torsions)):
            label = 'torsion_{}'.format(str(position))
            needed_torsion_scans[category][label] = tuple(
                torsion[k].GetMapIdx() - 1 for k in range(4))

    # Check if molecule has map
    if not has_atom_map(molecule):
        utils.logger().warning('Molecule does not have atom map. A new map will be generated. You might need a new tagged SMARTS if the ordering was changed')
        tagged_smiles = mol_to_smiles(molecule, isomeric=True, mapped=True, explicit_hydrogen=True)
        # Generate new molecule with tags
        molecule = chemi.smiles_to_oemol(tagged_smiles)
        utils.logger().warning('If you already have a tagged SMARTS, compare it with the new one to ensure the ordering did not change')
        utils.logger().warning('The new tagged SMARTS is: {}'.format(tagged_smiles))
        # ToDo: save the new tagged SMILES somewhere. Maybe return it?

    needed_torsion_scans = {'internal': {}, 'terminal': {}, 'restricted': {}}
    mol = oechem.OEMol(molecule)

    if restricted:
        # This should capture double bonds (not capturing rings because OpenEye does not
        # generate skewed conformations. ToDo: use scan in geometric or something else to get this done.
        restricted_smarts = '[*]~[C,c]=,@[C,c]~[*]'
        restricted_matches = _find_torsions_from_smarts(molecule=mol, smarts=restricted_smarts)
        if len(restricted_matches) > 0:
            _store('restricted', restricted_matches)

    if terminal:
        # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
        terminal_smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
        terminal_matches = _find_torsions_from_smarts(molecule=mol, smarts=terminal_smarts)
        if len(terminal_matches) > 0:
            _store('terminal', terminal_matches)

    internal_matches = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(mol)]
    if internal_matches:
        _store('internal', internal_matches)

    # Check that there are no duplicate torsions in mid and h_torsions
    combined = list(needed_torsion_scans['internal'].values())
    combined += list(needed_torsion_scans['terminal'].values())
    if len(set(combined)) != len(combined):
        raise Warning("There is a torsion defined in both mid and terminal torsions. This should not happen. Check "
                      "your molecule and the atom mapping")
    return needed_torsion_scans
Beispiel #11
0
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    Generate fragments from one molecule or an iterable of molecules.

    The output is a dictionary that maps the SMILES of each parent molecule
    (isomeric, implicit hydrogen, unmapped) to a list of SMILES of its fragments.
    If no fragment SMILES are produced for a parent, the list holds the parent's
    own isomeric, explicit-hydrogen SMILES instead. Molecules for which
    `_generate_fragments` returns nothing are skipped with a logged warning.

    Parameters
    ----------
    molecule: OEMol or iterable of OEMol
        Molecule(s) to fragment. Anything that is not iterable is treated as a single molecule.
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    strict_stereo: bool
        Forwarded to `_generate_fragments`.
        Note: This applies to the molecule being fragmented. Not the fragments.
    combinatorial: bool
        If true, SMILES are built with `smiles_with_combined` (which receives MAX_ROTORS); otherwise with
        `frag_to_smiles`.
    MAX_ROTORS: int
        rotor threshold for combinatorial
    remove_map: bool
        If True, the map index of every atom is reset to 0 before fragmenting. This is done to make it easier to
        find duplicate fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments
    """
    fragments = dict()

    # Accept either a single molecule or an iterable of molecules.
    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for atom in mol.GetAtoms():
                atom.SetMapIdx(0)

        frags = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not frags:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue

        # First element is the charged molecule, last element maps to the fragments.
        charged = frags[0]
        frags = frags[-1]
        frag_list = list(frags.values())
        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            IUPAC = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            if IUPAC == name:
                # Title is just the IUPAC name: derive a file-system friendly name from the SMILES instead.
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frags)
        del charged, frags

    if json_filename:
        # Context manager guarantees the handle is closed even if serialisation fails.
        with open(json_filename, 'w') as f:
            json.dump(fragments, f, indent=2, sort_keys=True)

    return fragments
Beispiel #12
0
def expand_states(molecule, protonation=True, tautomers=False, stereoisomers=True, max_states=200, level=0, reasonable=True,
                  carbon_hybridization=True, suppress_hydrogen=True, verbose=True, filename=None,
                  return_smiles_list=False, return_molecules=False):
    """
    Expand molecule states (choice of protonation, tautomers and/or stereoisomers).
    Protonation states expands molecules to protonation of protonation sites (Some states might only be reasonable in
    very high or low pH. ToDo: Only keep reasonable protonation states)
    Tautomers: Should expand to tautomer states but most of the results are some resonance structures. Default is False
    for this reason
    Stereoisomers expands enantiomers and geometric isomers (cis/trans).
    Returns set of SMILES

    Parameters
    ----------
    molecule: OEMol
        Molecule to expand
    protonation: Bool, optional, default=True
        If True will enumerate protonation states.
    tautomers: Bool, optional, default=False
        If True, will enumerate tautomers.  (Note: Default is False because results usually give resonance structures
        which isn't needed for torsion scans
    stereoisomers: Bool, optional, default=True
        If True will enumerate stereoisomers (cis/trans and R/S).
    max_states: int, optional, default=200
        maximum states enumeration should find
    level: int, optional, Default=0
        The level for enumerating tautomers. It can go up until 7. The higher the level, the more tautomers will be
        generated but they will also be less reasonable.
    reasonable: bool, optional, default=True
        Will rank tautomers enumerated energetically (https://docs.eyesopen.com/toolkits/python/quacpactk/tautomerstheory.html#reasonable-ranking)
    carbon_hybridization: bool, optional, default=True
        If True will allow carbons to change hybridization
    suppress_hydrogen: bool, optional, default=True
        If true, will suppress explicit hydrogen. It's considered best practice to set this to True when enumerating tautomers.
    verbose: Bool, optional, default=True
    filename: str, optional, default=None
        Filename to save SMILES to. If None, SMILES will not be saved to file.
    return_smiles_list: bool, optional, default=False
        If True, will return a list of SMILES with numbered name of molecule. Use this if you want to write out an
        smi file of all molecules processed with a unique numbered name for each state.
    return_molecules: bool, optional, default=False
        If true, will return list of OEMolecules instead of SMILES. Ignored when return_smiles_list is True.

    Returns
    -------
    states: set of SMILES for enumerated states
        (or the list of named SMILES / the list of molecules, depending on the return_* flags)

    """
    title = molecule.GetTitle()
    parent = molecule  # kept so the summary log refers to the input, not to the last enumerated state
    states = set()
    molecules = [molecule]
    if verbose:
        logger().info("Enumerating states for {}".format(title))
    if protonation:
        logger().info("Enumerating protonation states for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='protonation', max_states=max_states, verbose=verbose,
                                        level=level, suppress_hydrogen=suppress_hydrogen))
    if tautomers:
        logger().info("Enumerating tautomers for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='tautomers', max_states=max_states, reasonable=reasonable,
                                        carbon_hybridization=carbon_hybridization, verbose=verbose, level=level,
                                        suppress_hydrogen=suppress_hydrogen))
    if stereoisomers:
        logger().info("Enumerating stereoisomers for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='stereoisomers', max_states=max_states, verbose=verbose))

    for mol in molecules:
        try:
            states.add(mol_to_smiles(mol, isomeric=True, mapped=False, explicit_hydrogen=False))
        except ValueError:
            # The isomeric SMILES could not be written for this state: enumerate its
            # stereoisomers and record each of them instead.
            logger().warning("Tautomer or protonation state has a chiral center. Expanding stereoisomers")
            for state in _expand_states(mol, enumerate='stereoisomers'):
                states.add(mol_to_smiles(state, isomeric=True, mapped=False, explicit_hydrogen=False))

    logger().info("{} states were generated for {}".format(len(states), oechem.OEMolToSmiles(parent)))

    # Named SMILES ("<smiles> <title>_<n>") are needed for the smi file and/or as the return value.
    smiles_list = []
    if filename or return_smiles_list:
        smiles_list = ['{} {}_{}'.format(smiles, title, count) for count, smiles in enumerate(states)]

    if filename:
        to_smi(smiles_list, filename)

    if return_smiles_list:
        return smiles_list

    if return_molecules:
        return molecules

    return states
Beispiel #13
0
def enumerate_states(molecule,
                     tautomers=True,
                     stereoisomers=True,
                     verbose=False,
                     return_mols=False,
                     explicit_h=True,
                     return_names=False,
                     max_stereo_returns=1,
                     filter_nitro=True,
                     **kwargs):
    """
    Expand tautomeric state and stereoisomers for molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule to enumerate states
    tautomers : bool, optional, default True
        If False, will not generate tautomers
    stereoisomers : bool, optional, default True
        If False, will not generate all stereoisomers.
    verbose : bool, optional, default False
        If True, output will be verbose
    return_mols : bool, optional, default False
        If True, will return oemols instead of SMILES. Some molecules might be duplicate states
    explicit_h : bool, optional, default True
        If True, SMILES of states will have explicit hydrogen
    return_names : bool, optional, default False
        If True, will return names of molecules with SMILES
    max_stereo_returns : int, optional, default 1
        When the SMILES of a state cannot be written (ValueError from `mol_to_smiles`), stereoisomers are
        generated for it. max_stereo_returns controls how many of those will be returned
    filter_nitro : bool, optional, default True
        If True, tautomers for which `_check_nitro` is true are dropped. Disabled automatically when the
        incoming molecule itself passes `_check_nitro`.
    ** max_states: int, optional, default 200
        This gets passed to `_enumerate_tautomers` and `_enumerate_stereoisomers`
        max number of states `_enumerate_tautomers` and `_enumerate_stereoisomers` generate
    ** pka_norm: bool, optional, default True
        This gets passed to `_enumerate_tautomers`. If True, ionization state of each tautomer will be assigned to a predominate
        state at pH ~7.4
    ** warts: bool, optional, default True
        This gets passed to `_enumerate_tautomers` and `_enumerate_stereoisomers`
        If True, adds a wart to each new state (presumably a systematic suffix on the title - confirm against
        the OpenEye documentation)
    ** force_flip: bool, optional, default True
        This gets passed to `_enumerate_stereoisomers`
        Force flipping all stereocenters. If False, will only generate stereoisomers for stereocenters that are undefined
    ** enum_nitrogen: bool, optional, default True
        This gets passed to `_enumerate_stereoisomers`
        If true, invert non-planar nitrogens

    Returns
    -------
    states: list
        list of oemols (if return_mols) or of unique SMILES of states generated for molecule.
        If return_names is True (and return_mols is False), a tuple ``(states, names)`` is returned instead.

    """
    from openeye import oechem

    # If incoming molecule has nitro in form ([NX3](=O)=O), do not filter out later
    if _check_nitro(molecule):
        filter_nitro = False
    title = molecule.GetTitle()
    states = []
    names = []
    seen = set()  # O(1) duplicate check; `states` keeps the insertion order

    def _to_smiles(mol):
        # Isomeric, unmapped SMILES; raises ValueError when cmiles cannot write it.
        return mol_to_smiles(mol,
                             isomeric=True,
                             mapped=False,
                             explicit_hydrogen=explicit_h)

    def _record(smiles, source_mol):
        # Store a SMILES once, together with the title of the molecule it came from.
        if smiles in seen:
            return
        seen.add(smiles)
        states.append(smiles)
        if return_names:
            names.append(source_mol.GetTitle())

    if verbose:
        logger().info("Enumerating states for {}".format(title))

    if stereoisomers:
        if verbose:
            logger().info("Enumerating stereoisomers for {}".format(title))
        stereo_mols = (_enumerate_stereoisomers(molecule, **kwargs))
        if verbose:
            logger().info('Enumerated {} stereoisomers'.format(
                len(stereo_mols)))

    if tautomers:
        if not stereoisomers:
            stereo_mols = [molecule]
        tau_mols = []
        if verbose:
            logger().info("Enumerating tautomers states for {}".format(title))
        for mol in stereo_mols:
            tau_mols.extend(_enumerate_tautomers(mol, **kwargs))
        if verbose:
            logger().info('Enumerated {} tautomers'.format(len(tau_mols)))

        # check for nitro in ([NX3](=O)=O) form
        if filter_nitro:
            tau_mols = [mol for mol in tau_mols if not _check_nitro(mol)]

    if stereoisomers and tautomers:
        all_mols = stereo_mols + tau_mols
    elif stereoisomers:
        all_mols = stereo_mols
    elif tautomers:
        all_mols = tau_mols
        all_mols.append(molecule)
    else:
        all_mols = [molecule]

    if return_mols:
        return all_mols

    for mol in all_mols:
        try:
            smiles = _to_smiles(mol)
        except ValueError:
            # Stereo is not fully defined. Use flipper with force_flip set to False
            stereo_states = _enumerate_stereoisomers(mol,
                                                     force_flip=False,
                                                     enum_nitrogen=True,
                                                     warts=True)
            if len(stereo_states) > max_stereo_returns:
                stereo_states = stereo_states[:max_stereo_returns]

            for state in stereo_states:
                try:
                    smiles = _to_smiles(state)
                except ValueError:
                    # Still undefined: force flipping of all stereocenters.
                    stereo_states_forced = _enumerate_stereoisomers(
                        mol, force_flip=True, enum_nitrogen=True, warts=True)
                    if len(stereo_states_forced) > max_stereo_returns:
                        stereo_states_forced = stereo_states_forced[:max_stereo_returns]
                    for state_forced in stereo_states_forced:
                        # Record the title of the forced state itself, not of the state that failed.
                        _record(_to_smiles(state_forced), state_forced)
                    # Nothing valid was produced for `state`; do not fall through with a stale SMILES.
                    continue
                _record(smiles, state)
        else:
            _record(smiles, mol)

    if verbose:
        logger().info("{} states were generated for {}".format(
            len(states), oechem.OEMolToSmiles(molecule)))

    if return_names:
        return states, names

    return states
Beispiel #14
0
    def enumerate_fragments(self,
                            molecule,
                            title='',
                            mol_provenance=None,
                            json_filename=None,
                            generate_vis=False):
        """
        Fragment a molecule using the options of this workflow.

        The options are read from ``self.off_workflow`` and the provenance is
        built from ``self.workflow_id``. The molecule is standardized and
        fragmented with ``fragment.generate_fragments``; every fragment is then
        keyed by the canonical isomeric SMILES reported by ``to_molecule_id``.

        Parameters
        ----------
        molecule: input accepted by ``chemi.standardize_molecule``
            Molecule to fragment.
        title: str, optional. Default empty str
            Title forwarded to ``chemi.standardize_molecule``.
        mol_provenance: dict, optional. Default is None
            provenance for molecule. Its 'enumerate_states' routine entry is copied into the provenance when
            ``self.states`` is set but does not contain the parent molecule.
        json_filename: str, optional. Default None
            If a filename is provided, will write output to json file.
        generate_vis: bool, optional, default False
            Forwarded to ``fragment.generate_fragments`` as its visualization flag.

        Returns
        -------
        json_dict: dict
            Maps each fragment's canonical isomeric SMILES to a dictionary with the keys
            'identifiers' and 'provenance'.

        """
        routine = 'enumerate_fragments'
        provenance = _get_provenance(workflow_id=self.workflow_id,
                                     routine=routine)
        options = self.off_workflow.get_options(routine)['options']

        parent = chemi.standardize_molecule(molecule, title)
        parent_smiles = mol_to_smiles(parent,
                                      isomeric=True,
                                      explicit_hydrogen=False,
                                      mapped=False)

        # Record which molecule the fragments were derived from.
        routine_record = provenance['routine'][routine]
        routine_record['parent_molecule_name'] = parent.GetTitle()
        routine_record['parent_molecule'] = parent_smiles

        fragments = fragment.generate_fragments(parent, generate_vis,
                                                **options)

        if self.states:
            # Prefer the stored state provenance when the parent is a known state.
            if parent_smiles in self.states['states']:
                state_record = self.states['provenance']['routine']['enumerate_states']
                provenance['routine']['enumerate_states'] = state_record
            elif mol_provenance:
                state_record = mol_provenance['routine']['enumerate_states']
                provenance['routine']['enumerate_states'] = state_record

        # Generate identifiers for fragments
        # NOTE(review): every entry references the same `provenance` dict, so the
        # 'canonicalization' value written last is visible from all entries - confirm intended.
        fragments_json_dict = {}
        for parent_key in fragments:
            for frag in fragments[parent_key]:
                identifiers = to_molecule_id(frag, canonicalization='openeye')
                canonical = identifiers['canonical_isomeric_smiles']
                entry = {'identifiers': identifiers, 'provenance': provenance}
                fragments_json_dict[canonical] = entry
                entry['provenance']['canonicalization'] = identifiers.pop('provenance')

        if json_filename:
            with open(json_filename, 'w') as handle:
                json.dump(fragments_json_dict, handle, indent=2, sort_keys=True)

        return fragments_json_dict