Ejemplo n.º 1
0
def run_oemol_test_suite(iupac='ethane'):
    """
    Run the oemol related checks for perses.utils.openeye on one molecule.

    The molecule is built from its IUPAC name, its positions are read with
    ``test_extractPositionsFromOEMol``, every position is shifted by
    ``[1., 1., 1.] * unit.angstrom`` and handed to
    ``test_giveOpenmmPositionsToOEMol``. The returned molecule is then written
    to SMILES (default flags plus explicit hydrogens) and rebuilt with
    ``smiles_to_oemol``.

    Parameters
    ----------
    iupac : str, default 'ethane'
        IUPAC name passed to ``iupac_to_oemol``.

    Raises
    ------
    AssertionError
        If the molecule rebuilt from SMILES does not have the same number of
        atoms as the molecule built from the IUPAC name.
    """
    from openmoltools.openeye import iupac_to_oemol
    import copy
    import numpy as np
    import simtk.unit as unit
    from openeye import oechem

    oemol = iupac_to_oemol(iupac)
    positions = test_extractPositionsFromOEMol(oemol)

    # Same offset for every atom, so build it once.
    offset = [1., 1., 1.] * unit.angstrom

    # Fill a plain array atom by atom, then attach the unit to the whole array.
    shifted_positions = np.zeros(np.shape(positions))
    for atom_index in range(oemol.NumAtoms()):
        original = copy.deepcopy(positions[atom_index])
        shifted_positions[atom_index] = original + offset
    shifted_positions *= unit.angstrom

    molecule = test_giveOpenmmPositionsToOEMol(shifted_positions, oemol)

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    smiles = oechem.OECreateSmiString(molecule, smiles_flags)
    smiles_oemol = smiles_to_oemol(smiles)

    # Both routes to the molecule must agree on the atom count.
    n_atoms_iupac = oemol.NumAtoms()
    n_atoms_smiles = smiles_oemol.NumAtoms()
    assert (n_atoms_iupac == n_atoms_smiles), "Discrepancy between molecule generated from IUPAC and SMILES"
Ejemplo n.º 2
0
def LibGen(libgen, ofs, unique, isomeric):
    """
    Write the products of ``libgen`` to ``ofs``, optionally skipping duplicates.

    Parameters
    ----------
    libgen : object
        Anything exposing ``GetProducts()``; each yielded molecule is a
        candidate for output.
    ofs : object
        Output molecule stream handed to ``oechem.OEWriteMolecule``.
    unique : bool
        If true, a product is written only the first time its SMILES string
        is encountered; if false, every product is written.
    isomeric : bool
        If true, ``OESMILESFlag_ISOMERIC`` is added to the SMILES flags used
        for the duplicate check.
    """
    smiflag = oechem.OESMILESFlag_DEFAULT  # Canonical|AtomMaps|Rgroup
    if isomeric:
        smiflag |= oechem.OESMILESFlag_ISOMERIC

    # A set keeps the duplicate check O(1) per product, so large libraries do
    # not degrade quadratically.
    seen_smiles = set()
    for mol in libgen.GetProducts():
        if unique:
            # The SMILES string is only needed as a de-duplication key.
            smiles = oechem.OECreateSmiString(mol, smiflag)
            if smiles in seen_smiles:
                continue
            seen_smiles.add(smiles)
        oechem.OEWriteMolecule(ofs, mol)
Ejemplo n.º 3
0
def get_smarts(prefix, atom_idxs):
    """Return a tagged SMILES string for the atoms listed in ``atom_idxs``.

    The molecule is read from ``prefix + '.mol2'``. Each atom whose index is in
    ``atom_idxs`` receives a map index equal to its position in ``atom_idxs``
    plus one. Those atoms, together with the atoms returned by their
    ``GetAtoms()`` (treated here as neighbours), are copied into a subset
    molecule that is then written out with explicit hydrogens, stereo and
    atom maps.

    Parameters
    ----------
    prefix : str
        File prefix; ``.mol2`` and ``.prmtop`` are appended to it.
    atom_idxs : list of int
        Atom indices to tag, in the order the tags should be numbered.

    Returns
    -------
    str
        The string produced by ``OEChem.OECreateSmiString`` for the subset.
    """
    offmol = Molecule.from_file(prefix + '.mol2')
    fix_carboxylate_bond_orders(offmol)

    # ParmEd structures are cached per prefix in the module-level dict.
    if prefix not in prefix2pmd_struct:
        prefix2pmd_struct[prefix] = ParmEd.load_file(prefix + '.prmtop')
    pmd_struct = prefix2pmd_struct[prefix]

    oemol = offmol.to_openeye()
    kept_atom_indices = set()

    # The ParmEd atoms are not inspected; zipping them in only bounds the
    # iteration to the shorter of the two atom sequences.
    for oeatom, _pmd_atom in zip(oemol.GetAtoms(), pmd_struct.atoms):
        atom_idx = oeatom.GetIdx()
        if atom_idx not in atom_idxs:
            continue

        # Tag the atom with its 1-based position in the requested list.
        oeatom.SetMapIdx(atom_idxs.index(atom_idx) + 1)

        kept_atom_indices.add(atom_idx)
        kept_atom_indices.update(
            neighbor.GetIdx() for neighbor in oeatom.GetAtoms())

    def _is_kept(atom):
        return atom.GetIdx() in kept_atom_indices

    # Make a "Subset" molecule, so that we don't get weird charges
    # around where we cleave the residues
    subsetmol = OEChem.OEGraphMol()
    OEChem.OESubsetMol(subsetmol, oemol, OEChem.PyAtomPredicate(_is_kept))

    # Canonical output with isotopes and R-groups, atom and bond stereo,
    # explicit hydrogens and the atom map tags assigned above.
    smiles_options = (OEChem.OESMILESFlag_Canonical
                      | OEChem.OESMILESFlag_Isotopes
                      | OEChem.OESMILESFlag_RGroups
                      | OEChem.OESMILESFlag_AtomStereo
                      | OEChem.OESMILESFlag_BondStereo
                      | OEChem.OESMILESFlag_Hydrogens
                      | OEChem.OESMILESFlag_AtomMaps)

    return OEChem.OECreateSmiString(subsetmol, smiles_options)
Ejemplo n.º 4
0
def CanSmi(mol, isomeric, kekule):
    """
    Create a canonical SMILES string for ``mol``.

    ``mol`` is modified in place: ring and aromaticity perception are re-run
    on it, and with ``kekule`` set its aromatic flags are cleared and it is
    kekulized before the string is written.

    Parameters
    ----------
    mol : OEMolBase
        Molecule to write.
    isomeric : bool
        If true, ``OESMILESFlag_ISOMERIC`` is added to the canonical flag.
    kekule : bool
        If true, produce the string from the kekulized molecule.

    Returns
    -------
    str
        The string returned by ``oechem.OECreateSmiString``.
    """
    oechem.OEFindRingAtomsAndBonds(mol)
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_OpenEye)

    if isomeric:
        flags = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC
    else:
        flags = oechem.OESMILESFlag_Canonical

    if kekule:
        # Mark every aromatic bond with integer type 5 before the aromatic
        # flags are cleared and the molecule is kekulized.
        aromatic_bonds = mol.GetBonds(oechem.OEIsAromaticBond())
        for aromatic_bond in aromatic_bonds:
            aromatic_bond.SetIntType(5)
        oechem.OECanonicalOrderAtoms(mol)
        oechem.OECanonicalOrderBonds(mol)
        oechem.OEClearAromaticFlags(mol)
        oechem.OEKekulize(mol)

    return oechem.OECreateSmiString(mol, flags)
Ejemplo n.º 5
0
def to_oechem_can(smi):
    """Return the OEChem canonical SMILES of ``smi`` with implicit H counts.

    Note that some SMILES such as C=N=O cannot be recognized
    correctly by openbabel. With OEChem, it is interpreted as
    C=[NH]=O, as was desired.

    Parameters
    ----------
    smi : str
        Input SMILES. If it is a member of the module-level ``element_cans``
        it is returned as ``[<smi>H<n>]`` with ``n`` taken from the
        module-level ``nves`` table, without going through OEChem.

    Returns
    -------
    str
        SMILES written with the Isotopes, Canonical and ImpHCount flags.

    Raises
    ------
    AssertionError
        If OEChem fails to parse ``smi`` or to add explicit hydrogens.
    """

    from openeye import oechem

    if smi in element_cans:
        return '[%sH%d]' % (smi, nves[smi])

    m = oechem.OEGraphMol()
    # The OEChem calls below do the actual work (they populate `m`), so they
    # must not live inside `assert` statements: those are stripped under
    # `python -O`, which would leave `m` empty. AssertionError is still the
    # exception raised on failure.
    if not oechem.OESmilesToMol(m, smi):
        raise AssertionError('OEChem failed to parse SMILES %r' % (smi,))
    if not oechem.OEAddExplicitHydrogens(m):
        raise AssertionError(
            'OEChem failed to add explicit hydrogens to %r' % (smi,))

    # "OESMILESFlag_ImpHCount" is indispensable!!
    # Otherwise, B=C won't be processed correctly by openbabel, i.e., somehow obabel
    # tries to add two H's (instead of one) to B. While things are ok with [BH]=C
    flavor = (oechem.OESMILESFlag_Isotopes
              | oechem.OESMILESFlag_Canonical
              | oechem.OESMILESFlag_ImpHCount)
    return oechem.OECreateSmiString(m, flavor)
def CanSmi(mol, isomeric, kekule):
    """
    Return the canonical SMILES of the OEMol provided.

    The molecule is updated in place (ring/aromaticity perception, and
    kekulization when requested) before the string is created.

    :param mol: OEMolBase object
    :param isomeric: if true, add the ISOMERIC flag to the canonical flag
    :param kekule: if true, kekulize the molecule before writing it
    :return: string returned by OECreateSmiString
    """
    oechem.OEFindRingAtomsAndBonds(mol)
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_OpenEye)

    smiles_flavor = oechem.OESMILESFlag_Canonical
    if isomeric:
        smiles_flavor = smiles_flavor | oechem.OESMILESFlag_ISOMERIC

    if not kekule:
        return oechem.OECreateSmiString(mol, smiles_flavor)

    # Kekule path: tag aromatic bonds with integer type 5, put atoms and
    # bonds in canonical order, then drop aromaticity and kekulize.
    for arom_bond in mol.GetBonds(oechem.OEIsAromaticBond()):
        arom_bond.SetIntType(5)
    oechem.OECanonicalOrderAtoms(mol)
    oechem.OECanonicalOrderBonds(mol)
    oechem.OEClearAromaticFlags(mol)
    oechem.OEKekulize(mol)

    return oechem.OECreateSmiString(mol, smiles_flavor)
Ejemplo n.º 7
0
def sanitizeSMILES(smiles_list, mode='drop', verbose=False):
    """
    Sanitize set of SMILES strings by ensuring all are canonical isomeric SMILES.
    Duplicates are also removed.

    Parameters
    ----------
    smiles_list : iterable of str
        The set of SMILES strings to sanitize.
    mode : str, optional, default='drop'
        When a SMILES string that does not correspond to canonical isomeric SMILES is found, select the action to be performed.
        'exception' : raise an `Exception`
        'drop' : drop the SMILES string
        'expand' : expand all stereocenters into multiple molecules
        Any other value leaves molecules with undefined stereocenters out of
        the result without reporting it.
    verbose : bool, optional, default=False
        If True, print verbose output.

    Returns
    -------
    sanitized_smiles_list : list of str
         Sanitized list of canonical isomeric SMILES strings. The list is built
         from a set, so its order is not defined.

    Raises
    ------
    Exception
        If ``mode='exception'`` and a molecule has undefined stereocenters.

    Examples
    --------
    Sanitize a simple list.

    >>> smiles_list = ['CC', 'CCC', '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]']

    Throw an exception if undefined stereochemistry is present.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='exception')
    Traceback (most recent call last):
      ...
    Exception: Molecule '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]' has undefined stereocenters

    Drop molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='drop')
    >>> len(sanitized_smiles_list)
    2

    Expand molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='expand')
    >>> len(sanitized_smiles_list)
    4
    """
    from openeye import oechem
    from openeye.oechem import OEGraphMol, OESmilesToMol
    from perses.tests.utils import has_undefined_stereocenters, enumerate_undefined_stereocenters

    sanitized_smiles_set = set()
    # Canonical isomeric SMILES with explicit hydrogens.
    OESMILES_OPTIONS = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_ISOMERIC | oechem.OESMILESFlag_Hydrogens
    for smiles in smiles_list:
        molecule = OEGraphMol()
        OESmilesToMol(molecule, smiles)

        oechem.OEAddExplicitHydrogens(molecule)

        if verbose:
            # Title and atom names are only set for the verbose output.
            molecule.SetTitle(smiles)
            oechem.OETriposAtomNames(molecule)

        if has_undefined_stereocenters(molecule, verbose=verbose):
            if mode == 'drop':
                if verbose:
                    print("Dropping '%s' due to undefined stereocenters." % smiles)
                continue
            elif mode == 'exception':
                raise Exception("Molecule '%s' has undefined stereocenters" % smiles)
            elif mode == 'expand':
                if verbose:
                    print('Expanding stereochemistry:')
                    print('original: %s' % smiles)
                expanded_molecules = enumerate_undefined_stereocenters(molecule, verbose=verbose)
                for expanded_molecule in expanded_molecules:
                    smiles_string = oechem.OECreateSmiString(expanded_molecule, OESMILES_OPTIONS)
                    sanitized_smiles_set.add(smiles_string)
                    if verbose:
                        print('expanded: %s' % smiles_string)
        else:
            # Convert to OpenEye's canonical isomeric SMILES.
            smiles_string = oechem.OECreateSmiString(molecule, OESMILES_OPTIONS)
            sanitized_smiles_set.add(smiles_string)

    sanitized_smiles_list = list(sanitized_smiles_set)

    return sanitized_smiles_list
Ejemplo n.º 8
0
    create_systems(topologies,
                   positions,
                   output_directory,
                   project_prefix,
                   solvate=solvate)

    #generate atom maps for all pairs:
    ifs = oechem.oemolistream()
    ifs.open(ligand_filename)

    # get the list of molecules
    mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]

    smiles_list = []
    for idx, mol in enumerate(mol_list):
        mol.SetTitle("MOL{}".format(idx))
        oechem.OETriposAtomNames(mol)
        smiles_list.append(oechem.OECreateSmiString(mol, OESMILES_OPTIONS))

    #smiles_list = [oechem.OECreateSmiString(mol, OESMILES_OPTIONS)]

    atom_mapper = AtomMapper(mol_list)
    atom_mapper.map_all_molecules()
    atom_mapper.generate_and_check_proposal_matrix()

    atom_mapper_filename = os.path.join(
        output_directory, "{}_atom_mapper.json".format(project_prefix))
    with open(atom_mapper_filename, 'w') as map_outfile:
        map_outfile.write(atom_mapper.to_json())
Ejemplo n.º 9
0
def createSMILESfromOEMol(molecule):
    """
    Write ``molecule`` as a SMILES string with explicit hydrogens.

    Parameters
    ----------
    molecule : OEMol
        Molecule handed to ``oechem.OECreateSmiString``.

    Returns
    -------
    smiles : str
        SMILES created with ``OESMILESFlag_DEFAULT | OESMILESFlag_Hydrogens``.
    """
    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    return oechem.OECreateSmiString(molecule, smiles_flags)
Ejemplo n.º 10
0
    def __init__(self,
                 protein_pdb_filename,
                 ligand_file,
                 old_ligand_index,
                 new_ligand_index,
                 forcefield_files,
                 pressure=1.0 * unit.atmosphere,
                 temperature=300.0 * unit.kelvin,
                 solvent_padding=9.0 * unit.angstroms):
        """
        Initialize a NonequilibriumFEPSetup object

        Loads the two ligands and the protein, builds the old complex,
        solvates it, and generates topology and geometry proposals for both
        the complex and the solvent phase.

        Parameters
        ----------
        protein_pdb_filename : str
            The name of the protein pdb file
        ligand_file : str
            the name of the ligand file (any openeye supported format)
        old_ligand_index : int
            Index handed to ``self.load_sdf`` to select the old ligand
            (assumed to be its position within ``ligand_file``)
        new_ligand_index : int
            Index handed to ``self.load_sdf`` to select the new ligand
            (assumed to be its position within ``ligand_file``)
        forcefield_files : list of str
            The list of ffxml files that contain the forcefields that will be used
        pressure : Quantity, units of pressure
            Pressure to use in the barostat. If None, the system generator is
            created without a barostat.
        temperature : Quantity, units of temperature
            Temperature to use for the barostat and for computing beta
        solvent_padding : Quantity, units of length
            The amount of padding to use when adding solvent
        """
        self._protein_pdb_filename = protein_pdb_filename
        self._pressure = pressure
        self._temperature = temperature
        self._barostat_period = 50
        self._padding = solvent_padding

        self._ligand_file = ligand_file
        self._old_ligand_index = old_ligand_index
        self._new_ligand_index = new_ligand_index

        self._old_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._old_ligand_index)
        self._new_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._new_ligand_index)

        self._old_ligand_positions = extractPositionsFromOEMOL(
            self._old_ligand_oemol)

        # Force field parameters are generated before the titles are
        # overwritten with "MOL" below.
        ffxml = forcefield_generators.generateForceFieldFromMolecules(
            [self._old_ligand_oemol, self._new_ligand_oemol])

        self._old_ligand_oemol.SetTitle("MOL")
        self._new_ligand_oemol.SetTitle("MOL")

        smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
        self._new_ligand_smiles = oechem.OECreateSmiString(
            self._new_ligand_oemol, smiles_flags)
        self._old_ligand_smiles = oechem.OECreateSmiString(
            self._old_ligand_oemol, smiles_flags)

        print(self._new_ligand_smiles)
        print(self._old_ligand_smiles)

        self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._old_ligand_oemol)
        self._old_ligand_md_topology = md.Topology.from_openmm(
            self._old_ligand_topology)
        self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._new_ligand_oemol)
        self._new_ligand_md_topology = md.Topology.from_openmm(
            self._new_ligand_topology)
        # Misspelled name kept as an alias for any code that still reads it.
        self._new_liands_md_topology = self._new_ligand_md_topology

        # The context manager closes the file even if PDB parsing fails.
        with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
            pdb_file = app.PDBFile(protein_pdbfile)

        self._protein_topology_old = pdb_file.topology
        self._protein_md_topology_old = md.Topology.from_openmm(
            self._protein_topology_old)
        self._protein_positions_old = pdb_file.positions
        self._forcefield = app.ForceField(*forcefield_files)
        self._forcefield.loadFile(StringIO(ffxml))

        print("Generated forcefield")

        self._complex_md_topology_old = self._protein_md_topology_old.join(
            self._old_ligand_md_topology)
        self._complex_topology_old = self._complex_md_topology_old.to_openmm()

        n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
        n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

        # Complex positions: protein atoms first, then the old ligand atoms.
        self._complex_positions_old = unit.Quantity(
            np.zeros([n_atoms_complex_old, 3]), unit=unit.nanometers)
        self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
        self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

        if pressure is not None:
            barostat = openmm.MonteCarloBarostat(self._pressure,
                                                 self._temperature,
                                                 self._barostat_period)
            self._system_generator = SystemGenerator(
                forcefield_files,
                barostat=barostat,
                forcefield_kwargs={'nonbondedMethod': app.PME})
        else:
            self._system_generator = SystemGenerator(forcefield_files)

        self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
            self._old_ligand_oemol,
            self._new_ligand_oemol,
            self._system_generator,
            residue_name="MOL")
        self._geometry_engine = FFAllAngleGeometryEngine()

        (self._complex_topology_old_solvated,
         self._complex_positions_old_solvated,
         self._complex_system_old_solvated) = self._solvate_system(
             self._complex_topology_old, self._complex_positions_old)
        self._complex_md_topology_old_solvated = md.Topology.from_openmm(
            self._complex_topology_old_solvated)
        print(self._complex_proposal_engine._smiles_list)

        beta = 1.0 / (kB * temperature)

        self._complex_topology_proposal = self._complex_proposal_engine.propose(
            self._complex_system_old_solvated,
            self._complex_topology_old_solvated)
        self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
            self._complex_topology_proposal,
            self._complex_positions_old_solvated, beta)

        # Now generate the equivalent objects for the solvent phase. First,
        # generate the ligand-only topologies and atom map.
        (self._solvent_topology_proposal,
         self._old_solvent_positions) = self._generate_ligand_only_topologies(
             self._complex_positions_old_solvated,
             self._complex_positions_new_solvated)
        self._new_solvent_positions, _ = self._geometry_engine.propose(
            self._solvent_topology_proposal, self._old_solvent_positions, beta)
Ejemplo n.º 11
0
    def process(self, record, port):
        """
        Combine the ligand carried by ``record`` with the stored MD components.

        Only records arriving on the ``'intake'`` port are handled; any other
        port is ignored. The ligand is checked to be single-conformer and to
        lie within 3 A of the protein. Atoms of the other components that
        clash with the ligand are removed, and a new record holding the
        complex is emitted on ``self.success``. Any exception is logged and
        the original record is emitted on ``self.failure`` instead.

        Parameters
        ----------
        record : OERecord
            Incoming record; must carry ``Fields.primary_molecule``.
        port : str
            Name of the port the record arrived on.
        """
        try:
            if port == 'intake':

                if not record.has_value(Fields.primary_molecule):
                    raise ValueError(
                        "Missing the ligand primary molecule field")

                ligand = record.get_value(Fields.primary_molecule)

                if ligand.NumConfs() > 1:
                    # Use NumConfs() here as well, matching the check above,
                    # so the intended ValueError (not an AttributeError) is
                    # what gets reported.
                    raise ValueError(
                        "The ligand {} has multiple conformers: {}".format(
                            ligand.GetTitle(), ligand.NumConfs()))

                if not record.has_value(Fields.title):
                    self.log.warn(
                        "Missing title field '{}' field; improvising".format(
                            Fields.title.get_name()))
                    # Fall back to the first 12 characters of the ligand title.
                    ligand_title = ligand.GetTitle()[0:12]
                else:
                    ligand_title = record.get_value(Fields.title)

                protein = self.md_components.get_protein

                self.md_components.set_ligand(ligand)

                # Check if the ligand is inside the binding site. Cutoff distance 3A
                if not oeommutils.check_shell(ligand, protein, 3):
                    raise ValueError(
                        "The Ligand is probably outside the Protein binding site"
                    )

                # Remove Steric Clashes between the ligand and the other System components
                for comp_name, comp in self.md_components.get_components.items(
                ):

                    # Skip clashes between the ligand itself and the protein
                    if comp_name in ['ligand', 'protein']:
                        continue

                    # Remove Metal clashes if the distance between the metal and the ligand
                    # is less than 1A
                    elif comp_name == 'metals':
                        metal_del = oeommutils.delete_shell(ligand,
                                                            comp,
                                                            1.0,
                                                            in_out='in')

                        if metal_del.NumAtoms() != comp.NumAtoms():
                            self.opt['Logger'].info(
                                "Detected steric-clashes between the ligand {} and metals"
                                .format(ligand_title))

                            self.md_components.set_metals(metal_del)

                    # Remove clashes if the distance between the selected component and the ligand
                    # is less than 1.5A
                    else:
                        comp_del = oeommutils.delete_shell(ligand,
                                                           comp,
                                                           1.5,
                                                           in_out='in')

                        if comp_del.NumAtoms() != comp.NumAtoms():
                            self.opt['Logger'].info(
                                "Detected steric-clashes between the ligand {} and component {}"
                                .format(ligand_title, comp_name))

                            self.md_components.set_component_by_name(
                                comp_name, comp_del)

                complex_title = 'p' + self.md_components.get_title + '_l' + ligand_title

                mdcomp = self.md_components.copy
                mdcomp.set_title(complex_title)

                # Check Ligand: the ligand stored in the copied components
                # must give the same SMILES as the incoming one.
                lig_check = mdcomp.get_ligand
                smi_lig_check = oechem.OECreateSmiString(lig_check)
                smi_ligand = oechem.OECreateSmiString(ligand)

                if smi_ligand != smi_lig_check:
                    raise ValueError(
                        "Ligand IsoSmiles String check failure: {} vs {}".
                        format(smi_lig_check, smi_ligand))

                # the ligand is the primary molecule
                new_record = OERecord(record)

                new_record.set_value(Fields.title, complex_title)
                new_record.set_value(Fields.ligand, ligand)
                new_record.set_value(Fields.protein, protein)

                # Check Protein Name
                if protein.GetTitle():
                    protein_name = protein.GetTitle()
                else:
                    protein_name = "prot"

                new_record.set_value(Fields.protein_name, protein_name)
                new_record.set_value(Fields.md_components, mdcomp)

                self.success.emit(new_record)

        except Exception as e:
            # Boundary handler: report the failure and route the untouched
            # record to the failure port instead of propagating.
            print("Failed to complete", str(e), flush=True)
            self.opt['Logger'].info('Exception {} {}'.format(
                str(e), self.title))
            self.log.error(traceback.format_exc())
            self.failure.emit(record)

        return
Ejemplo n.º 12
0
    def getscores(self,
                  actions,
                  gsmis,
                  prot,
                  lig,
                  num_returns=10,
                  return_docked_pose=False,
                  refmol=None):
        """
        Align, dock and score a random subset of candidate actions.

        A random sample of the candidates is aligned in a multiprocessing
        pool. Each aligned molecule is then scored against the freshly built
        receptor, the starting docking object and (optionally) the past
        docking objects. The results are returned ordered by the score list
        selected by ``self.sort``.

        Parameters
        ----------
        actions : list
            Candidate actions; indexed in parallel with ``gsmis``.
        gsmis : list
            Items paired with ``actions`` (presumably SMILES strings) and
            passed to the aligner.
        prot : OEMolBase
            Protein used to build the receptor.
        lig : OEMolBase
            Ligand handed to ``oedocking.OEMakeReceptor``.
        num_returns : int, default 10
            Number of candidates to sample. Values <= 0 mean
            ``len(actions) - 1``; the sample size is always capped at
            ``len(actions) - 1``.
        return_docked_pose : bool, default False
            If true and a docking object was built, the docked pose replaces
            the aligned molecule in the output.
        refmol : object, optional
            Reference molecule deep-copied and passed to the aligner.

        Returns
        -------
        list of tuple
            ``(new_mol, new_mol2, gs, action)`` tuples ordered by the
            argsort of the selected score list.

        Raises
        ------
        AssertionError
            If ``self.sort`` is not one of 'dscores', 'pscores', 'iscores' or
            'hscores'.
        """
        with self.logger("getscores") as logger:
            if num_returns <= 0:
                num_returns = len(actions) - 1
            logger.log("Action space is ", len(actions))
            idxs = list(
                np.random.choice(len(actions),
                                 min(num_returns,
                                     len(actions) - 1),
                                 replace=False).flatten())
            actions = [actions[idx] for idx in idxs]
            gsmis = [gsmis[idx] for idx in idxs]

            protein = oechem.OEMol(prot)
            receptor = oechem.OEGraphMol()

            if not (self.sort == 'iscores' and self.optimize):
                logger.log(
                    "Creating receptor from recent pdb, this might take awhile"
                )
                oedocking.OEMakeReceptor(receptor, protein, lig)
                dockobj = oedocking.OEDock(self.dockmethod)
                dockobj.Initialize(receptor)
                assert (dockobj.IsInitialized())
                logger.log("done")
            else:
                dockobj = None
                logger.log(
                    "Skipping receptor building as optimize is set and sort method is iscore."
                )

            pscores = []
            dscores = []
            ds_old_scores = []
            ds_start_scores = []

            data = []

            with multiprocessing.Pool() as pool:
                imapiter = pool.imap(
                    self.env.action.aligner.__class__.call_static,
                    zip(actions, gsmis,
                        [copy.deepcopy(refmol)] * len(actions)))

                for idx, res in enumerate(imapiter):
                    try:
                        if res is None:
                            logger.error(
                                "Alignment failed and returned none for ",
                                gsmis[idx])
                            continue
                        ps, ds, ds_start, ds_old = None, None, None, []
                        new_mol, new_mol2, gs, action = res

                        if dockobj is not None:
                            dockedpose = oechem.OEMol()
                            newmol2 = oechem.OEMol(new_mol)
                            dockobj.DockMultiConformerMolecule(
                                dockedpose, newmol2, 1)
                            ds = dockedpose.GetEnergy()
                            ps = dockobj.ScoreLigand(new_mol)
                            dscores.append(ds)
                            pscores.append(ps)
                            if return_docked_pose:
                                new_mol_ = oechem.OEMol(dockedpose)

                        if self.start_dobj is not None:
                            dockedpose2 = oechem.OEMol()
                            newmol2 = oechem.OEMol(new_mol)
                            self.start_dobj.DockMultiConformerMolecule(
                                dockedpose2, newmol2, 1)
                            ds_start = dockedpose2.GetEnergy()
                            ds_start_scores.append(ds_start)
                        if self.track_hscores:
                            for olddobj in self.past_dockobjs:
                                dockedpose2 = oechem.OEMol()
                                newmol2 = oechem.OEMol(new_mol)
                                olddobj.DockMultiConformerMolecule(
                                    dockedpose2, newmol2, 1)
                                ds_old.append(dockedpose2.GetEnergy())
                            # Record one score set per molecule. Appending
                            # inside the loop above would add the same list
                            # once per past docking object and misalign the
                            # history scores with `data`.
                            if ds_old:
                                ds_old_scores.append(ds_old)

                        if dockobj is not None and return_docked_pose:
                            new_mol = new_mol_
                        oechem.OEAssignAromaticFlags(new_mol)
                        oechem.OEAddExplicitHydrogens(new_mol)
                        oechem.OE3DToInternalStereo(new_mol)
                        new_mol2 = oechem.OEMol(new_mol)

                        gs = oechem.OECreateSmiString(
                            new_mol, oechem.OESMILESFlag_DEFAULT
                            | oechem.OESMILESFlag_Hydrogens
                            | oechem.OESMILESFlag_Isotopes
                            | oechem.OESMILESFlag_BondStereo
                            | oechem.OESMILESFlag_AtomStereo)

                        logger.log(
                            f"({idx} / {len(idxs)}): Pose Score {ps}, Dock Score {ds}, Init Score {ds_start}"
                        )

                        data.append((new_mol, new_mol2, gs, action))
                    except Exception as exc:
                        # Best effort: a failure on one candidate is logged
                        # and the loop moves on to the next one.
                        logger.error(exc)
                        traceback.print_tb(exc.__traceback__)

                        continue

            self.past_dockobjs.append(dockobj)
            self.past_receptors.append(receptor)
            logger.log("Sorting on", self.sort)
            if self.sort == 'dscores':
                order = np.argsort(dscores)
                logger.log([dscores[i] for i in order])
            elif self.sort == 'pscores':
                order = np.argsort(pscores)
                logger.log([pscores[i] for i in order])
            elif self.sort == 'iscores':
                order = np.argsort(ds_start_scores)
                logger.log([ds_start_scores[i] for i in order])
            elif self.sort == 'hscores':
                # Clip each score set at 0 from above and take its 0-quantile
                # (the minimum).
                hscores = [
                    np.quantile(np.clip(scoreset, None, 0), 0.)
                    for scoreset in ds_old_scores
                ]
                order = np.argsort(hscores)
                logger.log([hscores[i] for i in order])
            else:
                assert (False)

            self.env.data['dscores'].append(dscores)
            self.env.data['pscores'].append(pscores)
            self.env.data['iscores'].append(ds_start_scores)
            self.env.data['hscores'].append(ds_old_scores)
            data = [data[i] for i in order]
        return data
Ejemplo n.º 13
0
 def getoutput(self, smi):
     """Parse ``smi`` and write it back with the enclosing scope's ``flags``.

     Raises AssertionError if ``oe.OEParseSmiles`` reports failure.
     """
     parsed_mol = oe.OEGraphMol()
     parsed_ok = oe.OEParseSmiles(parsed_mol, smi)
     assert parsed_ok
     output_smiles = oe.OECreateSmiString(parsed_mol, flags)
     return output_smiles
Ejemplo n.º 14
0
 def getoutput(self, smi):
     """Parse ``smi``, assign aromatic flags, and write it with flag value 0.

     Raises AssertionError if ``oe.OEParseSmiles`` reports failure.
     """
     parsed_mol = oe.OEGraphMol()
     parsed_ok = oe.OEParseSmiles(parsed_mol, smi)
     assert parsed_ok
     oe.OEAssignAromaticFlags(parsed_mol)
     output_smiles = oe.OECreateSmiString(parsed_mol, 0)
     return output_smiles
Ejemplo n.º 15
0
def mol_to_smiles(molecule,
                  isomeric=True,
                  explicit_hydrogen=True,
                  mapped=True):
    """
    Generate canonical SMILES with OpenEye.

    The input is copied first, so the caller's molecule is not modified. Any
    existing atom map is removed from the copy before the SMILES is written.

    Parameters
    ----------
    molecule: oechem.OEMol
    isomeric: bool
        If True, SMILES will include chirality and stereo bonds
    explicit_hydrogen: bool
        If True, SMILES will include explicit hydrogen
    mapped: bool
        If True, will include map indices (atom index + 1, assigned after
        canonical atom ordering unless the molecule carries 'json_geometry'
        data)

    Returns
    -------
    smiles str

    Raises
    ------
    ValueError
        If ``isomeric`` is True and the molecule's stereochemistry is not
        fully defined.
    Warning
        If ``mapped`` is True while ``explicit_hydrogen`` or ``isomeric`` is
        False.
    """

    mol_copy = oechem.OEMol(molecule)

    if has_atom_map(mol_copy):
        remove_atom_map(mol_copy)

    if explicit_hydrogen and not has_explicit_hydrogen(mol_copy):
        oechem.OEAddExplicitHydrogens(mol_copy)

    # First check if geometry from JSON exists
    try:
        JSON_geometry = mol_copy.GetData('json_geometry')
    except ValueError:
        JSON_geometry = False

    if isomeric and not has_stereo_defined(mol_copy):
        raise ValueError("Smiles must have stereochemistry defined.")

    # Untagged output: one flag combination per (hydrogen, isomeric) choice.
    if not mapped:
        if explicit_hydrogen:
            if isomeric:
                flags = (oechem.OESMILESFlag_Hydrogens
                         | oechem.OESMILESFlag_ISOMERIC)
            else:
                flags = (oechem.OESMILESFlag_Hydrogens
                         | oechem.OESMILESFlag_Canonical
                         | oechem.OESMILESFlag_RGroups)
            return oechem.OECreateSmiString(mol_copy, flags)
        if isomeric:
            return oechem.OEMolToSmiles(mol_copy)
        return oechem.OECreateSmiString(
            mol_copy,
            oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_RGroups)

    # Tagged output from here on.
    if not JSON_geometry:
        # canonical order of atoms if input was SMILES or permute_xyz is true
        canonical_order_atoms(mol_copy)

    for oe_atom in mol_copy.GetAtoms():
        oe_atom.SetMapIdx(oe_atom.GetIdx() + 1)

    if not explicit_hydrogen:
        raise Warning("Tagged SMILES must include hydrogens to retain order")

    if not isomeric:
        raise Warning("Tagged SMILES must include stereochemistry ")

    return oechem.OEMolToSmiles(mol_copy)
Ejemplo n.º 16
0
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    Generate a topology proposal plus old and new positions for a small-molecule transformation.

    The IUPAC names are used when both are given (the default). If either name is None, the
    two SMILES strings are used instead. The system is built either in vacuum or solvated,
    and new positions are drawn with a geometry proposal. An atom-mapping image is rendered
    on request.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        If the molecules cannot be built from the given names / SMILES, or if neither a
        complete pair of names nor a complete pair of SMILES strings was supplied.
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from openeye import oechem
    from openmoltools import forcefield_generators
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    # NOTE(review): `openmm`, `np`, `StringIO` and `beta` are not defined in this function;
    # they are presumably provided at module scope -- confirm against the full file.

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol = iupac_to_oemol(current_mol_name)
            new_oemol = iupac_to_oemol(proposed_mol_name)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            # Keep the original failure attached so the root cause is not lost.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol = smiles_to_oemol(current_mol_smiles)
            new_oemol = smiles_to_oemol(proposed_mol_smiles)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES(
        old_smiles, title="MOL")

    # correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES(
        new_smiles, title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    # Both molecules are retitled 'MOL' before their topologies are regenerated,
    # matching the residue_name handed to the proposal engine below.
    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        # now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        # Strip hydrogens that do not belong to the small molecule, then re-add them
        # with the force field before adding solvent.
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol == 'H'
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Repack the positions as a unit-bearing numpy array in nanometers.
        solvated_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)
        solvated_system = system_generator.build_system(solvated_topology)

        # now to create proposal
        top_proposal = proposal_engine.propose(
            current_system=solvated_system,
            current_topology=solvated_topology,
            current_mol=old_oemol,
            proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   solvated_positions, beta)

        if render_atom_mapping:
            # Imported under an alias so the boolean parameter is not rebound.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
            print(
                f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}"
            )
            _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                                 new_oemol,
                                 proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    vacuum_system = system_generator.build_system(old_topology)
    top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                           current_topology=old_topology,
                                           current_mol=old_oemol,
                                           proposed_mol=new_oemol)
    new_positions, _ = geometry_engine.propose(top_proposal, old_positions,
                                               beta)
    if render_atom_mapping:
        # Imported under an alias so the boolean parameter is not rebound.
        from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
        print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
        _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                             new_oemol, top_proposal._new_to_old_atom_map)
    return top_proposal, old_positions, new_positions
Ejemplo n.º 17
0
    oechem.OECanonicalOrderBonds(mol)

    # Assign a reasonable protomer
    if args.RetainProtonation:
        for atom in mol.GetAtoms():
            atom.SetImplicitHCount(0)
    else:
        if not oequacpac.OEGetReasonableProtomer(mol):
            print 'Failed to get a reasonable protomer at pH 7.4'

    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModelOpenEye)

    if not args.RetainProtonation:
        oechem.OEAddExplicitHydrogens(mol)

    smi = oechem.OECreateSmiString(mol, oechem.OESMILESFlag_Canonical)
    print 'The canonical SMILES for a reasonably protonated state is', smi

    # Generate conformations
    from openeye import oeomega

    mol_multiconf = oechem.OEMol(mol)
    oechem.OECanonicalOrderAtoms(mol_multiconf)

    omega = oeomega.OEOmega()
    # These parameters were chosen to match http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    omega.SetMaxConfs(800)
    omega.SetIncludeInput(False)
    omega.SetCanonOrder(False)

    omega.SetStrictStereo(False)
Ejemplo n.º 18
0
    oechem.OEThrow.Usage("%s <infile> [<outfile>]" % sys.argv[0])

# Open the input molecule stream named by the first command-line argument.
ims = oechem.oemolistream()
ims.open(sys.argv[1])

# An output stream is optional: it is only created when a second argument is given.
if len(sys.argv) > 2:
    oms = oechem.oemolostream()
    oms.open(sys.argv[2])
else:
    oms = None

# Per-molecule atom counts and counts of alphabetic characters in the SMILES string.
natoms = []
nchars = []
for mol in ims.GetOEGraphMols():
    na = mol.NumAtoms()
    smi = oechem.OECreateSmiString(mol)
    # Drop everything that is not an ASCII letter and count what remains.
    nc = len(re.sub(r'[^a-zA-Z]', '', smi))
    natoms.append(na)
    nchars.append(nc)
    if oms:
        # The title is overwritten with "<atom count><TAB><letter count>" before writing.
        mol.SetTitle("%d\t%d" % (na, nc))
        oechem.OEWriteMolecule(oms, mol)

ims.close()
if oms:
    oms.close()

# Array forms of the two lists (not used within the visible part of the script).
A = numpy.array(natoms)
C = numpy.array(nchars)

# NOTE(review): Python 2 print statement -- this script does not run under Python 3 as written.
print "%s: N: %d" % (sys.argv[0], len(natoms))
Ejemplo n.º 19
0
def get_series(mol):
    """
    Return the name of the first chemical series the molecule belongs to.

    The molecule is converted to SMILES with OpenEye, re-parsed with RDKit, and
    tested against each series SMARTS in definition order. A series matches when
    the SMARTS is found in the hydrogen-added molecule and the molecule is within
    the molecular-weight, heavy-atom and ring-count cutoffs for that series.

    Parameters
    ----------
    mol : OpenEye molecule
        Molecule to classify. SD tags 'acrylamide' and 'chloroacetamide' equal to
        'True' mark the molecule as covalent, in which case it is not classified.

    Returns
    -------
    str or None
        The series name, or None if the molecule is covalent, cannot be parsed by
        RDKit, or matches no series.
    """
    from rdkit import Chem
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors

    series_SMARTS_dict = {
        #"3-aminopyridine": "[R1][C,N;R0;!$(NC(=O)CN)]C(=O)[C,N;R0;!$(NC(=O)CN)][c]1cnccc1",
        "3-aminopyridine-like": "[R1]!@[C,N]C(=O)[C,N]!@[R1]",
        "3-aminopyridine-strict": "c1ccncc1NC(=O)!@[R1]",
        "Ugi": "[c,C:1][C](=[O])[N]([c,C,#1:2])[C]([c,C,#1:3])([c,C,#1:4])[C](=[O])[NH1][c,C:5]",
        "quinolones": "NC(=O)c1cc(=O)[nH]c2ccccc12",
        "piperazine-chloroacetamide": "O=C(CCl)N1CCNCC1",
        #'benzotriazoles': 'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1',
        #'benzotriazoles': 'a1aaa([C,N]C(=O)[C,N]a2aaa3aaaaa32)aa1',
        'benzotriazoles': 'a2aaa3aaaaa32',
    }

    # Series that use tighter size limits than the defaults. The key must match the
    # key in series_SMARTS_dict exactly; the previous inline comparison was
    # misspelled, so these limits were never applied.
    series_cutoffs = {
        "3-aminopyridine-like": {
            "MW_cutoff": 410,
            "num_rings_cutoff": 3,
            "num_atoms_cutoff": 28,
        },
    }

    smi = oechem.OECreateSmiString(mol)

    # Filter out covalent
    try:
        if oechem.OEGetSDData(mol, 'acrylamide') == 'True' or oechem.OEGetSDData(mol, 'chloroacetamide') == 'True':
            return None
    except Exception as e:
        print(e)

    def check_if_smi_in_series(
        smi, SMARTS, MW_cutoff=550, num_atoms_cutoff=70, num_rings_cutoff=10
    ):
        """Return True if `smi` matches `SMARTS` and is within all size cutoffs."""
        rdmol = Chem.MolFromSmiles(smi)
        if rdmol is None:
            # RDKit could not parse the SMILES; treat as "not in this series"
            # instead of failing on a None molecule.
            return False
        patt = Chem.MolFromSmarts(SMARTS)
        # Hydrogens are added before matching because some patterns contain #1.
        has_match = len(Chem.AddHs(rdmol).GetSubstructMatches(patt)) > 0
        return (
            has_match
            and Descriptors.MolWt(rdmol) <= MW_cutoff
            and rdmol.GetNumHeavyAtoms() <= num_atoms_cutoff
            and rdMolDescriptors.CalcNumRings(rdmol) <= num_rings_cutoff
        )

    for series, series_SMARTS in series_SMARTS_dict.items():
        cutoffs = series_cutoffs.get(series, {})
        if check_if_smi_in_series(smi, series_SMARTS, **cutoffs):
            return series
    return None
Ejemplo n.º 20
0
    def process(self, record, port):
        """
        Convert the MD trajectory of a record into protein, ligand and water OEMols.

        The resulting trajectory molecules are stored in a new record that is attached
        to the incoming record, 'OETraj' is added to the list of completed analyses,
        and the record is emitted on the success port. Any exception is reported and
        the record is emitted on the failure port instead.
        """
        try:
            # Each parallel cube process works on its own copy of the options
            # so that file names do not collide.
            opt = dict(self.opt)

            # Wrap the record to get access to the MD Record API
            md_rec = MDDataRecord(record)

            log = opt['Logger']
            log.info(' ')
            title = md_rec.get_title
            log.info(f'{title}: Attempting MD Traj conversion into OEMols')

            trajectory_path = md_rec.get_stage_trajectory()

            log.info(f'{title} Temp Directory: {os.path.dirname(trajectory_path)}')
            log.info(f'{title} Trajectory filename: {trajectory_path}')

            # Build the multi-conformer protein and ligand OEMols from the trajectory
            log.info(f'{title} Generating protein and ligand trajectory OEMols')

            flask = md_rec.get_flask
            md_components = record.get_value(Fields.md_components)

            # The ligand stored in the MD components must have the same SMILES
            # as the reference ligand on the record.
            component_ligand = md_components.get_ligand
            reference_ligand = record.get_value(Fields.ligand)

            component_smiles = oechem.OECreateSmiString(component_ligand)
            reference_smiles = oechem.OECreateSmiString(reference_ligand)

            if reference_smiles != component_smiles:
                raise ValueError(
                    f"Ligand Isomeric Smiles String check failure: {component_smiles} vs {reference_smiles}")

            ptraj, ltraj, wtraj = utl.extract_aligned_prot_lig_wat_traj(
                md_components,
                flask,
                trajectory_path,
                opt,
                water_cutoff=opt['water_cutoff'])

            ltraj.SetTitle(record.get_value(Fields.ligand_name))
            ptraj.SetTitle(record.get_value(Fields.protein_name))

            log.info(
                f'{title} #atoms, #confs in protein traj OEMol: {ptraj.NumAtoms()}, {ptraj.NumConfs()}')
            log.info(
                f'{title} #atoms, #confs in ligand traj OEMol: {ltraj.NumAtoms()}, {ltraj.NumConfs()}')
            log.info(
                f'{title} #atoms, #confs in water traj OEMol: {wtraj.NumAtoms()}, {wtraj.NumConfs()}')

            # Collect the trajectory molecules in a fresh record
            traj_record = OERecord()
            traj_record.set_value(OEField('LigTraj', Types.Chem.Mol), ltraj)

            if wtraj:
                traj_record.set_value(OEField('WatTraj', Types.Chem.Mol), wtraj)

            if in_orion():
                traj_record.set_value(Fields.collection, md_rec.collection_id)

            # The protein trajectory is stored through the MD Record API
            md_traj_rec = MDDataRecord(traj_record)
            md_traj_rec.set_protein_traj(ptraj, shard_name="ProteinTrajConfs_")

            record.set_value(Fields.Analysis.oetraj_rec, traj_record)

            # Extend the list of finished analyses, or start it if absent
            if record.has_value(Fields.Analysis.analysesDone):
                done = utl.RequestOEFieldType(record,
                                              Fields.Analysis.analysesDone)
                done.append('OETraj')
            else:
                done = ['OETraj']

            record.set_value(Fields.Analysis.analysesDone, done)

            log.info(f'{title}: saved protein, ligand  and water traj OEMols')

            self.success.emit(record)

            del md_rec
            del md_traj_rec

        except Exception as e:
            print("Failed to complete", str(e), flush=True)
            self.log.error(traceback.format_exc())
            # Send the record to the failure port
            self.failure.emit(record)

        return
Ejemplo n.º 21
0
                    can = m.can
                except:
                    iok = F #print(' conversion failed!')#pass
            else:
                m = Mol(o.zs, o.coords, ican=True)
                can = m.can
        else:
            print(f)
            m = Mol(o.zs, o.coords, ican=True)
            can = m.can

        if (can != 'None') and (can_fmt in ['oechem',]):
            from openeye import oechem
            oem = oechem.OEGraphMol()
            assert oechem.OESmilesToMol(oem, can)
            can = oechem.OECreateSmiString(oem, oechem.OESMILESFlag_Canonical)

        s1 = '' if iok else ' [ conversion failed ]'
        if isf:
            print( i+1, f, can, s1 )
        else:
            print( i+1, f, obj, can, s1 )

        if sdf:
            zs, coords, chgs, bom = m.blk
            if m.na < 100:
                sdf = f[:-4]+'.sdf'
                write_ctab(zs, chgs, bom, coords, sdf=sdf)
            else:
                pdb = f[:-4]+'.pdb'
                write_pdb(m.blk, pdb)
Ejemplo n.º 22
0
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    This function generates fragments from molecules. The output is a dictionary that maps SMILES of molecules to SMILES
     for fragments. The parent SMILES are generated with mol_to_smiles (isomeric, no explicit hydrogens, unmapped).

    Parameters
    ----------
    molecule: OEMol to fragment, or an iterable of OEMols
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    strict_stereo: bool
        Note: This applies to the molecule being fragmented. Not the fragments.
        If True, omega will generate conformation with stereochemistry defined in the SMILES string for charging.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    remove_map: bool
        If True, the index tags will be removed. This will remove duplicate fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments
    """
    fragments = dict()

    # Accept either a single molecule or an iterable of molecules.
    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for a in mol.GetAtoms():
                a.SetMapIdx(0)
        frags = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not frags:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue
        # First element is the charged parent, last element is the fragment mapping.
        charged = frags[0]
        frag_map = frags[-1]
        frag_list = list(frag_map.values())
        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            IUPAC = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            if IUPAC == name:
                # Fall back to a SMILES-derived identifier when the title is just the IUPAC name.
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frag_map)
        del charged, frag_map

    if json_filename:
        # The context manager closes the file even if serialization fails.
        with open(json_filename, 'w') as f:
            json.dump(fragments, f, indent=2, sort_keys=True)

    return fragments
Ejemplo n.º 23
0
    ionic_strength_millimolar=ionic_strength_millimolar,
    pH=pH,
    fah_projects=fah_projects)

# Compounds
from fah_xchem.schema import Compound, CompoundMetadata
from openeye import oechem

# Canonical isomeric SMILES; defined after the oechem import so the name is in scope.
smiles_flag = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC

print('Processing compounds...')
compounds = dict()
with oechem.oemolistream(compounds_sdf_filename) as ifs:
    for oemol in ifs.GetOEGraphMols():
        # Set ID and SMILES
        compound_id = oemol.GetTitle()
        smiles = oechem.OECreateSmiString(oemol, smiles_flag)
        # Extract experimental data, if present
        experimental_data = dict()
        if oechem.OEHasSDData(oemol, 'f_avg_pIC50'):
            pIC50 = oechem.OEGetSDData(oemol, 'f_avg_pIC50')
            # An empty tag value means no measurement was recorded.
            if pIC50 != '':
                experimental_data['pIC50'] = float(pIC50)
        # Extract information about the compound
        compound_metadata = CompoundMetadata(
            compound_id=compound_id,
            smiles=smiles,
            experimental_data=experimental_data,
        )
        # Create new compound
        compound = Compound(metadata=compound_metadata, microstates=list())
Ejemplo n.º 24
0
def generate_fragments(inputf,
                       output_dir,
                       pdf=False,
                       combinatorial=True,
                       MAX_ROTORS=2,
                       strict_stereo=True,
                       remove_map=True):
    """
    This function generates fragment SMILES files sorted by rotatable bonds from an input molecule file.
    The output .smi files are written out to `output_dir` and named `nrotor_n.smi` where n corresponds to the number
    of rotatable bonds for all fragments in the file.

    Parameters
    ----------
    inputf: str
        absolute path to input molecule file
    output_dir: str
        absolute path to output directory
    pdf: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        Passed through to _generate_fragments for the molecule being fragmented.
    remove_map: bool
        If True, atom map indices are cleared before fragmenting so duplicate fragments are easier to find.

    """
    ifs = oechem.oemolistream()
    smiles_unique = set()

    mol = oechem.OEMol()
    if ifs.open(inputf):
        try:
            while oechem.OEReadMolecule(ifs, mol):
                # NOTE(review): the return value is discarded here; if normalize_molecule
                # returns a normalized copy rather than modifying `mol` in place, this call
                # has no effect -- confirm against its implementation.
                openeye.normalize_molecule(mol)
                logger().info('fragmenting {}...'.format(mol.GetTitle()))
                if remove_map:
                    # Remove tags from smiles. This is done to make it easier to find duplicate fragments
                    for a in mol.GetAtoms():
                        a.SetMapIdx(0)
                frags = _generate_fragments(mol, strict_stereo=strict_stereo)
                if not frags:
                    logger().warning('Skipping {}, SMILES: {}'.format(
                        mol.GetTitle(), oechem.OECreateSmiString(mol)))
                    continue
                # First element is the charged parent, last element holds the fragments.
                charged = frags[0]
                frags = frags[-1]
                if combinatorial:
                    smiles = smiles_with_combined(frags,
                                                  charged,
                                                  MAX_ROTORS=MAX_ROTORS)
                else:
                    smiles = frag_to_smiles(frags, charged)

                smiles_unique.update(smiles.keys())
                if pdf:
                    oname = '{}.pdf'.format(mol.GetTitle())
                    ToPdf(charged, oname, frags)
                del charged, frags
        finally:
            # Release the input stream even if fragmentation fails part-way.
            ifs.close()
    else:
        # Previously a failed open was silent and produced empty output files.
        logger().warning('Unable to open {} for reading'.format(inputf))

    # Generate oedatabase for all fragments
    # File name without directory or final extension; also works when there is no extension.
    base = os.path.splitext(os.path.basename(inputf))[0]
    ofname = base + '_frags'
    utils.to_smi(list(smiles_unique), output_dir, ofname)
    ofname_ext = ofname + '.smi'
    oedb_name = os.path.join(output_dir, ofname_ext)
    utils.create_oedatabase_idxfile(oedb_name)
    _sort_by_rotbond(oedb_name, outdir=output_dir)