Beispiel #1
0
def align2d(file1, file2):
    """
    Depict paired molecules from two files next to each other in a PDF.

    Molecules are read in lockstep from both files. For every pair, the
    molecule from file2 is given 2D coordinates and turned into a
    substructure query, and OEPrepareAlignedDepiction is called on the
    molecule from file1 with that query. Both molecules are then rendered
    into two consecutive cells of the report, which is finally written to
    'output.pdf'.

    Parameters
    ----------
    file1 : string
        name of input file with the molecules to be aligned
    file2 : string
        name of input file with the molecules used as alignment queries

    """
    # atom/bond expression options used to build the substructure query
    atom_opts = oechem.OEExprOpts_AtomicNumber | oechem.OEExprOpts_RingMember
    bond_opts = oechem.OEExprOpts_RingMember

    fit_stream = oechem.oemolistream(file1)
    ref_stream = oechem.oemolistream(file2)
    for stream in (fit_stream, ref_stream):
        stream.SetConfTest(oechem.OEAbsCanonicalConfTest())

    # depiction options and the report come from a helper defined elsewhere
    popts, dopts, report = prep_pdf_writer()

    paired_mols = zip(fit_stream.GetOEMols(), ref_stream.GetOEMols())
    for fit_mol, ref_mol in paired_mols:
        pair = (fit_mol, ref_mol)

        for mol in pair:
            oechem.OESuppressHydrogens(mol)

        # the query molecule needs 2D coordinates before it is searched with
        oechem.OEGenerate2DCoordinates(ref_mol)
        query = oechem.OESubSearch(ref_mol, atom_opts, bond_opts)

        oechem.OEPrepareSearch(fit_mol, query)
        alignment = oedepict.OEPrepareAlignedDepiction(fit_mol, query)

        # a failed alignment is reported; the pair is still rendered below
        if not alignment.IsValid():
            oechem.OEThrow.Error(
                "Substructure is not found in input molecule!")

        # one report cell per molecule, allocated before any rendering
        cells = [report.NewCell() for _ in pair]
        for mol in pair:
            oedepict.OEPrepareDepiction(mol, popts)
        displays = [oedepict.OE2DMolDisplay(mol, dopts) for mol in pair]
        for cell, display in zip(cells, displays):
            oedepict.OERenderMolecule(cell, display)

    pdf_stream = oechem.oeofstream()
    if not pdf_stream.open('output.pdf'):
        oechem.OEThrow.Fatal("Cannot open output file!")
    oedepict.OEWriteReport(pdf_stream, "pdf", report)
Beispiel #2
0
def find_string_tag(infile):
    """
    Print substring counts of the 'SMILES QCArchive' SD tag per conformer.

    For every conformer of every molecule in the file, one tab-separated
    line is printed: the conformer title followed by the number of
    occurrences of 'S', 'P', 'C#N' and 'N/N' in the tag value. These are
    plain substring counts on the tag text, not chemical perception.

    Parameters
    ----------
    infile : string
        name of input file with molecules

    """
    # substrings to count, in the order they are printed
    patterns = ('S', 'P', 'C#N', 'N/N')

    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsCanonicalConfTest())
    opened = stream.open(infile)
    if not opened:
        # only a warning is issued; the loop below is still entered
        oechem.OEThrow.Warning("Unable to open input file for reading")

    for mol in stream.GetOEMols():
        for conf in mol.GetConfs():
            tag_value = oechem.OEGetSDData(conf, 'SMILES QCArchive')
            counts = [tag_value.count(pattern) for pattern in patterns]
            print(conf.GetTitle(), *counts, sep="\t")
def main(infile):
    """
    Try to assign charges to each molecule in a file, retrying once after
    re-perceiving stereochemistry from the 3D coordinates.

    If charge_mol raises RuntimeError for a molecule, stereochemistry is
    re-derived with OE3DToInternalStereo and OE3DToBondStereo and the
    charge assignment is attempted a second time. A second RuntimeError is
    reported on stdout together with the molecule title and its
    'SMILES QCArchive' SD tag.

    Parameters
    ----------
    infile : string
        name of multi-molecule, multi-conformer input file

    Raises
    ------
    FileNotFoundError
        if the input file cannot be opened

    """
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not stream.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")

    for mol in stream.GetOEMols():

        # initial perception on the molecule as read
        oechem.OEPerceiveChiral(mol)
        oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_MDL)

        # first attempt; the charged copy does NOT carry conformers
        try:
            chg_mol = charge_mol(mol)

        except RuntimeError:

            # derive stereo from coordinates, then reset perception and
            # derive bond stereo separately, because OE3DToInternalStereo
            # may skip it when it considers the molecule flat
            oechem.OE3DToInternalStereo(mol)
            mol.ResetPerceived()
            oechem.OE3DToBondStereo(mol)

            # second and last attempt
            try:
                chg_mol = charge_mol(mol)
                print(f'fixed stereo: {mol.GetTitle()}')
            except RuntimeError:
                find_unspecified_stereochem(mol)

                title = mol.GetTitle()
                smilabel = oechem.OEGetSDData(mol, "SMILES QCArchive")
                print(' >>> Charge assignment failed due to unspecified '
                      f'stereochemistry {title} {smilabel}')

                continue
Beispiel #4
0
def main(infile, ffxml):
    """
    Run min_ffxml on every conformer of every molecule in a file.

    Parameters
    ----------
    infile : string
        name of multi-molecule, multi-conformer input file
    ffxml : string
        force field argument passed through unchanged to min_ffxml

    Raises
    ------
    FileNotFoundError
        if the input file cannot be opened

    """
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not stream.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")

    for mol in stream.GetOEMols():

        # molecule-level perception happens once, before the conformers
        oechem.OEPerceiveChiral(mol)
        oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_MDL)

        for conf in mol.GetConfs():

            # stereochemistry is taken from this conformer's coordinates
            oechem.OE3DToInternalStereo(conf)

            min_ffxml(conf, ffxml)
def master():
    """
    Hand out molecules from the input file to MPI workers, one per request.

    For every molecule the master waits for a WORKTAG message from any
    rank and answers that rank with a (position, molecule copy, title)
    tuple. Once the file is exhausted, an empty list tagged DIETAG is sent
    to every rank from 1 to world_size - 1, followed by a barrier.

    Relies on names defined outside this function: input_smiles_file,
    comm, MPI, WORKTAG, DIETAG, world_size and args.

    """
    # placeholder molecule; the name is rebound by the loop below
    mol = oechem.OEMol()

    stream = oechem.oemolistream(input_smiles_file)
    stream.SetConfTest(oechem.OEAbsCanonicalConfTest())

    for pos, mol in enumerate(stream.GetOEMols()):
        # copy the molecule before it is sent to a worker
        smiles = oechem.OEMol(mol)
        ligand_name = smiles.GetTitle()

        # block until some worker asks for work, then reply to that worker
        request = MPI.Status()
        comm.recv(source=MPI.ANY_SOURCE, tag=WORKTAG, status=request)
        worker_rank = request.Get_source()
        comm.send((pos, smiles, ligand_name), dest=worker_rank, tag=WORKTAG)

        # progress message every 1000 molecules when args.v is 1
        if args.v == 1 and pos % 1000 == 0:
            print("sent", pos, "jobs", flush=True)

    # tell each worker rank that there is no more work
    for worker_rank in range(1, world_size):
        comm.send([], dest=worker_rank, tag=DIETAG)
    comm.Barrier()
Beispiel #6
0
def read_mols(in_file, mol_slice=None):
    """
    Open a molecule file and return molecules and conformers as OEMols.
    Provide option to slice the mols to return only a chunk from the
    specified indices.

    Parameters
    ----------
    in_file : string
        name of input file with molecules
    mol_slice : int, slice, or tuple/list of ints and slices, optional
        Selection of molecules by zero-based position in the file, e.g. as
        produced by numpy's np.s_ helper.
        The resulting integers are numerically sorted and duplicates removed.
        e.g., slices = np.s_[0, 3:5, 6::3] would be parsed to return
        [0, 3, 4, 6, 9, 12, 15, 18, ...]
        Can also parse from end: [-3:] gets the last 3 molecules, and
        [-2:-1] is the same as [-2] to get just next to last molecule.
        Integer indices that fall outside the file select nothing.

    Returns
    -------
    mols : OEMols
        If mol_slice is None, the lazy molecule generator of the open
        stream. Otherwise a list with copies of the selected molecules,
        in file order.

    Raises
    ------
    FileNotFoundError
        if in_file cannot be opened
    ValueError
        if mol_slice (or one of its entries) is neither an int nor a slice

    """
    def open_stream():
        # every pass over the file has to use the same conformer test so
        # that molecule boundaries (and therefore indices) agree
        stream = oechem.oemolistream()
        stream.SetConfTest(oechem.OEAbsCanonicalConfTest())
        if not stream.open(in_file):
            raise FileNotFoundError(f"Unable to open {in_file} for reading")
        return stream

    def to_positive(index):
        # decode an index counted from the end into one from the start
        return index + num_mols if index < 0 else index

    ifs = open_stream()
    mols = ifs.GetOEMols()

    if mol_slice is None:
        return mols

    # set max number of molecules for decoding slices
    # TODO: how to get num_mols without re-reading file and loading all mols
    count_stream = open_stream()
    try:
        # count lazily instead of holding every molecule in a list
        num_mols = sum(1 for _ in count_stream.GetOEMols())
    finally:
        count_stream.close()

    # parse mol_slice for multiple slice definitions provided
    # e.g., (1, 4, 8) for second, fifth, and ninth molecules
    # e.g., (0, slice(3, 5, None), slice(6, None, 3)) for example in docs
    # a set removes duplicates and gives O(1) membership tests below
    if isinstance(mol_slice, (tuple, list)):
        idx_to_keep = set()
        for s in mol_slice:
            if isinstance(s, slice):
                idx_to_keep.update(range(num_mols)[s])
            elif isinstance(s, int):
                idx_to_keep.add(to_positive(s))
            else:
                raise ValueError(
                    f"ERROR in parsing 'mol_slice' from {mol_slice}"
                    f" due to {s} being neither slice nor int")

    elif isinstance(mol_slice, slice):
        idx_to_keep = set(range(num_mols)[mol_slice])

    elif isinstance(mol_slice, int):
        # a single index is stored as a one-element selection
        idx_to_keep = {to_positive(mol_slice)}

    else:
        raise ValueError(f"ERROR in parsing 'mol_slice' from {mol_slice}")

    # nothing selected: no need to walk the file again
    if not idx_to_keep:
        return []
    last_idx = max(idx_to_keep)

    # go through the generator and retrieve the specified molecules
    mlist = []
    for i, m in enumerate(mols):
        if i in idx_to_keep:

            # append a copy else still linked to orig generator
            mlist.append(copy.copy(m))

            # if this index is the last one wanted, finish now
            if i == last_idx:
                break

    return mlist
Beispiel #7
0
def main(infile, outfile, ffxml, minimizer):
    """
    Charge every molecule in a file and minimize each of its conformers.

    Charges are assigned once per molecule with charge_mol. If that raises
    RuntimeError, stereochemistry is re-derived from the 3D coordinates
    and the assignment is retried once; molecules that fail again are
    reported on stdout and skipped. Each conformer of a charged molecule
    is then passed, together with the output stream, to the selected
    minimizer function.

    Parameters
    ----------
    infile : string
        name of multi-molecule, multi-conformer input file
    outfile : string
        name of output file; it must not exist yet
    ffxml : string
        force field argument passed to min_ffxml when minimizer is 'ffxml'
    minimizer : string
        one of 'ffxml', 'mmff94', 'mmff94s', 'gaff', 'gaff2'

    Raises
    ------
    ValueError
        if minimizer is not one of the supported names
    FileNotFoundError
        if the input file cannot be opened
    FileExistsError
        if the output file already exists

    """
    # one entry per supported minimizer; each takes (conformer, out stream)
    run_minimizer = {
        # minimize with parsley (charges set by ff not used from conf)
        'ffxml': lambda conf, out: min_ffxml(conf, out, ffxml),
        'mmff94': lambda conf, out: min_mmff94x(conf, out, mmff94s=False),
        'mmff94s': lambda conf, out: min_mmff94x(conf, out, mmff94s=True),
        'gaff': lambda conf, out: min_gaffx(conf, out, gaff2=False),
        'gaff2': lambda conf, out: min_gaffx(conf, out, gaff2=True),
    }

    # fail before touching any file: an unknown name would otherwise charge
    # every molecule and then write nothing at all
    if minimizer not in run_minimizer:
        raise ValueError(
            f"Unknown minimizer {minimizer!r}; "
            f"expected one of {sorted(run_minimizer)}")
    minimize = run_minimizer[minimizer]

    # open multi-molecule, multi-conformer file
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")

    # from here on the input stream is closed on every exit path
    try:
        # open an outstream file, refusing to overwrite an existing one
        if os.path.exists(outfile):
            raise FileExistsError("Output file {} already exists in {}".format(
                outfile, os.getcwd()))
        ofs = oechem.oemolostream()
        if not ofs.open(outfile):
            oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

        # the output stream is closed even if a minimization raises
        try:
            for mol in ifs.GetOEMols():

                # perceive stereochemistry for mol
                oechem.OEPerceiveChiral(mol)
                oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_MDL)

                # assign charges to copy of mol
                # note that chg_mol does NOT have conformers
                try:
                    chg_mol = charge_mol(mol)

                except RuntimeError:
                    # perceive stereochem from the 3D coordinates
                    oechem.OE3DToInternalStereo(mol)

                    # reset perceived and call OE3DToBondStereo, since it
                    # may be missed by OE3DToInternalStereo if it thinks
                    # mol is flat
                    mol.ResetPerceived()
                    oechem.OE3DToBondStereo(mol)

                    try:
                        chg_mol = charge_mol(mol)
                        print(f'fixed stereo: {mol.GetTitle()}')
                    except RuntimeError:
                        title = mol.GetTitle()
                        smilabel = oechem.OEGetSDData(
                            mol, "SMILES QCArchive")
                        print(' >>> Charge assignment failed due to '
                              f'unspecified stereochemistry {title} '
                              f'{smilabel}')
                        continue

                for conf in mol.GetConfs():

                    # perceive stereochemistry for conf coordinates
                    oechem.OE3DToInternalStereo(conf)

                    # assign charges to the conf itself
                    chg_conf = charge_conf(chg_mol, conf)

                    minimize(chg_conf, ofs)
        finally:
            ofs.close()
    finally:
        ifs.close()