def sanitize_fragment(mol):
    """
    Delete every SD tag of mol that is not on the approved list.

    The approved tags are TORSION_ATOMPROP, TORSION_ATOMS_FRAGMENT,
    TORSION_ATOMS_ParentMol and COUNT; all other SD data is removed in place.

    Parameters
    ----------
    mol : OEChem molecule whose SD data is filtered

    """
    approved_tags = {
        "TORSION_ATOMPROP",
        "TORSION_ATOMS_FRAGMENT",
        "TORSION_ATOMS_ParentMol",
        "COUNT",
    }

    # Collect the tags to drop before deleting anything, so the SD data is
    # never modified while the data-pair iterator is still being consumed.
    unwanted_tags = [
        dp.GetTag()
        for dp in oechem.OEGetSDDataPairs(mol)
        if dp.GetTag() not in approved_tags
    ]
    for tag in unwanted_tags:
        oechem.OEDeleteSDData(mol, tag)
Beispiel #2
0
def delete_tag(mol, tag):
    """
    Remove the SD data stored under tag from each conformer of mol.

    Parameters
    ----------
    mol:        OEChem molecule; its conformers are visited via GetConfs()
    tag:        exact string label of the data to delete

    """
    conformers = mol.GetConfs()
    for conformer in conformers:
        oechem.OEDeleteSDData(conformer, tag)
Beispiel #3
0
def delete_tag(mol, tag):
    """
    Strip one SD tag from every conformer of a molecule.

    Note: a multi-conformer molecule is required; passing a single-conformer
    molecule fails with AttributeError:
    'OEGraphMol' object has no attribute 'GetConfs'.

    Parameters
    ----------
    mol : multi-conformer OEChem molecule
    tag : string
        exact label of the data to delete

    """
    for conformer in mol.GetConfs():
        oechem.OEDeleteSDData(conformer, tag)
Beispiel #4
0
    def FilterMolData(self, mol):
        """
        Reduce the SD data of mol to the tags listed in self.fields.

        Returns 0 if mol has no SD data, -1 if self.fields is None, and 0
        after clearing all SD data if self.fields is empty. Otherwise returns
        the number of SD entries that were kept. When self.asFloating is
        truthy, an entry whose value cannot be parsed by float() is reported
        with a warning and dropped. If nothing is kept, all SD data is cleared.
        """
        if not oechem.OEHasSDData(mol):
            return 0

        if self.fields is None:
            return -1

        if len(self.fields) == 0:
            oechem.OEClearSDData(mol)
            return 0

        kept = 0
        rejected = []
        for pair in oechem.OEGetSDDataPairs(mol):
            tag = pair.GetTag()
            usable = tag in self.fields

            if usable:
                value = oechem.OEGetSDData(mol, tag)
                if self.asFloating:
                    try:
                        float(value)
                    except ValueError:
                        oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                               (tag, value, mol.GetTitle()))
                        usable = False

            if usable:
                kept += 1
            else:
                # Deletion is deferred until the iteration has finished.
                rejected.append(tag)

        if kept:
            for tag in rejected:
                oechem.OEDeleteSDData(mol, tag)
        else:
            oechem.OEClearSDData(mol)

        return kept
def delete_sd_data(mol, tag, locator_tag):
    """
    Delete the SD data `tag` from whichever object carries `locator_tag`.

    mol itself is checked first, then mol.GetActive(). Returns the result of
    OEDeleteSDData on the object that has locator_tag, or False when neither
    of them has it.
    """
    if oechem.OEHasSDData(mol, locator_tag):
        target = mol
    elif oechem.OEHasSDData(mol.GetActive(), locator_tag):
        target = mol.GetActive()
    else:
        return False
    return oechem.OEDeleteSDData(target, tag)
Beispiel #6
0
                        f'{molecule.GetTitle()} - {phase}.{ext}')
                    if not os.path.exists(filename):
                        files_missing = True
            if files_missing:
                continue

            # Add RUN number
            oechem.OESetSDData(molecule, 'run', f'RUN{run_index}')

            if args.clean:
                for sdpair in oechem.OEGetSDDataPairs(molecule):
                    if sdpair.GetTag() not in [
                            'Hybrid2', 'docked_fragment', 'fragments', 'site',
                            'run'
                    ]:
                        oechem.OEDeleteSDData(molecule, sdpair.GetTag())

            # Copy files
            run_dir = os.path.join(args.docked_basedir, 'fah-gromacs',
                                   f'RUN{run_index}')
            os.makedirs(run_dir, exist_ok=True)
            import shutil
            for phase in ['complex', 'ligand']:
                for ext in ['gro', 'top']:
                    src = os.path.join(
                        gromacs_basedir,
                        f'{molecule.GetTitle()} - {phase}.{ext}')
                    dst = os.path.join(run_dir, f'{phase}.{ext}')
                    shutil.copyfile(src, dst)

            oechem.OEWriteMolecule(ofs, molecule)
def DumpSDData(mol):
    """Print the title of mol, one "tag : value" line per SD pair, then a blank line."""
    title = mol.GetTitle()
    print("SD data of", title)
    # walk every SD data pair attached to the molecule
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)
    print()


# Demo: build a benzene molecule from SMILES and give it a title.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
mol.SetTitle("benzene")

# Set some tagged data: once with (tag, value) arguments and once with an
# OESDDataPair object, then print what is stored.
oechem.OESetSDData(mol, "color", "brown")
oechem.OESetSDData(mol, oechem.OESDDataPair("size", "small"))
DumpSDData(mol)

# Check for the existence of the "size" data, then delete it.
if oechem.OEHasSDData(mol, "size"):
    oechem.OEDeleteSDData(mol, "size")
DumpSDData(mol)

# Add additional "color" data via OEAddSDData (the earlier call used
# OESetSDData) and print the result.
oechem.OEAddSDData(mol, "color", "black")
DumpSDData(mol)

# Remove all SD data; the final dump prints only the title line.
oechem.OEClearSDData(mol)
DumpSDData(mol)
# @ </SNIPPET>
def save_profile_as_sd(mol: oechem.OEGraphMol):
    """
    Copy the per-bond torsion data of mol into SD tags on the molecule.

    For every canonical torsion whose central bond (atoms b, c) carries
    ENERGY_PROFILE_TAG data, the following SD tags are (re)written, where N
    is the 1-based position of the torsion in get_canonical_torsions(mol):

    - TORSION_N_ATOMS: the four torsion atom indices, 1-based, ':'-separated
    - TORSION_N_TORSIONNET_<ENERGY_PROFILE_TAG>: the energy profile string,
      prefixed with 'LOW CONFIDENCE - ' for low-confidence torsions
    - TORSION_N_<STRAIN_TAG>: the bond strain, one decimal
    - TORSION_N_ANGLE: the current torsion angle in degrees, one decimal
    - TORSION_N_TORSIONNET_PRED_CONFIDENCE: high/low confidence marker
    - TORSION_N_TORSIONNET_PROFILE_OFFSET: the profile offset, two decimals

    Summary tags TOTAL_STRAIN_TAG (sum of strains over high-confidence
    torsions only), NUM_TORSION_PROFILES_TAG and
    NUM_LOW_CONFIDENCE_TORSIONS_TAG are written as well, after which
    reorder_sd_props(mol) is called.

    Parameters
    ----------
    mol : molecule whose bonds carry the torsion data; modified in place

    Returns
    -------
    The same mol object.
    """
    # Reset the summary tags to placeholders; the real values are written
    # once all torsions have been processed.
    for summary_tag in (TOTAL_STRAIN_TAG, NUM_TORSION_PROFILES_TAG,
                        NUM_LOW_CONFIDENCE_TORSIONS_TAG):
        oechem.OEDeleteSDData(mol, summary_tag)
        oechem.OESetSDData(mol, summary_tag, '')

    strain_arr_high_conf_preds = np.zeros(1)

    num_torsion_profiles = 0
    num_low_confidence_torsions = 0

    for num, can_torsion in enumerate(get_canonical_torsions(mol)):
        bond = mol.GetBond(can_torsion.b, can_torsion.c)
        if bond is None or not bond.HasData(ENERGY_PROFILE_TAG):
            continue

        num_torsion_profiles += 1
        bond_strains = bond.GetData(STRAIN_TAG)
        offset = bond.GetData(PROFILE_OFFSET_TAG)
        skip_torsion = bond.HasData(SKIP_TORSION_TAG)

        # Only high-confidence torsions contribute to the total strain.
        if offset < OFFSET_THRESHOLD and not skip_torsion:
            strain_arr_high_conf_preds += np.array(bond_strains)

        profile_str = bond.GetData(ENERGY_PROFILE_TAG)
        pred_confidence_value = HIGH_PREDICTION_CONFIDENCE_TAG
        # NOTE(review): an offset exactly equal to OFFSET_THRESHOLD is neither
        # added to the total strain above nor flagged as low confidence here;
        # this mirrors the existing behavior - confirm it is intended.
        if offset > OFFSET_THRESHOLD or skip_torsion:
            profile_str = 'LOW CONFIDENCE - ' + profile_str
            pred_confidence_value = LOW_PREDICTION_CONFIDENCE_TAG
            num_low_confidence_torsions += 1

        # The bond stores the four torsion atom indices as a
        # whitespace-separated string; they are reported 1-based.
        tor_atoms_str1 = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
        ca, cb, cc, cd = list(map(int, tor_atoms_str1.split()))

        apStr = "{}:{}:{}:{}".format(ca + 1, cb + 1, cc + 1, cd + 1)

        atom_ca = mol.GetAtom(oechem.OEHasAtomIdx(ca))
        atom_cb = mol.GetAtom(oechem.OEHasAtomIdx(cb))
        atom_cc = mol.GetAtom(oechem.OEHasAtomIdx(cc))
        atom_cd = mol.GetAtom(oechem.OEHasAtomIdx(cd))
        angle_float = oechem.OEGetTorsion(mol, atom_ca, atom_cb, atom_cc,
                                          atom_cd) * oechem.Rad2Deg

        sd_tag1 = 'TORSION_%s_ATOMS' % (num + 1)
        sd_tag2 = 'TORSION_%d_TORSIONNET_%s' % (num + 1,
                                                ENERGY_PROFILE_TAG)
        sd_tag3 = 'TORSION_%d_TORSIONNET_PRED_CONFIDENCE' % (num + 1)
        sd_tag4 = 'TORSION_%d_TORSIONNET_PROFILE_OFFSET' % (num + 1)

        # Delete before setting so stale values from an earlier run are
        # dropped before the new ones are written.
        oechem.OEDeleteSDData(mol, sd_tag1)
        oechem.OEDeleteSDData(mol, sd_tag2)
        oechem.OEDeleteSDData(mol, sd_tag3)
        oechem.OEDeleteSDData(mol, sd_tag4)

        oechem.OESetSDData(mol, sd_tag1, apStr)
        oechem.OESetSDData(mol, sd_tag2, profile_str)

        sd_tag6 = 'TORSION_%d_%s' % ((num + 1), STRAIN_TAG)
        oechem.OEDeleteSDData(mol, sd_tag6)
        oechem.OESetSDData(mol, sd_tag6, '%.1f' % bond_strains)

        angle = '%.1f' % angle_float
        sd_tag5 = 'TORSION_%d_ANGLE' % (num + 1)
        oechem.OEDeleteSDData(mol, sd_tag5)
        oechem.OESetSDData(mol, sd_tag5, angle)
        oechem.OESetSDData(mol, sd_tag3, pred_confidence_value)
        oechem.OESetSDData(mol, sd_tag4, '%.2f' % offset)

    strain_str = '%.1f' % strain_arr_high_conf_preds[0]
    oechem.OESetSDData(mol, TOTAL_STRAIN_TAG, strain_str)
    oechem.OESetSDData(mol, NUM_TORSION_PROFILES_TAG,
                       str(num_torsion_profiles))
    oechem.OESetSDData(mol, NUM_LOW_CONFIDENCE_TORSIONS_TAG,
                       str(num_low_confidence_torsions))

    reorder_sd_props(mol)

    return mol
    if docked_molecule is None:
        print('No docking poses available')
        import sys
        sys.exit(0)

    import os
    from openeye import oechem, oedocking

    # Write molecule as CSV with cleared SD tags
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - docked.csv')
    if not os.path.exists(output_filename):
        docked_molecule_clean = docked_molecule.CreateCopy()
        for sdpair in oechem.OEGetSDDataPairs(docked_molecule_clean):
            if sdpair.GetTag() not in ['Hybrid2', 'fragments', 'site', 'docked_fragment']:
                oechem.OEDeleteSDData(docked_molecule_clean, sdpair.GetTag())
        with oechem.oemolostream(output_filename) as ofs:
            oechem.OEWriteMolecule(ofs, docked_molecule_clean)

    # Write molecule as SDF
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf')
    if not os.path.exists(output_filename):
        with oechem.oemolostream(output_filename) as ofs:
            oechem.OEWriteMolecule(ofs, docked_molecule)

    # Write molecule as mol2
    output_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.mol2')
    if not os.path.exists(output_filename):
        with oechem.oemolostream(output_filename) as ofs:
            oechem.OEWriteMolecule(ofs, docked_molecule)
    import drconvert
    for fragment in tqdm(all_fragments):
        filename = f'../docking/{prefix} - docked to {fragment}.oeb'
        if os.path.exists(filename):
            datarecords = drconvert.RecordConvertToMols(filename)
            for molecule in datarecords:
                # Annotate which fragment this was docked to
                oechem.OESetSDData(molecule, 'fragments', fragment)

                if molecule.NumAtoms() > 1000:
                    # Store protein
                    molecule.SetTitle(fragment)
                    receptors[fragment] = oechem.OEMol(molecule)
                else:
                    # Store ligands in best docked poses
                    oechem.OEDeleteSDData(molecule, 'Number of Confs')
                    CID = molecule.GetTitle()
                    if CID not in docked_molecules:
                        docked_molecules[CID] = oechem.OEMol(molecule)
                    else:
                        if score(molecule) < score(docked_molecules[CID]):
                            docked_molecules[CID] = oechem.OEMol(molecule)

    # Sort compounds
    docked_molecules = [docked_molecules[CID] for CID in docked_molecules]
    docked_molecules.sort(key=score)

    # Write all molecules in order of increasing score
    print('Writing molecules to CSV...')
    filename = f'{prefix} - top docked.csv'
    with oechem.oemolostream(filename) as ofs:
Beispiel #11
0
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    The ligand SDF named by ``fragalysis_config.ligands_filename`` is read
    from ``results_path``, each molecule is stripped down to the SD tags
    Fragalysis expects, a leading header molecule describing the fields is
    added, and the result is written to ``<results_path>/fragalysis_upload``
    and uploaded with ``update_cset``.

    Fragalysis spec: https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not read by this function)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results
    """

    import os
    from datetime import datetime
    from shutil import copyfile

    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # ref_pdb is either a single reference name (e.g. x12073) or the literal
    # "references.zip", meaning one reference PDB per ligand
    uses_reference_zip = fragalysis_config.ref_pdb == "references.zip"

    # set paths
    ligands_path = os.path.join(results_path,
                                fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path,
                                   fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []

    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    if uses_reference_zip:
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    descriptions = {
        "DDG (kcal/mol)":
        "Relative computed free energy difference",
        "dDDG (kcal/mol)":
        "Uncertainty in computed relative free energy difference",
        "ref_mols":
        "a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)",
        "ref_pdb":
        "The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose",
        "original SMILES":
        "the original SMILES of the compound before any computation was carried out",
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    index = 0
    for oemol in track(molecules, "Preprocessing molecules for Fragalysis..."):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)
        # Get original SMILES (must happen before the tag is deleted below)
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")
        # Remove irrelevant SD tags; the tags are collected first so that
        # SD data is not deleted while it is being iterated over
        irrelevant_tags = [
            sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)
            if sdpair.GetTag() not in tags_to_retain
        ]
        for tag in irrelevant_tags:
            oechem.OEDeleteSDData(oemol, tag)
        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)

        # If ref_pdb is zip file, point each ligand at its own reference
        if uses_reference_zip:
            oechem.OESetSDData(oemol, "ref_pdb",
                               f"references/references_{index}.pdb")
            index += 1
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)

        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Add initial header molecule: a copy of the first ligand whose SD
    # values are replaced by descriptions of the fields
    oemol = molecules[0].CreateCopy()
    # Add descriptions to each SD field (tags are snapshotted before the
    # values are rewritten)
    header_tags = [sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)]
    for tag in header_tags:
        oechem.OESetSDData(oemol, tag, descriptions[tag])

    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name",
                       fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email",
                       fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution",
                       fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date",
                       datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write molecules (header first), overwriting the copy made above
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules,
                           description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print("--> Uploading a new set")
    else:
        # Existing sets are named <submitter>-<method> with whitespace removed
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))

        print(f"--> Updating set: {update_set}")

    if uses_reference_zip:
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
Beispiel #12
0
def RemoveProps(proplist, ifs, ofs):
    """
    Copy molecules from ifs to ofs with the listed SD tags removed.

    Each molecule from ifs.GetOEGraphMols() has every tag in proplist deleted
    from its SD data and is then written to ofs.
    """
    for molecule in ifs.GetOEGraphMols():
        for unwanted_tag in proplist:
            oechem.OEDeleteSDData(molecule, unwanted_tag)
        oechem.OEWriteMolecule(ofs, molecule)
Beispiel #13
0
def KeepProps(proplist, ifs, ofs):
    """
    Copy molecules from ifs to ofs, keeping only the listed SD tags.

    Each molecule from ifs.GetOEGraphMols() has every SD tag that is not in
    proplist deleted and is then written to ofs.
    """
    for mol in ifs.GetOEGraphMols():
        # Gather the tags to drop before deleting, so the SD data is not
        # modified while the data-pair iterator is still in use.
        unwanted_tags = [
            dp.GetTag()
            for dp in oechem.OEGetSDDataPairs(mol)
            if dp.GetTag() not in proplist
        ]
        for tag in unwanted_tags:
            oechem.OEDeleteSDData(mol, tag)
        oechem.OEWriteMolecule(ofs, mol)