Python Molecule.from_mapped_smilesの例、openforcefield.topology.Molecule.from_mapped_smiles Pythonの例

コード例 #1

0

ファイルを表示

ファイル: regenerate_dict.py プロジェクト: jmaat/fragmenter_data

def checkTorsion(smiles, torsion_indices, ff_name):
    """
    Find the id of the proper-torsion parameter a force field assigns to one torsion.

        Parameters
        ----------
        smiles : str
            Mapped SMILES of the molecule; it is handed to
            ``Molecule.from_mapped_smiles``.
        torsion_indices : sequence of int
            Atom indices of the torsion of interest. Either direction is
            accepted; the value is converted to a tuple before comparing.
        ff_name : str
            Force field specification passed to ``ForceField`` (loaded with
            ``allow_cosmetic_attributes=True``).

        Returns
        -------
        tid : object or None
            The ``id`` attribute of the "ProperTorsions" parameter whose atom
            indices equal ``torsion_indices`` (forwards or reversed). If several
            entries match, the last one wins. ``None`` when nothing matches.
    """
    print(smiles)

    # The reversed comparison below is made against a tuple, so a list argument
    # could never match; normalise once up front.
    target = tuple(torsion_indices)

    molecule = Molecule.from_mapped_smiles(smiles)
    topology = Topology.from_molecules(molecule)
    forcefield = ForceField(ff_name, allow_cosmetic_attributes=True)
    # One mapping of {force tag: {atom indices: parameter}} per molecule
    molecule_force_list = forcefield.label_molecules(topology)

    # Default result so that "no match" is reported as None rather than
    # failing on an unbound local at the return statement.
    tid = None
    params = []
    indices = []
    for mol_forces in molecule_force_list:
        for force_tag, force_dict in mol_forces.items():
            if force_tag != "ProperTorsions":
                continue
            for atom_indices, parameter in force_dict.items():
                params.append(parameter.id)
                indices.append(atom_indices)
                # Diagnostic output kept from the original implementation
                print(torsion_indices)
                print(atom_indices)
                if atom_indices == target or tuple(reversed(atom_indices)) == target:
                    tid = parameter.id
    print(params)
    print(indices)
    return tid

コード例 #2

0

ファイルを表示

def get_assigned_torsion_param(tdentry, forcefield):
    """Return the proper-torsion parameter a force field assigns to the
    dihedral driven in a TorsionDrive entry.

    Parameters
    ----------
    tdentry : TDEntry
        TorsionDrive entry; its mapped SMILES attribute is used to build the
        molecule and its first ``td_keywords.dihedrals`` item is the torsion
        that is looked up.
    forcefield : str, ForceField
        Force field to apply. A string is passed to ``ForceField(...)``;
        anything else is used as given.

    Returns
    -------
    torsion_params : ProperTorsion
        The entry stored under the driven dihedral in the molecule's
        "ProperTorsions" labels.

    Notes
    -----
    The forward index order is tried with ``.get``; only if that yields
    ``None`` is the reversed order looked up, and that second lookup uses item
    access, so a missing torsion is not reported as ``None``.

    Examples
    --------
    Starting with TDEntries from usage of `get_torsiondrives_matching_smarts`
    (see its Example), we can get back the parameter assigned to this by, say
    `"openff-1.0.0.offxml"`:

    >>> from openforcefield.typing.engines.smirnoff import ForceField
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, datasets, client)
    >>> ff = ForceField('openff-1.0.0.offxml')
    >>> assigned = [get_assigned_torsion_param(tdentry, ff)
    ...             for tdentry in tdentries]

    >>> print([t.id for t in assigned])
    ['t47', 't47', 't47', 't47', ...]

    """
    attributes = tdentry.attributes
    molecule = Molecule.from_mapped_smiles(
        attributes["canonical_isomeric_explicit_hydrogen_mapped_smiles"]
    )

    # Accept either a ready ForceField or something to build one from
    ff = ForceField(forcefield) if isinstance(forcefield, str) else forcefield

    # A single molecule goes in, so the labels of interest are the 0th item
    single_topology = Topology.from_molecules(molecule)
    labels = ff.label_molecules(single_topology)[0]

    # Only the first driven dihedral is considered (1D torsion drives)
    driven = tdentry.td_keywords.dihedrals[0]

    proper_torsions = labels["ProperTorsions"]
    found = proper_torsions.get(driven)
    if found is not None:
        return found

    # Forward order was not present: fall back to the reversed atom order
    return proper_torsions[driven[::-1]]

コード例 #3

0

ファイルを表示

def checkTorsion(molList, ff_name):
    """
    Tag each molecule with the id of the torsion parameter applied to its driven torsion.

        Parameters
        ----------
        molList : list of objects
            Molecules exposing ``GetData``/``SetData``. Each must carry a
            "cmiles" tag (mapped SMILES) and a "TDindices" tag (atom indices of
            the driven torsion).
        ff_name : str
            Force field specification passed to ``ForceField``.

        Returns
        -------
        mols : list of objects
            The molecules for which a "ProperTorsions" entry matched
            "TDindices" (forwards or reversed). Each has the matching
            parameter id stored under the "IDMatch" tag. A molecule is
            appended once per matching entry.
    """
    # The force field does not depend on the molecule, so load it once instead
    # of re-parsing it for every entry in molList.
    forcefield = ForceField(ff_name)

    count = 0
    mols = []
    for mol in molList:
        molecule = Molecule.from_mapped_smiles(mol.GetData("cmiles"))
        topology = Topology.from_molecules(molecule)
        molecule_force_list = forcefield.label_molecules(topology)

        td_indices = mol.GetData("TDindices")
        # The reversed comparison below is made against a tuple; a list would
        # never compare equal, so normalise that one case.
        if isinstance(td_indices, list):
            td_indices = tuple(td_indices)

        for mol_forces in molecule_force_list:
            for force_tag, force_dict in mol_forces.items():
                if force_tag != "ProperTorsions":
                    continue
                for atom_indices, parameter in force_dict.items():
                    if (
                        atom_indices == td_indices
                        or tuple(reversed(atom_indices)) == td_indices
                    ):
                        count += 1
                        mol.SetData("IDMatch", parameter.id)
                        mols.append(mol)
    print(
        f"Out of {len(molList)} molecules, {count} were processed with checkTorsion()"
    )

    return mols

コード例 #4

0

ファイルを表示

ファイル: label_mol.py プロジェクト: origamimantis/wbointerpolation

def checkParam(cmiles, ff2):
    """Return ``cmiles`` if the force field assigns bond parameter 'b1' to it.

    Parameters
    ----------
    cmiles : str
        Mapped SMILES passed to ``Molecule.from_mapped_smiles``.
    ff2 : str
        Force field specification passed to ``ForceField`` (loaded with
        ``allow_cosmetic_attributes=True``).

    Returns
    -------
    str or None
        ``cmiles`` when any entry under the 'Bonds' tag has id 'b1';
        otherwise the function falls off the end and returns ``None``.

    Every force tag visited before a match is printed, and 'match' is printed
    when the parameter is found.
    """
    molecule = Molecule.from_mapped_smiles(cmiles)
    topology = Topology.from_molecules([molecule])

    labeller = ForceField(ff2, allow_cosmetic_attributes=True)
    labelled = labeller.label_molecules(topology)

    for per_molecule in labelled:
        for tag, assigned in per_molecule.items():
            print(tag)
            if tag != 'Bonds':
                continue
            # Stop at the first bond that carries the 'b1' parameter
            if any(param.id == 'b1' for _, param in assigned.items()):
                print('match')
                return cmiles
    return None

コード例 #5

0

ファイルを表示

    def _apply(self, molecules: List[Molecule]) -> ComponentResult:
        """
        Run the WBOFragmenter over each molecule and collect the fragments.

        Parameters:
            molecules: The list of molecules which should be processed by this component.

        Returns:
            The result object from ``self._create_result()`` after the parent
            molecules (when ``include_parent`` is set) and all fragments have
            been added, and failures have been passed to ``filter_molecule``.

        Note:
            * A molecule whose fragmentation raises ``RuntimeError`` or ``ValueError`` is passed to
            ``filter_molecule`` even when ``include_parent`` is set to true.
            * A molecule that yields no fragments is passed to ``filter_molecule`` only when
            ``include_parent`` is false.
            * Each fragment carries its driven torsion in ``properties["dihedrals"]``.
        """
        from fragmenter import fragment

        result = self._create_result()

        for molecule in molecules:
            # the fragmenter can misbehave on molecules without a conformer
            if molecule.n_conformers == 0:
                molecule.generate_conformers(n_conformers=1)

            if self.include_parent:
                result.add_molecule(molecule)

            wbo_fragmenter = fragment.WBOFragmenter(
                molecule=molecule.to_openeye(),
                functional_groups=self.functional_groups,
                verbose=False,
            )

            try:
                wbo_fragmenter.fragment(
                    threshold=self.threshold,
                    keep_non_rotor_ring_substituents=self.keep_non_rotor_ring_substituents,
                )

                # the json form records the central bond each fragment was
                # built around, which is the bond to torsiondrive later
                fragments_by_bond = wbo_fragmenter.to_torsiondrive_json()

                if not fragments_by_bond:
                    # nothing came back; without the parent this molecule
                    # contributes nothing, so count it as a failure
                    if not self.include_parent:
                        result.filter_molecule(molecule)
                    continue

                for fragment_data in fragments_by_bond.values():
                    identifiers = fragment_data["identifiers"]
                    frag_mol = Molecule.from_mapped_smiles(
                        mapped_smiles=identifiers[
                            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
                        ]
                    )
                    torsion_index = tuple(fragment_data["dihedral"][0])
                    # stored on the molecule so it is available when the
                    # cmiles tags are generated later
                    indexer = TorsionIndexer()
                    indexer.add_torsion(torsion=torsion_index)
                    frag_mol.properties["dihedrals"] = indexer
                    result.add_molecule(frag_mol)

            except (RuntimeError, ValueError):
                # covers cmiles errors for molecules with undefined stereo
                result.filter_molecule(molecule)

        return result

コード例 #6

0

ファイルを表示

def get_torsiondrives_matching_smarts(smarts, datasets, client):
    """Collect the TorsionDrive entries whose driven torsion matches a SMARTS.

    Parameters
    ----------
    smarts : str
        SMARTS to query TorsionDrives against;
        with 4 tagged atoms, a match must cover the same atoms as the driven dihedral;
        with 2 tagged atoms, a match must cover the two central atoms of the driven dihedral.
        Atom order is ignored in both cases.
    datasets : iterable of strings
        TorsionDriveDataset names to sample from.
    client : qcportal.FractalClient
        Fractal client to use for database queries.

    Returns
    -------
    tdentries : list
        Entries matching the SMARTS, each appended at most once per dataset record.

    Raises
    ------
    ValueError
        If a SMARTS match has a number of indices other than 2 or 4.

    Examples
    --------
    Get back TDEntries corresponding to a C-C bond torsiondrive:

    >>> from qcportal import FractalClient
    >>> client = FractalClient()
    >>> smarts = "[*:1]~[#6:2]-[#6:3]~[*:4]"
    >>> datasets = ["OpenFF Substituted Phenyl Set 1"]
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, datasets, client)

    Equivalent to the above due to wildcard matching, but specifying only central bond:

    >>> smarts = "[#6:1]-[#6:2]"
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, datasets, client)

    """
    smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
    matched_entries = []

    for name in datasets:
        collection = client.get_collection("TorsionDriveDataset", name)

        for entry in collection.data.records.values():
            # build the molecule from its mapped SMILES so the SMARTS can be applied
            offmol = Molecule.from_mapped_smiles(entry.attributes[smiles_key])
            hits = offmol.chemical_environment_matches(smarts)

            if len(hits) == 0:
                continue

            # only the first driven dihedral is considered (1D torsion drives)
            driven = entry.td_keywords.dihedrals[0]

            # atoms a hit has to cover, keyed by how many atoms the SMARTS tags
            expected = {2: driven[1:3], 4: driven}

            for hit in hits:
                n_tagged = len(hit)
                if n_tagged not in expected:
                    raise ValueError("Number of indices returned not 2 or 4;"
                                     " check number of tagged atoms in SMARTS")
                if sorted(hit) == sorted(expected[n_tagged]):
                    matched_entries.append(entry)
                    break

    return matched_entries

コード例 #7

0

ファイルを表示

def torsion_barrier_for_molecule(tdr_object, mapped_smiles, show_plots=False):
    """
    Evaluate the torsion barrier of a single torsion drive record.

    The final energies are sorted by angle and padded periodically with three
    points on each side. Points where the slope changes sign are taken as
    extrema, and the barrier is the largest energy difference between
    consecutive extrema.

    Parameters
    ----------
    tdr_object : object
        torsion drive record from QC archive for a molecule; must provide
        ``get_final_energies()``, ``dict()`` and ``id``
    mapped_smiles : str
        mapped SMILES of the molecule, used to build both the OpenFF molecule
        and the returned oemol
    show_plots : bool, optional
        when true, the energy profile with its extrema is plotted, saved as
        ``plot_<id>.png`` and shown

    Returns
    -------
    mol: oemol object
        oemol built from ``mapped_smiles`` carrying the following datatags:
        "WBO": fractional bond order of the central bond of the driven torsion
        "TB": largest barrier between consecutive extrema, in the units given by
        ``HARTREE_2_KJMOL`` (0.0 when the profile has fewer than two extrema)
        "TDindices": atom indices of the driven torsion
        "TDid": ``tdr_object.id``
        "cmiles": ``mapped_smiles``
    """
    # Fetch once; keys and values must come from the same mapping anyway
    final_energies = tdr_object.get_final_energies()
    angles, energies = zip(
        *sorted(
            (key[0] * np.pi / 180, energy) for key, energy in final_energies.items()
        )
    )
    angles = np.array(angles)
    energies = np.array(energies)

    # Periodic padding so extrema close to the ends of the scan are detected
    angles = np.append(
        angles[-3:] - 2 * np.pi, np.append(angles, angles[:3] + 2 * np.pi)
    )
    energies = np.append(energies[-3:], np.append(energies, energies[:3]))

    # An interior point is an extremum when the slopes on either side of it
    # have different signs
    slope_signs = np.sign(np.diff(energies) / np.diff(angles))
    idx = np.nonzero(slope_signs[:-1] != slope_signs[1:])[0] + 1

    if show_plots:
        min_ener = min(energies)
        energies_y = (energies - min_ener) * HARTREE_2_KJMOL
        fontsize = 14
        plt.figure()
        plt.plot(
            angles * 180 / np.pi,
            energies_y,
            "b-X",
            angles[idx] * 180 / np.pi,
            energies_y[idx],
            "ro",
        )
        plt.legend(["QM data", "Max, min"], bbox_to_anchor=(1, 1), fontsize=fontsize)
        plt.title("Torsion drive interpolation", fontsize=fontsize)
        plt.xlabel("Dihedral Angle [Degrees]", fontsize=fontsize)
        plt.ylabel("Relative energy [KJ / mol]", fontsize=fontsize)
        plt.xticks(fontsize=fontsize)
        plt.yticks(fontsize=fontsize)
        # f-string so a non-string record id does not break the file name
        fig_name = f"plot_{tdr_object.id}.png"
        plt.savefig(fig_name)
        plt.show()

    # Energy differences between consecutive extrema
    torsion_barriers = np.abs(HARTREE_2_KJMOL * np.diff(energies[idx]))
    # A flat profile has no extrema; report a zero barrier rather than
    # failing on max() of an empty sequence
    torsion_barrier = torsion_barriers.max() if torsion_barriers.size else 0.0

    # The central two atoms of the driven dihedral define the rotated bond
    dihedral_indices = tdr_object.dict()["keywords"]["dihedrals"][0]
    offmol = Molecule.from_mapped_smiles(mapped_smiles)
    offmol.assign_fractional_bond_orders()
    bond = offmol.get_bond_between(dihedral_indices[1], dihedral_indices[2])
    mol = chemi.smiles_to_oemol(mapped_smiles)
    mol.SetData("WBO", bond.fractional_bond_order)
    mol.SetData("TB", torsion_barrier)
    mol.SetData("TDindices", dihedral_indices)
    mol.SetData("TDid", tdr_object.id)
    mol.SetData("cmiles", mapped_smiles)

    return mol

コード例 #8

0

ファイルを表示

def loadDataset_low(datasetName, specification, benchmark_smiles, qca_overlapped_entries):
    """
    Low level call to load one torsion drive dataset and return its molecules.

    COMPLETE records whose molecule is isomorphic to a benchmark molecule are
    skipped, and their record ids are appended to ``qca_overlapped_entries``.
    Every other COMPLETE record is converted with
    ``torsion_barrier_for_molecule``.

    Parameters
    ----------
    datasetName : str
        torsion drive dataset name.
    specification : str
        specification in the dataset. Example: "B3LYP-D3", "default", "UFF"
    benchmark_smiles : str
        path of a text file with one benchmark SMILES per line; blank lines
        are ignored.
    qca_overlapped_entries : str
        path of the file the overlapping record ids are appended to, one per
        line.

    Returns
    -------
    molList : list of objects
        one molecule, as returned by ``torsion_barrier_for_molecule``, per
        COMPLETE record that does not overlap with the benchmark set

    Raises
    ------
    AssertionError
        If ``datasetName`` or ``specification`` is empty.
    """
    # Explicit checks instead of ``assert`` so they survive ``python -O``;
    # the exception type is kept for existing callers.
    if not datasetName:
        print("datasetName is empty. Check input list of dataset tuples")
        raise AssertionError("datasetName is empty")
    if not specification:
        print("specification is empty. Check input list of dataset tuples")
        raise AssertionError("specification is empty")

    # initiate qc portal instance
    client = ptl.FractalClient()
    # from the TorsionDriveDataset collection picking up given datasetName
    ds = client.get_collection("TorsionDriveDataset", datasetName)
    ds.status([specification], status="COMPLETE")

    # Benchmark molecules, e.g. from the lim_mobley_parsely_benchmark
    # https://openforcefield.org/force-fields/force-fields/
    # https://github.com/MobleyLab/benchmarkff/blob/91476147f35579bc52bf984839fd20c72a61d76d/molecules/set_v03_non_redundant/trim3_full_qcarchive.smi
    with open(benchmark_smiles) as f:
        # drop line endings and blank lines before parsing
        bm_smiles = [line.strip() for line in f]
    bm_mols = [Molecule.from_smiles(smiles) for smiles in bm_smiles if smiles]

    tb = []
    overlaps = 0
    qca_entries = []

    # Iterate over rows: ``ds.df.size`` counts cells (rows * columns) and would
    # run past the last row as soon as the frame has more than one column.
    for i in range(len(ds.df)):
        record = ds.df.iloc[i, 0]
        if record.status != "COMPLETE":
            continue

        smiles = ds.df.index[i]
        entry = ds.get_entry(smiles)
        mapped_smiles = entry.attributes[
            "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        ]
        mol1 = Molecule.from_mapped_smiles(mapped_smiles)

        overlapping = False
        for mol in bm_mols:
            # connectivity-only comparison: all optional matching criteria are off
            isomorphic, _ = Molecule.are_isomorphic(
                mol1,
                mol,
                return_atom_map=False,
                aromatic_matching=False,
                formal_charge_matching=False,
                bond_order_matching=False,
                atom_stereochemistry_matching=False,
                bond_stereochemistry_matching=False,
            )
            if isomorphic:
                overlapping = True
                overlaps += 1
                # NOTE(review): the key is hard-coded to 'default' although a
                # ``specification`` argument is available - confirm intended.
                qca_entries.append(entry.object_map['default'])
                break

        if not overlapping:
            tb.append(torsion_barrier_for_molecule(record, mapped_smiles))

    # append mode: ids accumulate across calls for different datasets
    with open(qca_overlapped_entries, "a") as f:
        f.writelines("%s\n" % item for item in qca_entries)

    print("No. of overlaps with benchmark set, qca entries added to overlaps_qca_ids.txt: ", overlaps)
    print("No. of COMPLETE and not overlapping with benchmark in this dataset:", len(tb), "out of ", len(ds.df))
    return tb

コード例 #9

0

ファイルを表示

    def fragment(self, molecule: Molecule) -> List[FragmentData]:
        """
        Fragment the molecule using the WBOFragmenter.

        Parameters:
            molecule: The openff molecule to be fragmented using the provided class settings

        Returns:
            One FragmentData per fragment, relating the fragment and the bond it was built around
            to the parent molecule. When a fragment is isomorphic with the input molecule, the
            fragment itself is recorded as the parent with an identity atom mapping.

        Raises:
            FragmenterError: If a RuntimeError is raised while fragmenting or building the results.
        """
        from fragmenter import fragment

        # a molecule without conformers can cause issues, so make sure there is one
        if molecule.n_conformers == 0:
            molecule.generate_conformers(n_conformers=1)

        wbo_fragmenter = fragment.WBOFragmenter(
            molecule=molecule.to_openeye(), verbose=False)

        collected: List[FragmentData] = []
        try:
            wbo_fragmenter.fragment(
                threshold=self.wbo_threshold,
                keep_non_rotor_ring_substituents=self.keep_non_rotor_ring_substituents,
            )
            # the json form relates each fragment to the bond it was built around
            torsiondrive_json = wbo_fragmenter.to_torsiondrive_json()

            for data in torsiondrive_json.values():
                identifiers = data["identifiers"]
                off_frag = Molecule.from_mapped_smiles(
                    identifiers["canonical_isomeric_explicit_hydrogen_mapped_smiles"])
                # central two atoms of the first dihedral
                frag_dihedral = data["dihedral"][0][1:3]

                if molecule.is_isomorphic_with(off_frag):
                    # the "fragment" is the whole molecule: no mapping needs
                    # working out, so reuse the fragment as its own parent
                    mapping = {i: i for i in range(molecule.n_atoms)}
                    parent_dihedral = frag_dihedral
                    parent_molecule = off_frag
                else:
                    mapping = self._get_fragment_parent_mapping(
                        fragment=off_frag, parent=molecule)
                    # translate the fragment bond into parent atom indices
                    parent_dihedral = tuple(mapping[i] for i in frag_dihedral)
                    parent_molecule = molecule

                collected.append(
                    FragmentData(
                        parent_molecule=parent_molecule,
                        parent_torsion=parent_dihedral,
                        fragment_molecule=off_frag,
                        fragment_torsion=frag_dihedral,
                        fragment_attributes=identifiers,
                        fragment_parent_mapping=mapping,
                    )
                )

        except RuntimeError:
            raise FragmenterError(
                f"The molecule {molecule} could not be fragmented so no fitting target was made."
            )

        return collected