Python Molecule.from_smiles 예제들, openforcefield.topology.molecule.Molecule.from_smiles Python 예제들

예제 #1

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_rdkit_from_smiles_hydrogens_are_explicit(self):
        """
        Check how Molecule.from_smiles, backed by RDKitToolkitWrapper, reacts to
        the hydrogens_are_explicit kwarg for implicit- and explicit-hydrogen SMILES
        """
        rdkit_wrapper = RDKitToolkitWrapper()

        def build(smiles, explicit):
            # Every call in this test differs only in the SMILES and the kwarg value.
            return Molecule.from_smiles(smiles,
                                        toolkit_registry=rdkit_wrapper,
                                        hydrogens_are_explicit=explicit)

        # Implicit-hydrogen input: claiming the hydrogens are explicit must be rejected...
        implicit_smiles = "C#C"
        expected_message = "but RDKit toolkit interpreted SMILES 'C#C' as having implicit hydrogen"
        with pytest.raises(ValueError, match=expected_message):
            build(implicit_smiles, True)
        # ...while the default interpretation fills in the two hydrogens.
        assert build(implicit_smiles, False).n_atoms == 4

        # Explicit-hydrogen input is accepted when flagged as explicit.
        explicit_smiles = "[H][C]#[C][H]"
        assert build(explicit_smiles, True).n_atoms == 4

        # It's debatable whether this last call should pass. Strictly speaking, the
        # hydrogens in this SMILES _are_ explicit, so letting hydrogens_are_explicit=False
        # through here allows a contradiction. The kwarg name might need a rethink.
        assert build(explicit_smiles, False).n_atoms == 4

예제 #2

0

파일 보기

파일: test_toolkits.py 프로젝트: proteneer/openforcefield

    def test_smiles_missing_stereochemistry(self, smiles, exception_regex):
        """Test that from_smiles() with RDKitToolkitWrapper raises on ambiguous stereochemistry.

        When exception_regex is None the SMILES must load cleanly; otherwise loading
        must raise UndefinedStereochemistryError with a message matching the regex.
        """
        rdkit_wrapper = RDKitToolkitWrapper()

        # Fully specified input: loading simply has to succeed.
        if exception_regex is None:
            Molecule.from_smiles(smiles, toolkit_registry=rdkit_wrapper)
            return

        with pytest.raises(UndefinedStereochemistryError, match=exception_regex):
            Molecule.from_smiles(smiles, toolkit_registry=rdkit_wrapper)

예제 #3

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_compute_partial_charges_net_charge(self):
        """Test partial charge assignment on a molecule with a net +1 charge.

        The registry used here gives precedence to AmberToolsToolkitWrapper,
        followed by RDKitToolkitWrapper.
        """
        toolkit_registry = ToolkitRegistry(
            toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
        smiles = '[H]C([H])([H])[N+]([H])([H])[H]'
        molecule = Molecule.from_smiles(smiles,
                                        toolkit_registry=toolkit_registry)
        molecule.generate_conformers(toolkit_registry=toolkit_registry)

        # The generic compute_partial_charges() entry point is expected to raise
        # NotImplementedError for now.
        with pytest.raises(NotImplementedError):
            molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

        # TODO: Figure out why ['cm1', 'cm2'] fail
        # TODO: once compute_partial_charges() accepts a charge_model, pass it here and
        #       assert the summed charge lies between 0.99 and 1.01 elementary charges.
        #       Nothing may follow the call inside the pytest.raises block: statements
        #       after the raising call never execute.
        for charge_model in ['gas', 'mul', 'bcc']:
            with pytest.raises(NotImplementedError):
                molecule.compute_partial_charges(
                    toolkit_registry=toolkit_registry)

        # For now, I'm just testing AM1-BCC (will test more when the SMIRNOFF spec for other charges is finalized)
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_registry)
        charge_sum = 0 * unit.elementary_charge
        for pc in molecule._partial_charges:
            charge_sum += pc
        assert 0.999 * unit.elementary_charge < charge_sum < 1.001 * unit.elementary_charge

예제 #4

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_compute_partial_charges(self):
        """Test partial charge assignment on a neutral molecule.

        The registry used here gives precedence to AmberToolsToolkitWrapper,
        followed by RDKitToolkitWrapper.
        """
        toolkit_registry = ToolkitRegistry(
            toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])

        smiles = '[H]C([H])([H])C([H])([H])[H]'
        molecule = Molecule.from_smiles(smiles,
                                        toolkit_registry=toolkit_registry)
        molecule.generate_conformers(toolkit_registry=toolkit_registry)

        # TODO: Implementation of these tests is pending a decision on the API for our charge model
        # The generic compute_partial_charges() entry point is expected to raise
        # NotImplementedError for now.
        with pytest.raises(NotImplementedError):
            molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

        # ['cm1', 'cm2'] are not exercised here.
        # TODO: once compute_partial_charges() accepts a charge_model, pass it here and
        #       assert the summed charge is within 0.01 elementary charges of zero.
        #       Nothing may follow the call inside the pytest.raises block: statements
        #       after the raising call never execute.
        for charge_model in ['gas', 'mul', 'bcc']:
            with pytest.raises(NotImplementedError):
                molecule.compute_partial_charges(
                    toolkit_registry=toolkit_registry)

        # For now, just test AM1-BCC while the SMIRNOFF spec for other charge models gets worked out
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_registry)
        charge_sum = 0 * unit.elementary_charge
        for pc in molecule._partial_charges:
            charge_sum += pc
        # Bound the net charge on both sides: an upper bound alone would also accept
        # an arbitrarily negative total for this neutral molecule.
        tolerance = 0.002 * unit.elementary_charge
        assert -tolerance < charge_sum < tolerance

예제 #5

0

파일 보기

def test_negated_atom_smarts():
    """Check the tagged SMARTS emitted by an AtomType after entries of its
    private primitive fields have been switched off."""
    atom_type = offsb.chem.types.AtomType.from_string("[*]")

    # Start from the wildcard and set selected entries of each field to False.
    atom_type._H[0:2] = False
    atom_type._r[1:4] = False
    atom_type._symbol[0:2] = False
    atom_type._aA[1] = False
    atom_type._X[:2] = False
    # Clear every entry of _x, then re-enable only the first one.
    atom_type._x[:] = False
    atom_type._x[0] = True
    smarts = atom_type.to_smarts(tag=True)

    # The pattern is expected to match exactly two environments in CCO.
    topology = Molecule.from_smiles("CCO").to_topology()
    assert len(topology.chemical_environment_matches(smarts)) == 2

    # The full string would be '[!#1;!H1!H0;!X1;x0;!r5!r4!r3;A:1]', but checking
    # each primitive separately guards against a future reordering of the output
    # producing false negatives.
    expected_primitives = (
        "!#1", "!H1", "!H0", "!X1", "x0", "!r5", "!r4", "!r3", "A", ":1",
    )
    for primitive in expected_primitives:
        assert primitive in smarts

예제 #6

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_compute_partial_charges_trans_cooh_am1bcc(self):
        """Test that OpenEyeToolkitWrapper can assign AM1-BCC charges to a problematic molecule.

        The molecule is the example from Issue 346
        (https://github.com/openforcefield/openforcefield/issues/346). The test passes
        as long as conformer generation and charge assignment complete without raising.
        """
        problem_molecule = Molecule.from_smiles("C(CC[NH3+])C[C@@H](C(=O)O)N")
        openeye_wrapper = OpenEyeToolkitWrapper()

        problem_molecule.generate_conformers(toolkit_registry=openeye_wrapper)
        problem_molecule.compute_partial_charges_am1bcc(
            toolkit_registry=openeye_wrapper)

예제 #7

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

 def test_smiles_charged(self):
     """Round-trip a charged SMILES through from_smiles()/to_smiles() with OpenEyeToolkitWrapper"""
     oe_wrapper = OpenEyeToolkitWrapper()
     # RDKit would write this molecule differently: the two toolkits use
     # different canonicalization schemes.
     charged_smiles = '[H]C([H])([H])[N+]([H])([H])[H]'
     parsed = Molecule.from_smiles(charged_smiles,
                                   toolkit_registry=oe_wrapper)
     round_tripped = parsed.to_smiles(toolkit_registry=oe_wrapper)
     assert charged_smiles == round_tripped

예제 #8

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

 def test_smiles_add_H(self):
     """Check that reading 'CC' with OpenEyeToolkitWrapper yields explicit hydrogens on output"""
     oe_wrapper = OpenEyeToolkitWrapper()
     # RDKit writes this molecule differently: the two toolkits use
     # different canonicalization schemes.
     expected_output_smiles = '[H]C([H])([H])C([H])([H])[H]'
     parsed = Molecule.from_smiles('CC', toolkit_registry=oe_wrapper)
     written_smiles = parsed.to_smiles(toolkit_registry=oe_wrapper)
     assert expected_output_smiles == written_smiles

예제 #9

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

 def test_smiles_add_H(self):
     """Check that reading 'CC' with RDKitToolkitWrapper yields explicit hydrogens on output"""
     rdkit_wrapper = RDKitToolkitWrapper()
     # OpenEye writes this molecule differently: the two toolkits use
     # different canonicalization schemes.
     expected_output_smiles = '[H][C]([H])([H])[C]([H])([H])[H]'
     parsed = Molecule.from_smiles('CC', toolkit_registry=rdkit_wrapper)
     written_smiles = parsed.to_smiles(toolkit_registry=rdkit_wrapper)
     assert written_smiles == expected_output_smiles

예제 #10

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_smiles(self):
        """Round-trip an explicit-hydrogen SMILES through from_smiles()/to_smiles() with OpenEyeToolkitWrapper"""
        oe_wrapper = OpenEyeToolkitWrapper()

        # RDKit would write this molecule differently: the two toolkits use
        # different canonicalization schemes.
        original_smiles = '[H]C([H])([H])C([H])([H])[H]'
        parsed = Molecule.from_smiles(original_smiles,
                                      toolkit_registry=oe_wrapper)
        round_tripped = parsed.to_smiles(toolkit_registry=oe_wrapper)
        assert original_smiles == round_tripped

예제 #11

0

파일 보기

파일: test_toolkits.py 프로젝트: proteneer/openforcefield

    def test_smiles_missing_stereochemistry(self):
        """Test OpenEyeToolkitWrapper from_smiles() on SMILES with and without full stereochemistry.

        SMILES flagged as under-specified must raise UndefinedStereochemistryError;
        the fully specified ones must load without raising.
        """
        oe_wrapper = OpenEyeToolkitWrapper()

        # Each case is (SMILES, whether from_smiles() is expected to raise).
        cases = [
            # stereocenter unspecified, double bond specified
            (r"C\C(F)=C(/F)CC(C)(Cl)Br", True),
            # stereocenter and double bond both specified
            (r"C\C(F)=C(/F)C[C@@](C)(Cl)Br", False),
            # double bond unspecified, stereocenter specified
            (r"CC(F)=C(F)C[C@@](C)(Cl)Br", True),
            # stereocenter and double bond both specified
            (r"C\C(F)=C(/F)C[C@@](C)(Cl)Br", False),
        ]

        for smiles, should_raise in cases:
            if not should_raise:
                Molecule.from_smiles(smiles, toolkit_registry=oe_wrapper)
                continue
            with pytest.raises(UndefinedStereochemistryError):
                Molecule.from_smiles(smiles, toolkit_registry=oe_wrapper)

예제 #12

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_to_from_openeye_core_props_unset(self):
        """Test OpenEyeToolkitWrapper to_openeye() and from_openeye() when given empty core property fields"""
        toolkit_wrapper = OpenEyeToolkitWrapper()

        # Using a simple molecule with tetrahedral and bond stereochemistry
        input_smiles = r'C\C(F)=C(/F)C[C@](C)(Cl)Br'

        expected_output_smiles = r'[H]C([H])([H])/C(=C(/C([H])([H])[C@](C([H])([H])[H])(Cl)Br)\F)/F'
        molecule = Molecule.from_smiles(input_smiles,
                                        toolkit_registry=toolkit_wrapper)
        assert molecule.to_smiles(
            toolkit_registry=toolkit_wrapper) == expected_output_smiles

        # Ensure one atom has its stereochemistry specified
        assert any(atom.atomic_number == 6 and atom.stereochemistry == "R"
                   for atom in molecule.atoms)

        # Do a first conversion to/from oemol
        oemol = molecule.to_openeye()
        molecule2 = Molecule.from_openeye(oemol)

        # Test that properties survived first conversion
        assert molecule.name == molecule2.name
        # NOTE: This expects the same indexing scheme in the original and new molecule

        assert any(atom.atomic_number == 6 and atom.stereochemistry == "R"
                   for atom in molecule2.atoms)
        for atom1, atom2 in zip(molecule.atoms, molecule2.atoms):
            assert atom1.to_dict() == atom2.to_dict()
        for bond1, bond2 in zip(molecule.bonds, molecule2.bonds):
            assert bond1.to_dict() == bond2.to_dict()
        # No conformer was ever added, so both molecules must still hold None.
        # Identity comparison avoids relying on how the stored object implements ==.
        assert molecule._conformers is None
        assert molecule2._conformers is None
        for pc1, pc2 in zip(molecule._partial_charges,
                            molecule2._partial_charges):
            # Strip the units before the numerical comparison.
            pc1_ul = pc1 / unit.elementary_charge
            pc2_ul = pc2 / unit.elementary_charge
            assert_almost_equal(pc1_ul, pc2_ul, decimal=6)
        assert molecule2.to_smiles(
            toolkit_registry=toolkit_wrapper) == expected_output_smiles

예제 #13

0

파일 보기

파일: load.py 프로젝트: MobleyLab/openff-spellbook

def _prune_close_conformers(mol, rmsd_cutoff):
    """Drop conformers of ``mol`` that lie within ``rmsd_cutoff`` of an earlier kept conformer."""
    n_conformers = len(mol.conformers)
    rdmol = mol.to_rdkit()

    # keep[k] turns False as soon as conformer k is found too close to a kept
    # reference; one hit is enough to exclude it from all later comparisons.
    keep = [True] * n_conformers

    for j in range(n_conformers - 1):

        # Already flagged as a near-duplicate, so never use it as a reference
        if not keep[j]:
            continue

        # k starts from j + 1: only the upper triangle (j < k) is compared
        for k in range(j + 1, n_conformers):

            # NOTE(review): the value returned by AlignMol is not used, and the
            # alignment acts on the RDKit molecule rather than on mol.conformers.
            # The call is kept as in the original; confirm whether the distance
            # below was meant to be computed on aligned coordinates.
            AlignMol(rdmol, rdmol, k, j)
            distances = np.linalg.norm(mol.conformers[k] - mol.conformers[j],
                                       axis=1)

            if distances.mean() < rmsd_cutoff:
                keep[k] = False

    # hack? how to set conformers explicitly if different number than
    # currently stored?
    mol._conformers = [
        conf for conf, kept in zip(mol.conformers, keep) if kept
    ]


def expand_smiles_to_qcschema(
    smi,
    cutoff=None,
    n_confs=1,
    unique_smiles=True,
    isomer_max=-1,
):
    """
    Expand a SMILES string into its stereoisomers and generate conformers
    for each stereoisomer.

    Parameters
    ----------
    smi : str
        The input SMILES string
    cutoff : float, optional
        During the all-pairwise conformer comparison, remove conformers
        that are less than this cutoff value apart. The default of None
        disables the pruning.
    n_confs : int
        The number of conformations to attempt generating
    unique_smiles : bool
        If stereoisomers are generated, organize molecules by
        their unambiguous SMILES string. Currently ignored: the value is
        forced to True (see the TODO below).
    isomer_max : int
        The number of stereoisomers to keep if multiple are found.
        The default of -1 means keep all found.

    Returns
    -------
    molecule_set : dict
        Keys are the atom-mapped SMILES generated for each stereoisomer,
        and each value is a list of OpenFF molecules with conformers
        attached. If no molecule could be built, the dict holds the input
        SMILES mapped to an empty list.
    """

    # TODO: unique_smiles=False is broken as it repeats isomers for some reason
    unique_smiles = True

    # this is the main object returned
    molecule_set = {}

    ref_smi = smi

    try:
        # If this fails, probably due to stereochemistry. Catch the
        # exception, then enumerate the variations on the SMILES.
        rdmols = [Molecule.from_smiles(smi).to_rdkit()]

    except openforcefield.utils.toolkits.UndefinedStereochemistryError:

        rdmols = list(EnumerateStereoisomers(Chem.MolFromSmiles(smi)))

        # Clip the isomers here if a limit was specified
        if isomer_max > 0:
            rdmols = rdmols[:isomer_max]

    # Make hydrogens explicit and tag every atom with a 1-based map index
    for idx, rdmol in enumerate(rdmols):
        rdmols[idx] = Chem.AddHs(rdmol)
        for atom in rdmols[idx].GetAtoms():
            atom.SetAtomMapNum(atom.GetIdx() + 1)

    smi_list = sorted(
        Chem.MolToSmiles(
            x,
            isomericSmiles=True,
            allHsExplicit=True,
            canonical=True,
            allBondsExplicit=False,
        ) for x in rdmols)

    if unique_smiles:
        # we are collecting molecules by their specific stereoisomer SMILES.
        # The toolkit logger is quietened while the molecules are built; the
        # try/finally restores the logger's own level even if from_smiles
        # raises something other than the exception handled below.
        off_logger = logging.getLogger("openforcefield")
        saved_level = off_logger.level
        off_logger.setLevel(logging.ERROR)
        try:
            for smi in smi_list:
                try:
                    # this is ridiculous; we enumerated stereoisomers
                    # previously, but we still fail to build the molecule.
                    # Silently allow... note that this is likely because
                    # there is still bond stereochemistry
                    molecule_set[smi] = [
                        Molecule.from_smiles(smi, allow_undefined_stereo=True)
                    ]
                except openforcefield.utils.toolkits.UndefinedStereochemistryError:
                    # RDKit was unable to determine chirality? Skip...
                    pass
        finally:
            off_logger.setLevel(saved_level)

    else:
        molecule_set[ref_smi] = [Molecule.from_smiles(smi) for smi in smi_list]

    for smi in smi_list:

        # The returned data structure follows the requested settings: one
        # key per stereoisomer, or everything under the input SMILES
        key = smi if unique_smiles else ref_smi

        if key not in molecule_set:
            continue

        for mol in molecule_set[key]:

            # attempt to generate n_confs, but the actual number could be
            # smaller. Anything printed meanwhile is captured and dropped.
            sink = io.StringIO()
            with contextlib.redirect_stderr(sink), \
                    contextlib.redirect_stdout(sink):
                try:
                    mol.generate_conformers(n_conformers=n_confs)
                except TypeError:
                    # Deliberate best effort: a molecule whose conformer
                    # generation fails is returned as it is
                    pass

            # Pruning needs a cutoff and at least two conformers. The None
            # check on conformers covers the failed-generation case above.
            if cutoff is None or mol.conformers is None:
                continue
            if len(mol.conformers) > 1:
                _prune_close_conformers(mol, cutoff)

    if len(molecule_set) == 0:
        molecule_set[ref_smi] = []

    return molecule_set

예제 #14

0

파일 보기

파일: test_toolkits.py 프로젝트: z5476t4508/openforcefield

    def test_to_from_rdkit_core_props_filled(self):
        """Test that populated core properties survive RDKitToolkitWrapper to_rdkit()/from_rdkit()"""
        rdkit_wrapper = RDKitToolkitWrapper()

        def has_s_carbon(mol):
            # True when at least one carbon atom carries "S" stereochemistry.
            return any(atom.atomic_number == 6 and atom.stereochemistry == "S"
                       for atom in mol.atoms)

        # A simple molecule with stereochemistry
        input_smiles = r'C\C(F)=C(/F)C[C@@](C)(Cl)Br'
        expected_output_smiles = r'[H][C]([H])([H])/[C]([F])=[C](\[F])[C]([H])([H])[C@@]([Cl])([Br])[C]([H])([H])[H]'
        molecule = Molecule.from_smiles(input_smiles,
                                        toolkit_registry=rdkit_wrapper)
        assert molecule.to_smiles(
            toolkit_registry=rdkit_wrapper) == expected_output_smiles

        # Populate core molecule property fields
        molecule.name = 'Alice'
        # 18 charges running from -0.9 to 0.8 in steps of 0.1
        molecule.partial_charges = unit.Quantity(
            np.array([tenths / 10 for tenths in range(-9, 9)]),
            unit.elementary_charge)
        # 18 rows of three consecutive values ('0.0' ... '53.0'), kept as strings.
        # NOTE(review): presumably add_conformer() coerces these to floats - confirm.
        coordinate_rows = [[str(float(3 * row + col)) for col in range(3)]
                           for row in range(18)]
        molecule.add_conformer(
            unit.Quantity(np.array(coordinate_rows), unit.angstrom))

        # Populate core atom property fields
        molecule.atoms[2].name = 'Bob'
        # Ensure one atom has its stereochemistry specified
        assert has_s_carbon(molecule)

        # Populate bond core property fields with 0.0, 1.0, 2.0, ...
        for order, bond in zip(range(18), molecule.bonds):
            bond.fractional_bond_order = float(order)

        # Do a first conversion to/from an RDKit molecule
        molecule2 = Molecule.from_rdkit(molecule.to_rdkit())

        # Test that properties survived first conversion
        assert molecule.name == molecule2.name
        # NOTE: This expects the same indexing scheme in the original and new molecule
        assert has_s_carbon(molecule2)
        for original_atom, new_atom in zip(molecule.atoms, molecule2.atoms):
            assert original_atom.to_dict() == new_atom.to_dict()
        for original_bond, new_bond in zip(molecule.bonds, molecule2.bonds):
            assert original_bond.to_dict() == new_bond.to_dict()
        assert (molecule._conformers[0] == molecule2._conformers[0]).all()
        for original_charge, new_charge in zip(molecule._partial_charges,
                                               molecule2._partial_charges):
            # Strip the units before the numerical comparison.
            assert_almost_equal(original_charge / unit.elementary_charge,
                                new_charge / unit.elementary_charge,
                                decimal=6)
        assert molecule2.to_smiles(
            toolkit_registry=rdkit_wrapper) == expected_output_smiles