def test_rdkit_from_smiles_hydrogens_are_explicit(self):
    """
    Check how RDKitToolkitWrapper.from_smiles reacts to the
    hydrogens_are_explicit kwarg for SMILES with implicit and explicit hydrogens
    """
    rdkit_wrapper = RDKitToolkitWrapper()
    implicit_h_smiles = "C#C"
    explicit_h_smiles = "[H][C]#[C][H]"

    # Declaring the hydrogens explicit on an implicit-H SMILES must be rejected
    with pytest.raises(
            ValueError,
            match="but RDKit toolkit interpreted SMILES 'C#C' as having implicit hydrogen"):
        Molecule.from_smiles(implicit_h_smiles,
                             toolkit_registry=rdkit_wrapper,
                             hydrogens_are_explicit=True)

    # Without that declaration, the two hydrogens are added to the two carbons
    from_implicit = Molecule.from_smiles(implicit_h_smiles,
                                         toolkit_registry=rdkit_wrapper,
                                         hydrogens_are_explicit=False)
    assert from_implicit.n_atoms == 4

    # A fully explicit SMILES is accepted when declared explicit
    from_explicit = Molecule.from_smiles(explicit_h_smiles,
                                         toolkit_registry=rdkit_wrapper,
                                         hydrogens_are_explicit=True)
    assert from_explicit.n_atoms == 4

    # It's debatable whether this last call should pass. Strictly speaking, the
    # hydrogens in this SMILES _are_ explicit, so letting
    # "hydrogens_are_explicit=False" through here allows a contradiction.
    # We might rethink the name of this kwarg.
    from_explicit_undeclared = Molecule.from_smiles(explicit_h_smiles,
                                                    toolkit_registry=rdkit_wrapper,
                                                    hydrogens_are_explicit=False)
    assert from_explicit_undeclared.n_atoms == 4
def test_smiles_missing_stereochemistry(self, smiles, exception_regex):
    """Test RDKitToolkitWrapper from_smiles() on SMILES with possibly ambiguous stereochemistry

    When ``exception_regex`` is not None, parsing ``smiles`` must raise an
    UndefinedStereochemistryError whose message matches it; otherwise parsing
    must simply succeed.
    """
    rdkit_wrapper = RDKitToolkitWrapper()

    if exception_regex is None:
        Molecule.from_smiles(smiles, toolkit_registry=rdkit_wrapper)
        return

    with pytest.raises(UndefinedStereochemistryError, match=exception_regex):
        Molecule.from_smiles(smiles, toolkit_registry=rdkit_wrapper)
def test_compute_partial_charges_net_charge(self):
    """Test partial charge computation on a molecule with a net +1 charge

    The registry used here is built from AmberToolsToolkitWrapper and
    RDKitToolkitWrapper (no OpenEye toolkit is involved).
    """
    toolkit_registry = ToolkitRegistry(
        toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
    smiles = '[H]C([H])([H])[N+]([H])([H])[H]'
    molecule = Molecule.from_smiles(smiles, toolkit_registry=toolkit_registry)
    molecule.generate_conformers(toolkit_registry=toolkit_registry)

    # This test expects the generic compute_partial_charges() entry point to
    # raise NotImplementedError. Nothing placed after the call inside the
    # pytest.raises block would ever run, so the per-model net-charge checks
    # that used to live there were dead code and have been removed.
    # TODO: Once a charge_model argument is accepted, check that an unknown
    #       model ('notARealChargeModel') is rejected, and that each of
    #       'gas', 'mul' and 'bcc' yields charges summing to +1 (between 0.99
    #       and 1.01 elementary charges). Also figure out why
    #       ['cm1', 'cm2'] fail.
    with pytest.raises(NotImplementedError):
        molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

    # For now, only AM1-BCC is tested (more will be tested when the SMIRNOFF
    # spec for other charges is finalized)
    molecule.compute_partial_charges_am1bcc(toolkit_registry=toolkit_registry)
    charge_sum = 0 * unit.elementary_charge
    for pc in molecule._partial_charges:
        charge_sum += pc
    assert 0.999 * unit.elementary_charge < charge_sum < 1.001 * unit.elementary_charge
def test_compute_partial_charges(self):
    """Test partial charge computation on a neutral molecule

    The registry used here is built from AmberToolsToolkitWrapper and
    RDKitToolkitWrapper (no OpenEye toolkit is involved).
    """
    toolkit_registry = ToolkitRegistry(
        toolkit_precedence=[AmberToolsToolkitWrapper, RDKitToolkitWrapper])
    smiles = '[H]C([H])([H])C([H])([H])[H]'
    molecule = Molecule.from_smiles(smiles, toolkit_registry=toolkit_registry)
    molecule.generate_conformers(toolkit_registry=toolkit_registry)

    # This test expects the generic compute_partial_charges() entry point to
    # raise NotImplementedError. Nothing placed after the call inside the
    # pytest.raises block would ever run, so the per-model net-charge checks
    # that used to live there were dead code and have been removed.
    # TODO: Implementation of these tests is pending a decision on the API
    #       for our charge model. Once a charge_model argument is accepted,
    #       check that an unknown model ('notARealChargeModel') is rejected
    #       and that each of 'gas', 'mul' and 'bcc' (and eventually
    #       'cm1', 'cm2') yields a net charge of zero.
    with pytest.raises(NotImplementedError):
        molecule.compute_partial_charges(toolkit_registry=toolkit_registry)

    # For now, just test AM1-BCC while the SMIRNOFF spec for other charge
    # models gets worked out
    molecule.compute_partial_charges_am1bcc(toolkit_registry=toolkit_registry)
    charge_sum = 0 * unit.elementary_charge
    for pc in molecule._partial_charges:
        charge_sum += pc
    # Two-sided bound: a one-sided "charge_sum < tolerance" check would also
    # accept an arbitrarily large negative net charge.
    tolerance = 0.002 * unit.elementary_charge
    assert -tolerance < charge_sum < tolerance
def test_negated_atom_smarts():
    """
    Build an atom type from "[*]", switch off a selection of its primitive
    bits, and check that the generated SMARTS contains the expected tokens
    and matches two atoms of CCO.
    """
    atom_type = offsb.chem.types.AtomType.from_string("[*]")

    # Clear a selection of primitive bits
    atom_type._H[0:2] = False
    atom_type._r[1:4] = False
    atom_type._symbol[0:2] = False
    atom_type._aA[1] = False
    atom_type._X[:2] = False

    # Clear every _x bit, then turn only the first one back on
    atom_type._x[:] = False
    atom_type._x[0] = True

    smarts = atom_type.to_smarts(tag=True)

    topology = Molecule.from_smiles("CCO").to_topology()
    assert len(topology.chemical_environment_matches(smarts)) == 2

    # The full pattern is expected to read
    # '[!#1;!H1!H0;!X1;x0;!r5!r4!r3;A:1]'; the tokens are checked one by one
    # to guard against future cases that might reorder them and give false
    # negatives
    expected_tokens = (
        "!#1", "!H1", "!H0", "!X1", "x0", "!r5", "!r4", "!r3", "A", ":1",
    )
    for token in expected_tokens:
        assert token in smarts
def test_compute_partial_charges_trans_cooh_am1bcc(self):
    """Test OpenEyeToolkitWrapper AM1-BCC charge assignment on a problematic molecule

    The molecule exemplifies Issue 346
    (https://github.com/openforcefield/openforcefield/issues/346). The test
    passes as long as conformer generation and charge assignment complete
    without raising.
    """
    oe_wrapper = OpenEyeToolkitWrapper()
    molecule = Molecule.from_smiles("C(CC[NH3+])C[C@@H](C(=O)O)N")

    molecule.generate_conformers(toolkit_registry=oe_wrapper)
    molecule.compute_partial_charges_am1bcc(toolkit_registry=oe_wrapper)
def test_smiles_charged(self):
    """Round-trip a charged SMILES through OpenEyeToolkitWrapper from_smiles() and to_smiles()"""
    oe_wrapper = OpenEyeToolkitWrapper()

    # This differs from RDKit's expected output due to different canonicalization schemes
    original_smiles = '[H]C([H])([H])[N+]([H])([H])[H]'

    parsed = Molecule.from_smiles(original_smiles, toolkit_registry=oe_wrapper)
    roundtrip_smiles = parsed.to_smiles(toolkit_registry=oe_wrapper)

    assert original_smiles == roundtrip_smiles
def test_smiles_add_H(self):
    """Check that OpenEyeToolkitWrapper writes explicit hydrogens for an implicit-hydrogen SMILES"""
    oe_wrapper = OpenEyeToolkitWrapper()

    # This differs from RDKit's SMILES due to different canonicalization schemes
    expected_output_smiles = '[H]C([H])([H])C([H])([H])[H]'

    parsed = Molecule.from_smiles('CC', toolkit_registry=oe_wrapper)
    written_smiles = parsed.to_smiles(toolkit_registry=oe_wrapper)

    assert expected_output_smiles == written_smiles
def test_smiles_add_H(self):
    """Check that RDKitToolkitWrapper writes explicit hydrogens for an implicit-hydrogen SMILES"""
    rdkit_wrapper = RDKitToolkitWrapper()

    # This differs from OE's expected output due to different canonicalization schemes
    expected_output_smiles = '[H][C]([H])([H])[C]([H])([H])[H]'

    parsed = Molecule.from_smiles('CC', toolkit_registry=rdkit_wrapper)
    written_smiles = parsed.to_smiles(toolkit_registry=rdkit_wrapper)

    assert written_smiles == expected_output_smiles
def test_smiles(self):
    """Round-trip an explicit-hydrogen SMILES through OpenEyeToolkitWrapper from_smiles() and to_smiles()"""
    oe_wrapper = OpenEyeToolkitWrapper()

    # This differs from RDKit's SMILES due to different canonicalization schemes
    original_smiles = '[H]C([H])([H])C([H])([H])[H]'

    parsed = Molecule.from_smiles(original_smiles, toolkit_registry=oe_wrapper)
    roundtrip_smiles = parsed.to_smiles(toolkit_registry=oe_wrapper)

    assert original_smiles == roundtrip_smiles
def test_smiles_missing_stereochemistry(self):
    """Test OpenEyeToolkitWrapper from_smiles() on SMILES with and without fully specified stereochemistry"""
    oe_wrapper = OpenEyeToolkitWrapper()

    # Each case: (label, SMILES, whether UndefinedStereochemistryError is expected)
    cases = (
        ("unspec_chiral_smiles", r"C\C(F)=C(/F)CC(C)(Cl)Br", True),
        ("spec_chiral_smiles", r"C\C(F)=C(/F)C[C@@](C)(Cl)Br", False),
        ("unspec_db_smiles", r"CC(F)=C(F)C[C@@](C)(Cl)Br", True),
        ("spec_db_smiles", r"C\C(F)=C(/F)C[C@@](C)(Cl)Br", False),
    )

    for _label, smiles, should_raise in cases:
        if not should_raise:
            Molecule.from_smiles(smiles, toolkit_registry=oe_wrapper)
            continue
        with pytest.raises(UndefinedStereochemistryError):
            Molecule.from_smiles(smiles, toolkit_registry=oe_wrapper)
def test_to_from_openeye_core_props_unset(self):
    """Test OpenEyeToolkitWrapper to_openeye() and from_openeye() when given empty core property fields"""
    toolkit_wrapper = OpenEyeToolkitWrapper()

    # Using a simple molecule with tetrahedral and bond stereochemistry
    input_smiles = r'C\C(F)=C(/F)C[C@](C)(Cl)Br'
    expected_output_smiles = r'[H]C([H])([H])/C(=C(/C([H])([H])[C@](C([H])([H])[H])(Cl)Br)\F)/F'
    molecule = Molecule.from_smiles(input_smiles,
                                    toolkit_registry=toolkit_wrapper)
    assert molecule.to_smiles(
        toolkit_registry=toolkit_wrapper) == expected_output_smiles

    # Ensure one atom has its stereochemistry specified
    assert any(atom.atomic_number == 6 and atom.stereochemistry == "R"
               for atom in molecule.atoms)

    # Do a first conversion to/from oemol
    oemol = molecule.to_openeye()
    molecule2 = Molecule.from_openeye(oemol)

    # Test that properties survived first conversion
    assert molecule.name == molecule2.name
    assert any(atom.atomic_number == 6 and atom.stereochemistry == "R"
               for atom in molecule2.atoms)

    # NOTE: This expects the same indexing scheme in the original and new molecule
    for atom1, atom2 in zip(molecule.atoms, molecule2.atoms):
        assert atom1.to_dict() == atom2.to_dict()
    for bond1, bond2 in zip(molecule.bonds, molecule2.bonds):
        assert bond1.to_dict() == bond2.to_dict()

    # No conformer was ever added, so none may appear on either side.
    # Identity comparison avoids any overloaded (e.g. elementwise) __eq__.
    assert molecule._conformers is None
    assert molecule2._conformers is None

    for pc1, pc2 in zip(molecule._partial_charges,
                        molecule2._partial_charges):
        # Strip the units before the numerical comparison
        pc1_ul = pc1 / unit.elementary_charge
        pc2_ul = pc2 / unit.elementary_charge
        assert_almost_equal(pc1_ul, pc2_ul, decimal=6)

    assert molecule2.to_smiles(
        toolkit_registry=toolkit_wrapper) == expected_output_smiles
def expand_smiles_to_qcschema(
    smi,
    cutoff=None,
    n_confs=1,
    unique_smiles=True,
    isomer_max=-1,
):
    """
    Expand a SMILES string into its stereoisomers and generate conformers
    for each of them.

    Parameters
    ----------
    smi : str
        The input SMILES string
    cutoff : float, optional
        During the all-pairwise conformer comparison, drop conformers whose
        mean per-atom displacement from an earlier kept conformer is less
        than this value. ``None`` (the default) disables the pruning.
    n_confs : int
        The number of conformations to attempt generating; the actual number
        could be smaller
    unique_smiles : bool
        If stereoisomers are generated, organize molecules by their
        unambiguous SMILES string. Currently forced to True (see the TODO in
        the body), so the value passed in is ignored.
    isomer_max : int
        The number of stereoisomers to keep if multiple are found. The
        default of -1 means keep all found.

    Returns
    -------
    molecule_set : dict
        With unique_smiles in effect, keys are the mapped, explicit-hydrogen
        stereoisomer SMILES derived from the input and each value is a list
        holding one OpenFF molecule with conformers attached. If no molecule
        could be built, the dict holds the input SMILES mapped to an empty
        list.
    """

    # TODO: unique_smiles=False is broken as it repeats isomers for some reason
    unique_smiles = True

    rmsd_cutoff = cutoff
    ref_smi = smi

    # this is the main object returned
    molecule_set = {}

    try:
        # If this fails, probably due to stereochemistry. Catch the
        # exception, then enumerate the variations on the SMILES.
        rdmols = [Molecule.from_smiles(smi).to_rdkit()]
    except openforcefield.utils.toolkits.UndefinedStereochemistryError:
        rdmols = list(EnumerateStereoisomers(Chem.MolFromSmiles(smi)))

    # Clip the isomers here if a limit was specified
    if isomer_max > 0:
        rdmols = rdmols[:isomer_max]

    # Add explicit hydrogens, tag every atom with a 1-based map index, and
    # write each isomer out as an isomeric, canonical SMILES
    smi_list = []
    for rdmol in rdmols:
        rdmol = Chem.AddHs(rdmol)
        for atom in rdmol.GetAtoms():
            atom.SetAtomMapNum(atom.GetIdx() + 1)
        smi_list.append(
            Chem.MolToSmiles(
                rdmol,
                isomericSmiles=True,
                allHsExplicit=True,
                canonical=True,
                allBondsExplicit=False,
            ))
    smi_list.sort()

    if unique_smiles:
        # we are collecting molecules by their specific stereoisomer SMILES
        off_logger = logging.getLogger("openforcefield")
        saved_level = off_logger.getEffectiveLevel()
        off_logger.setLevel(logging.ERROR)
        try:
            for isomer_smi in smi_list:
                try:
                    # this is ridiculous; we enumerated stereoisomers
                    # previously, but we still fail to build the molecule.
                    # Silently allow... note that this is likely because
                    # there is still bond stereochemistry
                    molecule_set[isomer_smi] = [
                        Molecule.from_smiles(isomer_smi,
                                             allow_undefined_stereo=True)
                    ]
                except openforcefield.utils.toolkits.UndefinedStereochemistryError:
                    # RDKit was unable to determine chirality? Skip...
                    pass
        finally:
            # Put the logger level back even if building a molecule raised
            off_logger.setLevel(saved_level)
    else:
        molecule_set[ref_smi] = [
            Molecule.from_smiles(isomer_smi) for isomer_smi in smi_list
        ]

    for isomer_smi in smi_list:
        # The returned data structure follows the requested settings: keyed
        # by stereoisomer SMILES when unique, by the input SMILES otherwise
        key = isomer_smi if unique_smiles else ref_smi
        if key not in molecule_set:
            continue

        for mol in molecule_set[key]:
            # attempt to generate n_confs, but the actual number could be
            # smaller; anything printed while doing so is discarded
            sink = io.StringIO()
            with contextlib.redirect_stderr(sink), \
                    contextlib.redirect_stdout(sink):
                try:
                    mol.generate_conformers(n_conformers=n_confs)
                except TypeError:
                    # deliberate best effort: keep going with whatever
                    # conformers the molecule has
                    pass

            _prune_close_conformers(mol, rmsd_cutoff)

    if not molecule_set:
        molecule_set[ref_smi] = []

    return molecule_set


def _prune_close_conformers(mol, cutoff):
    """Drop conformers of ``mol`` that are closer than ``cutoff`` to an earlier kept conformer."""
    conformers = mol.conformers
    if conformers is None:
        # NOTE(review): presumably this means no conformer could be
        # generated; leave the molecule untouched -- confirm
        return

    n_found = len(conformers)

    # Whether each conformer survives. Being flagged once is enough to keep
    # a conformer out of all later comparisons.
    keep = [True] * n_found

    # Without a cutoff there is nothing to compare against
    if cutoff is not None:
        for j in range(n_found - 1):
            # An earlier pass found this conformer too close to another
            # one, so it is not used as a reference
            if not keep[j]:
                continue

            # k starts from j+1, so only the upper triangle of the
            # comparisons (j < k) is visited
            for k in range(j + 1, n_found):
                # Mean per-atom displacement between the stored
                # coordinates; no alignment is applied beforehand
                displacement = np.linalg.norm(conformers[k] - conformers[j],
                                              axis=1)
                if displacement.mean() < cutoff:
                    keep[k] = False

    # hack? how to set conformers explicitly if different number than
    # currently stored?
    mol._conformers = [
        conformer for conformer, kept in zip(conformers, keep) if kept
    ]
def test_to_from_rdkit_core_props_filled(self):
    """Test RDKitToolkitWrapper to_rdkit() and from_rdkit() when given populated core property fields"""
    toolkit_wrapper = RDKitToolkitWrapper()

    # Replacing with a simple molecule with stereochemistry
    input_smiles = r'C\C(F)=C(/F)C[C@@](C)(Cl)Br'
    expected_output_smiles = r'[H][C]([H])([H])/[C]([F])=[C](\[F])[C]([H])([H])[C@@]([Cl])([Br])[C]([H])([H])[H]'
    molecule = Molecule.from_smiles(input_smiles,
                                    toolkit_registry=toolkit_wrapper)
    assert molecule.to_smiles(
        toolkit_registry=toolkit_wrapper) == expected_output_smiles

    # Populate core molecule property fields
    molecule.name = 'Alice'
    partial_charges = unit.Quantity(
        np.array([
            -.9, -.8, -.7, -.6, -.5, -.4, -.3, -.2, -.1, 0., .1, .2, .3, .4,
            .5, .6, .7, .8
        ]), unit.elementary_charge)
    molecule.partial_charges = partial_charges

    # 18 rows of (x, y, z) holding 0.0, 1.0, ..., 53.0. Built as floats
    # rather than string literals so no implicit string-to-float coercion
    # is relied upon.
    coords = unit.Quantity(
        np.arange(54, dtype=float).reshape(18, 3), unit.angstrom)
    molecule.add_conformer(coords)

    # Populate core atom property fields
    molecule.atoms[2].name = 'Bob'

    # Ensure one atom has its stereochemistry specified
    assert any(atom.atomic_number == 6 and atom.stereochemistry == "S"
               for atom in molecule.atoms)

    # Populate bond core property fields
    fractional_bond_orders = [float(val) for val in range(18)]
    for fbo, bond in zip(fractional_bond_orders, molecule.bonds):
        bond.fractional_bond_order = fbo

    # Do a first conversion to/from rdmol
    rdmol = molecule.to_rdkit()
    molecule2 = Molecule.from_rdkit(rdmol)

    # Test that properties survived first conversion
    # NOTE: a whole-molecule check (molecule.to_dict() == molecule2.to_dict())
    #       is not performed here; the properties are compared piecewise.
    assert molecule.name == molecule2.name
    assert any(atom.atomic_number == 6 and atom.stereochemistry == "S"
               for atom in molecule2.atoms)

    # NOTE: This expects the same indexing scheme in the original and new molecule
    for atom1, atom2 in zip(molecule.atoms, molecule2.atoms):
        assert atom1.to_dict() == atom2.to_dict()
    for bond1, bond2 in zip(molecule.bonds, molecule2.bonds):
        assert bond1.to_dict() == bond2.to_dict()

    assert (molecule._conformers[0] == molecule2._conformers[0]).all()

    for pc1, pc2 in zip(molecule._partial_charges,
                        molecule2._partial_charges):
        # Strip the units before the numerical comparison
        pc1_ul = pc1 / unit.elementary_charge
        pc2_ul = pc2 / unit.elementary_charge
        assert_almost_equal(pc1_ul, pc2_ul, decimal=6)

    assert molecule2.to_smiles(
        toolkit_registry=toolkit_wrapper) == expected_output_smiles