def test_energies(self):
    """Test potential energies match between openff-toolkit and OpenMM ForceField.

    For every installed SMIRNOFF force field, each molecule in ``self.molecules``
    is parameterized twice: once through an OpenMM ``ForceField`` that has the
    template generator registered, and once directly through the generator's
    SMIRNOFF typing engine. Energies of the two systems are compared before
    and after propagating some dynamics.
    """
    from openff.toolkit.topology import Molecule
    from simtk import unit
    from simtk.openmm.app import ForceField, NoCutoff

    # DEBUG: prepend a formaldehyde molecule whose first coordinate is nudged
    # by 0.1 A. Note that this mutates ``self.molecules``.
    molecule = Molecule.from_smiles('C=O')
    molecule.generate_conformers(n_conformers=1)
    molecule.conformers[0][0,0] += 0.1*unit.angstroms
    self.molecules.insert(0, molecule)

    # Test all supported SMIRNOFF force fields
    for small_molecule_forcefield in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS:
        print(f'Testing energies for {small_molecule_forcefield}...')
        # Create a generator that knows about a few molecules
        # TODO: Should the generator also load the appropriate force field files into the ForceField object?
        generator = SMIRNOFFTemplateGenerator(molecules=self.molecules, forcefield=small_molecule_forcefield)
        # Create a ForceField and register the template generator
        openmm_forcefield = ForceField()
        openmm_forcefield.registerTemplateGenerator(generator.generator)
        # Parameterize some molecules
        for molecule in self.molecules:
            # Create OpenMM System using OpenMM app.
            # The keyword must be spelled ``nonbondedMethod``; a misspelled
            # keyword is not recognised as the nonbonded method option.
            openmm_system = openmm_forcefield.createSystem(
                molecule.to_topology().to_openmm(),
                removeCMMotion=False,
                nonbondedMethod=NoCutoff,
            )
            # Retrieve System generated by the SMIRNOFF typing engine
            smirnoff_system = generator.get_openmm_system(molecule)
            # Compare energies and forces
            self.compare_energies(molecule, openmm_system, smirnoff_system)
            # Run some dynamics
            molecule = self.propagate_dynamics(molecule, smirnoff_system)
            # Compare energies again
            self.compare_energies(molecule, openmm_system, smirnoff_system)
def _oe_render_parent(
    parent: Molecule,
    rotor_bonds: Optional[Collection[BondTuple]] = None,
    image_width: int = 572,
    image_height: int = 198,
) -> str:
    """Draw the parent molecule with OpenEye and return the SVG text.

    Args:
        parent: The molecule to depict.
        rotor_bonds: Bonds handed to ``_oe_wbo_label_display`` to build the bond
            property functor. ``None`` is treated as an empty list.
        image_width: Width passed to the OpenEye image and display options.
        image_height: Height passed to the OpenEye image and display options.

    Returns:
        The decoded output of ``OEWriteImageToString("svg", ...)``.
    """
    from openeye import oedepict

    if rotor_bonds is None:
        rotor_bonds = []

    # The OpenFF toolkit does not carry the atom map over to the OE object, so
    # it is set explicitly on every atom.
    oe_molecule = parent.to_openeye()

    for oe_atom in oe_molecule.GetAtoms():
        map_index = get_map_index(parent, oe_atom.GetIdx(), False)
        oe_atom.SetMapIdx(map_index)

    oedepict.OEPrepareDepiction(oe_molecule)

    # Common display options.
    canvas = oedepict.OEImage(image_width, image_height)

    options = oedepict.OE2DMolDisplayOptions(
        image_width, image_height, oedepict.OEScale_AutoScale
    )
    options.SetTitleLocation(oedepict.OETitleLocation_Hidden)
    options.SetAtomColorStyle(oedepict.OEAtomColorStyle_WhiteMonochrome)
    options.SetAtomLabelFontScale(1.2)
    options.SetBondPropertyFunctor(_oe_wbo_label_display(rotor_bonds))

    oedepict.OERenderMolecule(canvas, oedepict.OE2DMolDisplay(oe_molecule, options))

    return oedepict.OEWriteImageToString("svg", canvas).decode()
def test_angle_potential_handler(self):
    """Check that a registered angle parameter yields a potential with the expected ``k``."""
    propane_topology = Topology.from_molecules(Molecule.from_smiles("CCC"))

    # Build a handler holding a single catch-all angle parameter.
    handler = AngleHandler(version=0.3)
    catch_all_angle = AngleHandler.AngleType(
        smirks="[*:1]~[*:2]~[*:3]",
        k=2.5 * omm_unit.kilocalorie_per_mole / omm_unit.radian**2,
        angle=100 * omm_unit.degree,
        id="b1000",
    )
    handler.add_parameter(catch_all_angle.to_dict())

    from openff.system.stubs import ForceField

    force_field = ForceField()
    force_field.register_parameter_handler(handler)

    potentials = force_field["Angles"].create_potential(propane_topology)

    # Look up the potential assigned to the angle between atoms 0, 1 and 2.
    slot = potentials.slot_map[TopologyKey(atom_indices=(0, 1, 2))]
    force_constant = potentials.potentials[slot].parameters["k"]

    expected_unit = unit.Unit("kilocalorie / (mole * radian ** 2)")
    assert force_constant.to(expected_unit).magnitude == pytest.approx(2.5)
def test_bond_potential_handler(self):
    """Check that a registered bond parameter yields a potential with the expected ``k``."""
    oxygen_topology = Topology.from_molecules(Molecule.from_smiles("O=O"))

    # Build a handler holding a single catch-all bond parameter.
    handler = BondHandler(version=0.3)
    catch_all_bond = BondHandler.BondType(
        smirks="[*:1]~[*:2]",
        k=1.5 * omm_unit.kilocalorie_per_mole / omm_unit.angstrom**2,
        length=1.5 * omm_unit.angstrom,
        id="b1000",
    )
    handler.add_parameter(catch_all_bond.to_dict())

    from openff.system.stubs import ForceField

    force_field = ForceField()
    force_field.register_parameter_handler(handler)

    potentials = force_field["Bonds"].create_potential(oxygen_topology)

    # Look up the potential assigned to the bond between atoms 0 and 1.
    slot = potentials.slot_map[TopologyKey(atom_indices=(0, 1))]
    force_constant = potentials.potentials[slot].parameters["k"]

    expected_unit = unit.Unit("kilocalorie / (angstrom ** 2 * mole)")
    assert force_constant.to(expected_unit).magnitude == pytest.approx(1.5)
def test_ignore_errors_all_datasets(fractal_compute_server, factory_type, capsys):
    """
    For each dataset type, check that submitting with incomplete basis coverage
    raises ``MissingBasisCoverageError`` when ``ignore_errors=False``, and emits
    a ``UserWarning`` plus the verbose entry count when ``ignore_errors=True``.
    """
    client = FractalClient(fractal_compute_server)

    # A boron-containing molecule is used as the test input.
    boron_molecule = Molecule.from_smiles("OB(O)C1=CC=CC=C1")

    torsion_scanner = workflow_components.ScanEnumerator()
    torsion_scanner.add_torsion_scan(smarts="[#6:1]~[#6:2]-[B:3]~[#8:4]")

    factory = factory_type()
    factory.add_workflow_components(torsion_scanner)

    # Replace the default specs with a single MM spec.
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley",
        spec_description="standard parsley spec",
    )

    dataset = factory.create_dataset(
        dataset_name=f"Test ignore_error for {factory.type}",
        molecules=boron_molecule,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # Without ignore_errors the submission must fail outright.
    with pytest.raises(MissingBasisCoverageError):
        dataset.submit(client=client, ignore_errors=False)

    # With ignore_errors the submission goes through but warns, and the verbose
    # output reports the number of new entries.
    with pytest.warns(UserWarning):
        dataset.submit(client=client, ignore_errors=True, verbose=True)
        captured = capsys.readouterr()
        expected_output = (
            f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"
        )
        assert captured.out == expected_output
def water_box(n_molecules: int) -> Tuple[Topology, unit.Quantity]:
    """
    Build a water box with the requested number of water molecules.

    A single water conformer is replicated on a cubic grid with 2.5 angstrom
    spacing, and the resulting system is also written to ``input.pdb`` in the
    current working directory.

    Parameters
    ----------
    n_molecules
        The number of water molecules that should be put into the water box

    Returns
    -------
        The openff.toolkit Topology of the system and the position array wrapped
        with units.
    """
    # Create a topology containing water molecules.
    water = Molecule.from_smiles("O")
    water.generate_conformers(n_conformers=1)

    topology = Topology.from_molecules([water] * n_molecules)
    topology.box_vectors = (
        numpy.eye(3) * math.ceil(n_molecules ** (1 / 3) + 2) * 2.5 * unit.angstrom
    )

    # Number of grid points along each axis needed to hold every molecule.
    n_per_side = math.ceil(n_molecules ** (1 / 3))
    grid = range(n_per_side)

    reference = water.conformers[0].value_in_unit(unit.angstrom)

    shifted_copies = [
        reference + numpy.array([[x, y, z]]) * 2.5
        for x in grid
        for y in grid
        for z in grid
    ]

    # The grid may hold more molecules than requested, so trim the surplus atoms.
    stacked = numpy.vstack(shifted_copies)
    positions = stacked[: topology.n_topology_atoms, :] * unit.angstrom

    with open("input.pdb", "w") as pdb_file:
        app.PDBFile.writeFile(topology.to_openmm(), positions, pdb_file)

    return topology, positions
def test_from_openmm_missing_conect(self):
    """
    Test creation of an OpenFF Topology object from an OpenMM Topology
    when the origin PDB lacks CONECT records
    """
    from simtk.openmm import app

    pdb_path = get_data_file_path("systems/test_systems/1_ethanol_no_conect.pdb")
    pdbfile = app.PDBFile(pdb_path)

    unique_molecules = [Molecule.from_smiles("CCO")]

    # ``pytest.raises`` treats this text as a regular expression searched for
    # in the error message.
    expected_message = (
        "No match found for molecule C. This would be a "
        "very unusual molecule to try and parameterize, "
        "and it is likely that the data source it was "
        "read from does not contain connectivity "
        "information. If this molecule is coming from "
        "PDB, please ensure that the file contains CONECT "
        "records."
    )

    with pytest.raises(ValueError, match=expected_message):
        Topology.from_openmm(pdbfile.topology, unique_molecules=unique_molecules)
def create_index(self, molecule: off.Molecule) -> str:
    """
    Build the torsion-specific dataset index for a molecule from its atom map.

    Args:
        molecule: The molecule for which the dataset index will be generated.
            Its ``properties`` must hold an ``"atom_map"`` with either 4 or 8
            entries.

    Returns:
        The mapped, isomeric, explicit-hydrogen SMILES produced by
        ``molecule.to_smiles``.

    Important:
        This dataset uses a non-standard indexing in which the mapped indices
        mark the atoms of the torsion(s) to be rotated.
    """
    properties = molecule.properties
    assert "atom_map" in properties.keys()

    n_mapped_atoms = len(properties["atom_map"])
    assert n_mapped_atoms in (4, 8)

    return molecule.to_smiles(isomeric=True, explicit_hydrogens=True, mapped=True)
def test_enumerating_stereoisomers_apply():
    """
    Test the stereoisomer enumeration.
    """
    component = workflow_components.EnumerateStereoisomers()
    # set the options
    component.undefined_only = True
    component.rationalise = True

    input_molecules = get_stereoisomers()
    result = component.apply(
        input_molecules, processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )

    # every input molecule must survive the component
    for input_molecule in input_molecules:
        assert input_molecule in result.molecules

    # make sure no molecules have undefined stereo: a SMILES round trip through
    # RDKit must reproduce the same molecule
    for molecule in result.molecules:
        round_tripped = Molecule.from_smiles(
            molecule.to_smiles(), toolkit_registry=RDKitToolkitWrapper()
        )
        assert round_tripped == molecule
        assert molecule.n_conformers >= 1
def test_scan_enumerator_2d():
    """
    Make sure one combination of the 2D scan is tagged.
    """
    molecule = Molecule.from_smiles("COc1ccc(cc1)N")

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_double_torsion(
        smarts1="[*:1]-[#7X3+0:2]-[#6:3]@[#6,#7:4]",
        smarts2="[#7X3+0:1](-[*:3])(-[*:4])-[#6:2]@[#6,#7]",
        scan_range1=(-165, 180),
        scan_range2=(-60, 60),
        scan_increments=[15, 4],
    )

    result = enumerator.apply(
        [molecule], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )
    assert result.n_molecules == 1

    # The tags are read back from the input molecule's properties.
    indexer = molecule.properties["dihedrals"]
    assert indexer.n_double_torsions == 1

    tagged_scan = indexer.double_torsions[((5, 8), (5, 17))]
    assert tagged_scan.scan_range1 == (-165, 180)
    assert tagged_scan.scan_range2 == (-60, 60)
def test_mbuild_conversion_first_conformer_used(self):
    """Test that only the first conformer in an OFFMol is used"""
    offmol = Molecule.from_smiles("C1=CC=C(C=C1)C2=CC=C(C=C2)C3=CC=CC=C3")
    offmol.generate_conformers(n_conformers=3, rms_cutoff=0.0 * simtk_unit.angstrom)

    compound = offmol_to_compound(offmol)

    def conformer_in_nm(index):
        # Coordinates of the requested conformer as a plain array in nanometers.
        return offmol.conformers[index].value_in_unit(simtk_unit.nanometer)

    # The compound must carry exactly the first conformer's coordinates ...
    np.testing.assert_equal(conformer_in_nm(0), compound.xyz)

    # ... and must differ from each of the remaining conformers.
    for other_index in (1, 2):
        with np.testing.assert_raises(AssertionError):
            np.testing.assert_equal(conformer_in_nm(other_index), compound.xyz)
def dgl_carboxylate():
    """Build a ``DGLMolecule`` batching the resonance forms of a mapped carboxylate.

    The per-form graphs are batched with ``dgl.batch`` and the batch node / edge
    counts are then collapsed to a single summed entry each.
    """
    carboxylate: Molecule = Molecule.from_mapped_smiles("[H:1][C:2](=[O:3])[O-:4]")

    graphs = []

    for form in enumerate_resonance_forms(
        carboxylate, lowest_energy_only=True, as_dicts=False
    ):
        graphs.append(DGLMolecule._molecule_to_dgl(form, [], []))

    batched = dgl.batch(graphs)

    # Replace the per-graph node counts with one total.
    total_nodes = batched.batch_num_nodes().sum().reshape((-1,))
    batched.set_batch_num_nodes(total_nodes)

    # Do the same for the edge counts of every canonical edge type.
    total_edges = {}

    for edge_type in batched.canonical_etypes:
        total_edges[edge_type] = batched.batch_num_edges(edge_type).sum().reshape((-1,))

    batched.set_batch_num_edges(total_edges)

    return DGLMolecule(batched, len(graphs))
def filter_function(data_row):
    """Return ``True`` only if every component listed in the row is net neutral.

    The row is read through its ``"N Components"`` entry and the
    ``"Component <i>"`` entries (1-based), each of which is parsed as SMILES.
    """

    def is_neutral(smiles):
        # Sum the formal charges, unwrapping any that carry units.
        molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)

        # noinspection PyUnresolvedReferences
        total_charge = sum(
            [
                atom.formal_charge
                if isinstance(atom.formal_charge, int)
                else atom.formal_charge.value_in_unit(simtk_unit.elementary_charge)
                for atom in molecule.atoms
            ]
        )
        return numpy.isclose(total_charge, 0.0)

    component_smiles = (
        data_row[f"Component {index + 1}"]
        for index in range(data_row["N Components"])
    )

    # ``all`` stops at the first charged component, like an early return would.
    return all(is_neutral(smiles) for smiles in component_smiles)
def test_gmx_14_energies_exist():
    """Check that GROMACS reports non-zero vdW and electrostatic energies for ClC#CCl."""
    # TODO: Make sure 1-4 energies are accurate, not just existent

    # Use a molecule with only one 1-4 interaction, and
    # make it between heavy atoms because H-H 1-4 are weak
    molecule = Molecule.from_smiles("ClC#CCl")
    molecule.name = "HPER"
    molecule.generate_conformers(n_conformers=1)

    force_field = ForceField("openff-1.0.0.offxml")

    interchange = Interchange.from_smirnoff(force_field, molecule.to_topology())
    interchange.positions = molecule.conformers[0]

    # Put this molecule in a large box with cut-off electrostatics
    # to prevent it from interacting with images of itself
    interchange.box = [40, 40, 40]
    interchange["Electrostatics"].method = "cutoff"

    energies = get_gromacs_energies(interchange).energies

    # The only possible non-bonded interactions should be from 1-4 intramolecular interactions
    for term in ("vdW", "Electrostatics"):
        assert energies[term].m != 0.0
def to_openff_Molecule(item, molecular_system=None, atom_indices='all', structure_indices='all'):
    """Convert a SMILES item into an ``openff.Molecule`` item.

    The SMILES is parsed with ``Molecule.from_smiles`` and the result is passed
    through the ``openff.Molecule`` -> ``openff.Molecule`` converter with the
    requested atom and structure selections.

    Returns a tuple of the converted item and either the molecular system
    combined with ``item``, or ``None`` when no molecular system is given.
    """
    from openff.toolkit.topology import Molecule
    from molsysmt.api_forms.api_openff_Molecule import to_openff_Molecule as openff_Molecule_to_openff_Molecule

    tmp_item = openff_Molecule_to_openff_Molecule(
        Molecule.from_smiles(item),
        atom_indices=atom_indices,
        structure_indices=structure_indices,
        copy_if_all=False,
    )

    tmp_molecular_system = None

    if molecular_system is not None:
        tmp_molecular_system = molecular_system.combine_with_items(
            item, atom_indices=atom_indices, structure_indices=structure_indices
        )

    return tmp_item, tmp_molecular_system
def test_are_conformers_identical(smiles, conformer_a):
    """Check that a symmetry-permuted conformer is identical and a scaled one is not."""
    molecule: Molecule = Molecule.from_smiles(smiles)

    if conformer_a is None:
        molecule.generate_conformers(n_conformers=1)
        conformer_a = molecule.conformers[0].value_in_unit(unit.angstrom)

    # Create a permuted version of the conformer, permuting only topology symmetric
    # atoms.
    mapped_smiles = molecule.to_smiles(isomeric=False, mapped=True)
    self_matches = GLOBAL_TOOLKIT_REGISTRY.call(
        "find_smarts_matches", molecule, mapped_smiles
    )

    # Take the first match that is not the identity ordering.
    permutation = next(
        match for match in self_matches if match != tuple(range(len(match)))
    )

    perturbed = perturb_conformer(conformer_a.copy(), False)
    conformer_b = perturbed[permutation, :]

    assert are_conformers_identical(molecule, conformer_a, conformer_b)
    assert not are_conformers_identical(molecule, conformer_a, conformer_b * 2.0)
def test_torsiondrive_torsion_string():
    """
    Test the torsiondrive factories ability to create a torsion string for a given bond.
    """
    factory = TorsiondriveDatasetFactory()
    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")

    rotatable_bonds = methanol.find_rotatable_bonds()
    assert len(rotatable_bonds) == 1

    torsion = factory._get_torsion_string(bond=rotatable_bonds[0])

    # now make sure this torsion is in the propers list, in either direction
    reference_torsions = [
        tuple(atom.molecule_atom_index for atom in proper)
        for proper in methanol.propers
    ]

    assert (
        torsion in reference_torsions
        or tuple(reversed(torsion)) in reference_torsions
    )
def smiles_to_molecule(smiles, add_atom_map: bool = False) -> Molecule:
    """Create a molecule object from an input SMILES pattern.

    Parameters
    ----------
    smiles : str
        SMILES representation of desired molecule.
    add_atom_map
        Whether to canonically order the atoms and store a 1-based atom map
        under ``properties["atom_map"]``.

    Returns
    -------
        The molecule built from the SMILES, parsed with undefined
        stereochemistry allowed.
    """
    molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)

    if not add_atom_map:
        return molecule

    # Add canonical ordered atom maps: atom index i is given map index i + 1.
    ordered = molecule.canonical_order_atoms()
    ordered.properties["atom_map"] = {
        atom_index: atom_index + 1 for atom_index in range(ordered.n_atoms)
    }

    return ordered
def from_smiles(
    cls: Type["DGLMolecule"],
    smiles: str,
    atom_features: List[AtomFeature],
    bond_features: List[BondFeature],
) -> "DGLMolecule":
    """Build a molecular graph representation from a SMILES pattern.

    The SMILES is first parsed into an OpenFF molecule, which is then handed
    to ``cls.from_openff`` together with the requested features.

    Args:
        smiles: The SMILES representation of the molecule to store in the graph.
        atom_features: The atom features to compute for the molecule.
        bond_features: The bond features to compute for the molecule.

    Returns:
        The constructed graph.
    """
    from openff.toolkit.topology import Molecule

    molecule = Molecule.from_smiles(smiles)

    return cls.from_openff(molecule, atom_features, bond_features)
def label_function(molecule: Molecule) -> Dict[str, torch.Tensor]:
    """Generates a set of train / val / test labels for a given molecule.

    The label is the mean over the selected conformers of the ``am1-mulliken``
    partial charges, stored under the ``"am1-charges"`` key as a float tensor.
    Note that conformers and partial charges are (re)assigned on ``molecule``.
    """
    from simtk import unit

    # Generate a set of ELF10 conformers.
    molecule.generate_conformers(n_conformers=800, rms_cutoff=0.05 * unit.angstrom)
    molecule.apply_elf_conformer_selection()

    def charges_on(conformer):
        # Charge the molecule on one conformer and read the values back unit-less.
        molecule.assign_partial_charges("am1-mulliken", use_conformers=[conformer])
        return molecule.partial_charges.value_in_unit(unit.elementary_charge)

    per_conformer_charges = [charges_on(conformer) for conformer in molecule.conformers]
    mean_charges = numpy.mean(per_conformer_charges, axis=0)

    return {"am1-charges": torch.from_numpy(mean_charges).float()}
def add_molecule(self, molecule: off.Molecule) -> None:
    """
    Register a molecule with this filter.

    The molecule is stored in ``self.molecules`` as its isomeric,
    explicit-hydrogen SMILES rather than as the molecule object itself.
    """
    smiles = molecule.to_smiles(isomeric=True, explicit_hydrogens=True)
    self.molecules.append(smiles)
def unique_molecules(self):
    """Return the molecules built from a fixed list of six SMILES patterns."""
    smiles_patterns = ["O", "C1CCCCC1", "C", "CCC", "CCO", "CCCCO"]
    return list(map(Molecule.from_smiles, smiles_patterns))
def from_openff_molecule(cls, molecule: Molecule) -> "MoleculeAttributes":
    """Create the Cmiles metadata for an OpenFF molecule object.

    Parameters:
        molecule: The molecule for which the cmiles data will be generated.

    Returns:
        The Cmiles identifiers generated for the input molecule.

    Note:
        The Cmiles identifiers currently include:

        - `canonical_smiles`
        - `canonical_isomeric_smiles`
        - `canonical_explicit_hydrogen_smiles`
        - `canonical_isomeric_explicit_hydrogen_smiles`
        - `canonical_isomeric_explicit_hydrogen_mapped_smiles`
        - `molecular_formula`
        - `standard_inchi`
        - `inchi_key`
        - `fixed_hydrogen_inchi`
        - `fixed_hydrogen_inchi_key`
        - `unique_fixed_hydrogen_inchi_keys`
    """
    # One fixed-hydrogen InChI key per component returned by the splitter.
    unique_fixed_hydrogen_inchi_keys = {
        component.to_inchikey(fixed_hydrogens=True)
        for component in split_openff_molecule(molecule=molecule)
    }

    # SMILES flavour -> (isomeric, explicit_hydrogens, mapped)
    smiles_flavours = {
        "canonical_smiles": (False, False, False),
        "canonical_isomeric_smiles": (True, False, False),
        "canonical_explicit_hydrogen_smiles": (False, True, False),
        "canonical_isomeric_explicit_hydrogen_smiles": (True, True, False),
        "canonical_isomeric_explicit_hydrogen_mapped_smiles": (True, True, True),
    }

    cmiles = {}

    for key, (isomeric, explicit_hydrogens, mapped) in smiles_flavours.items():
        cmiles[key] = molecule.to_smiles(
            isomeric=isomeric, explicit_hydrogens=explicit_hydrogens, mapped=mapped
        )

    cmiles["molecular_formula"] = molecule.hill_formula
    cmiles["standard_inchi"] = molecule.to_inchi(fixed_hydrogens=False)
    cmiles["inchi_key"] = molecule.to_inchikey(fixed_hydrogens=False)
    cmiles["fixed_hydrogen_inchi"] = molecule.to_inchi(fixed_hydrogens=True)
    cmiles["fixed_hydrogen_inchi_key"] = molecule.to_inchikey(fixed_hydrogens=True)
    cmiles["unique_fixed_hydrogen_inchi_keys"] = unique_fixed_hydrogen_inchi_keys

    return MoleculeAttributes(**cmiles)
def openff_carboxylate() -> Molecule:
    """Return a carboxylate molecule built from a fully mapped SMILES pattern."""
    mapped_smiles = "[C:1]([O-:2])(=[O:3])([H:4])"
    return Molecule.from_mapped_smiles(mapped_smiles)
def run():
    """Run a short expanded-ensemble style simulation over a few small alkanes.

    Starting from ``"CC"``, each of the 50 iterations proposes a transformation
    to one of the molecules in the SMILES list, alchemically removes and
    inserts atoms with NCMC switching, accepts or rejects the move, records
    which molecule the sampler currently occupies and, when propagation is
    enabled, runs 1000 steps of Langevin dynamics. The acceptance count and
    per-molecule occupancy are printed at the end.
    """
    # Create initial model system, topology, and positions.
    smiles_list = ["CC", "CCC", "CCCC"]

    initial_molecule = smiles_to_oemol("CC")
    system_generator = SystemGenerator(
        molecules=[Molecule.from_openeye(initial_molecule)]
    )
    initial_sys, initial_pos, initial_top = OEMol_to_omm_ff(
        initial_molecule, system_generator
    )

    smiles = "CC"
    stats = {candidate: 0 for candidate in smiles_list}

    # Run parameters
    temperature = 300.0 * unit.kelvin  # temperature
    pressure = 1.0 * unit.atmospheres  # pressure (not used further in this function)
    collision_rate = 5.0 / unit.picoseconds  # collision rate for Langevin dynamics

    # Create proposal metadata, such as the list of molecules to sample (SMILES here)
    # proposal_metadata = {"smiles_list": smiles_list}
    list_of_oemols = [smiles_to_oemol(candidate) for candidate in smiles_list]

    transformation = topology_proposal.SmallMoleculeSetProposalEngine(
        list_of_oemols=list_of_oemols, system_generator=system_generator
    )
    # transformation = topology_proposal.SingleSmallMolecule(proposal_metadata)

    # Initialize weight calculation engine, along with its metadata
    bias_calculator = bias_engine.MinimizedPotentialBias(smiles_list)

    # Initialize NCMC engines.
    switching_timestep = 1.0 * unit.femtosecond  # Timestep for NCMC velocity Verlet integrations
    switching_nsteps = 10  # Number of steps to use in NCMC integration

    # Functional schedules to use in terms of `lambda`, which is switched from
    # 0->1 for creation and 1->0 for deletion
    switching_functions = {
        "lambda_sterics": "lambda",
        "lambda_electrostatics": "lambda",
        "lambda_bonds": "lambda",
        "lambda_angles": "sqrt(lambda)",
        "lambda_torsions": "lambda",
    }

    ncmc_engine = ncmc_switching.NCMCEngine(
        temperature=temperature,
        timestep=switching_timestep,
        nsteps=switching_nsteps,
        functions=switching_functions,
    )

    # Initialize GeometryEngine
    geometry_metadata = {"data": 0}  # currently ignored
    geometry_engine = geometry.FFAllAngleGeometryEngine(geometry_metadata)

    # Run a number of iterations.
    niterations = 50
    system, topology, positions = initial_sys, initial_top, initial_pos
    current_log_weight = bias_calculator.g_k(smiles)
    n_accepted = 0
    propagate = True

    for _ in range(niterations):
        # Propose a transformation from one chemical species to another.
        top_proposal = transformation.propose(
            system, topology, positions, {"molecule_smiles": smiles}
        )  # Get a new molecule

        # QUESTION: What about instead initializing StateWeight once, and then using
        # log_state_weight = state_weight.computeLogStateWeight(new_topology, new_system, new_metadata)?
        log_weight = bias_calculator.g_k(top_proposal.metadata["molecule_smiles"])

        # Perform alchemical transformation.

        # Alchemically eliminate atoms being removed.
        ncmc_old_positions, ncmc_elimination_logp = ncmc_engine.integrate(
            top_proposal, positions, direction="delete"
        )

        # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
        # QUESTION: Again, maybe we want to have the geometry engine initialized once only?
        geometry_proposal = geometry_engine.propose(
            top_proposal.new_to_old_atom_map,
            top_proposal.new_system,
            system,
            ncmc_old_positions,
        )

        # Alchemically introduce new atoms.
        ncmc_new_positions, ncmc_introduction_logp = ncmc_engine.integrate(
            top_proposal, geometry_proposal.new_positions, direction="insert"
        )

        # Compute total log acceptance probability, including all components.
        logp_accept = (
            top_proposal.logp_proposal
            + geometry_proposal.logp
            + ncmc_elimination_logp
            + ncmc_introduction_logp
            + log_weight / log_weight.unit
            - current_log_weight / current_log_weight.unit
        )

        # Accept or reject. A random number is only drawn when the move is not
        # accepted outright, and moves producing NaN positions are always rejected.
        passes_metropolis = (logp_accept >= 0.0) or (
            np.random.uniform() < np.exp(logp_accept)
        )

        if passes_metropolis and not np.any(np.isnan(ncmc_new_positions)):
            # Accept.
            n_accepted += 1
            system = top_proposal.new_system
            topology = top_proposal.new_topology
            positions = ncmc_new_positions
            current_log_weight = log_weight
            smiles = top_proposal.metadata["molecule_smiles"]
        else:
            # Reject.
            logging.debug("reject")

        # Record which molecule is occupied after this iteration.
        stats[smiles] += 1
        print(positions)

        if propagate:
            # Propagate on a copy of the system, then tear everything down again.
            p_system = copy.deepcopy(system)
            integrator = openmm.LangevinIntegrator(
                temperature, collision_rate, switching_timestep
            )
            context = openmm.Context(p_system, integrator)
            context.setPositions(positions)
            print(context.getState(getEnergy=True).getPotentialEnergy())
            integrator.step(1000)
            state = context.getState(getPositions=True)
            positions = state.getPositions(asNumpy=True)
            del context, integrator, p_system

    print(
        "The total number accepted was %d out of %d iterations"
        % (n_accepted, niterations)
    )
    print(stats)
def test_parmed_openmm(tmpdir, smiles):
    """Compare GROMACS energies of the same system written via three export routes.

    The system is exported through OpenMM, through the indirect conversion and
    through the direct call; the first two are compared with default
    tolerances and the last two with loosened bond and angle tolerances.
    """
    tmpdir.chdir()

    parsley = ForceField("openff_unconstrained-1.0.0.offxml")

    mol = Molecule.from_smiles(smiles)
    mol.generate_conformers(n_conformers=1)
    top = Topology.from_molecules(mol)

    box = 4 * np.eye(3) * unit.nanometer

    # Route 1: through OpenMM.
    with tempfile.TemporaryDirectory() as omm_tempdir, temporary_cd(omm_tempdir):
        openff_openmm_pmd_gmx(
            topology=top,
            forcefield=parsley,
            box=box,
            prefix="via_openmm",
        )
        ener1 = _run_gmx_energy(
            top_file="via_openmm.top",
            gro_file="via_openmm.gro",
            mdp_file=_get_mdp_file("cutoff"),
        )

    # Route 2: through the indirect conversion.
    with tempfile.TemporaryDirectory() as off_tempdir, temporary_cd(off_tempdir):
        openff_pmd_gmx_indirect(
            topology=top,
            forcefield=parsley,
            box=box,
            prefix="via_conversion",
        )
        ener2 = _run_gmx_energy(
            top_file="via_conversion.top",
            gro_file="via_conversion.gro",
            mdp_file=_get_mdp_file("cutoff"),
        )

    ener1.compare(ener2)

    # Route 3: through the direct call.
    with tempfile.TemporaryDirectory() as off_tempdir, temporary_cd(off_tempdir):
        openff_pmd_gmx_direct(
            topology=top,
            forcefield=parsley,
            box=box,
            prefix="via_call",
        )
        ener3 = _run_gmx_energy(
            top_file="via_call.top",
            gro_file="via_call.gro",
            mdp_file=_get_mdp_file("cutoff"),
        )

    loosened_tolerances = {
        "Bond": 1.0 * omm_unit.kilojoule_per_mole,
        "Angle": 0.22 * omm_unit.kilojoule_per_mole,
    }
    ener2.compare(ener3, custom_tolerances=loosened_tolerances)
def _extract_oe_fragment(
    molecule: Molecule, atom_indices: Set[int], bond_indices: Set[Tuple[int, int]]
) -> Molecule:
    """Extract the sub-molecule spanned by the given map indices using OpenEye.

    Args:
        molecule: The parent molecule; its ``properties["atom_map"]`` supplies
            the atom index -> map index assignment.
        atom_indices: Map indices of the atoms to keep. This set is extended in
            place with the map indices of mapped hydrogens bonded to those atoms.
        bond_indices: Pairs of map indices of the bonds to keep. This set is
            extended in place with the bonds to the added hydrogens.

    Returns:
        The fragment converted back to an OpenFF molecule, with undefined
        stereochemistry allowed.

    Raises:
        ValueError: If a requested bond does not exist in the parent molecule.
    """
    from openeye import oechem

    oe_molecule = molecule.to_openeye()

    def atom_with_map_index(map_index):
        # Look an OE atom up by the map index assigned below.
        return oe_molecule.GetAtom(oechem.OEHasMapIdx(map_index))

    # Restore the map indices as to_openeye does not automatically add them.
    for atom_index, map_index in molecule.properties["atom_map"].items():
        oe_molecule.GetAtom(oechem.OEHasAtomIdx(atom_index)).SetMapIdx(map_index)

    # Include any Hs bonded to the included atom set so we can retain their map
    # indices. A snapshot is iterated because the set grows inside the loop.
    for map_index in set(atom_indices):

        for neighbour in atom_with_map_index(map_index).GetAtoms():

            neighbour_map_index = neighbour.GetMapIdx()

            if (
                neighbour.GetAtomicNum() == 1
                and neighbour_map_index >= 1
                and neighbour_map_index not in atom_indices
            ):
                atom_indices.add(neighbour_map_index)
                bond_indices.add((map_index, neighbour_map_index))

    atom_bond_set = oechem.OEAtomBondSet()

    for map_index in atom_indices:
        atom_bond_set.AddAtom(atom_with_map_index(map_index))

    for map_index_1, map_index_2 in bond_indices:

        bond = oe_molecule.GetBond(
            atom_with_map_index(map_index_1), atom_with_map_index(map_index_2)
        )

        if not bond:
            raise ValueError(f"{(map_index_1, map_index_2)} is a disconnected bond")

        atom_bond_set.AddBond(bond)

    atom_predicate = oechem.OEIsAtomMember(atom_bond_set.GetAtoms())
    bond_predicate = oechem.OEIsBondMember(atom_bond_set.GetBonds())

    fragment = oechem.OEMol()
    oechem.OESubsetMol(fragment, oe_molecule, atom_predicate, bond_predicate, True)

    oechem.OEAddExplicitHydrogens(fragment)
    oechem.OEPerceiveChiral(fragment)

    # The atom map is deliberately not restored here: for symmetric molecules
    # doing so can change the atom map. It should be restored for
    # combinatorial fragmentation.

    # Perceive stereo and check that defined stereo did not change
    oechem.OEPerceiveChiral(fragment)
    oechem.OE3DToAtomStereo(fragment)
    oechem.OE3DToBondStereo(fragment)

    return Molecule.from_openeye(fragment, allow_undefined_stereo=True)
def _extract_rd_fragment(
    molecule: Molecule, atom_indices: Set[int], bond_indices: Set[Tuple[int, int]]
) -> Molecule:
    """Extract the sub-molecule spanned by the given map indices using RDKit.

    Args:
        molecule: The parent molecule.
        atom_indices: Map indices of the atoms to keep.
        bond_indices: Pairs of map indices of the bonds to keep.

    Returns:
        The molecule parsed from the fragment SMILES written by
        ``Chem.MolFragmentToSmiles``, with undefined stereochemistry allowed.
    """
    from rdkit import Chem

    rd_molecule = Chem.RWMol(molecule.to_rdkit())
    rd_atoms_by_map: Dict[int, Chem.Atom] = {}

    # Restore the map indices as to_rdkit does not automatically add them.
    for rd_atom in rd_molecule.GetAtoms():
        rd_atom.SetAtomMapNum(get_map_index(molecule, rd_atom.GetIdx()))
        rd_atoms_by_map[rd_atom.GetAtomMapNum()] = rd_atom

    atoms_to_use = [get_atom_index(molecule, map_index) for map_index in atom_indices]
    bonds_to_use = []

    for map_index_1, map_index_2 in bond_indices:
        requested_bond = rd_molecule.GetBondBetweenAtoms(
            get_atom_index(molecule, map_index_1),
            get_atom_index(molecule, map_index_2),
        )
        bonds_to_use.append(requested_bond.GetIdx())

    # Make sure to include any Hs bonded to the included atom set otherwise radicals
    # will form.
    for map_index in atom_indices:

        parent_atom = rd_atoms_by_map[map_index]

        for neighbour in parent_atom.GetNeighbors():

            neighbour_map_index = neighbour.GetAtomMapNum()

            if (
                neighbour.GetAtomicNum() == 1
                and neighbour_map_index >= 1
                and neighbour_map_index not in atom_indices
            ):
                atoms_to_use.append(neighbour.GetIdx())
                bonds_to_use.append(
                    rd_molecule.GetBondBetweenAtoms(
                        parent_atom.GetIdx(), neighbour.GetIdx()
                    ).GetIdx()
                )

    # Add additional hydrogens to atoms where the total valence will change likewise to
    # ensure the valence does not change.
    rd_atoms_by_index = {
        rd_atom.GetIdx(): rd_atom for rd_atom in rd_molecule.GetAtoms()
    }

    # A snapshot is iterated because capping hydrogens are appended in the loop.
    for atom_index in list(atoms_to_use):

        atom = rd_atoms_by_index[atom_index]

        old_valence = atom.GetTotalValence()
        new_valence = atom.GetTotalValence()

        # Remove the contribution of every bond that leaves the retained atom set.
        for neighbour_bond in atom.GetBonds():

            bond_is_retained = (
                neighbour_bond.GetBeginAtomIdx() in atoms_to_use
                and neighbour_bond.GetEndAtomIdx() in atoms_to_use
            )

            if not bond_is_retained:
                new_valence -= neighbour_bond.GetValenceContrib(atom)

        if numpy.isclose(old_valence, new_valence):
            # Skip the cases where the valence won't change
            continue

        n_retained_bonds = sum(
            1 for bond_tuple in bond_indices if atom.GetAtomMapNum() in bond_tuple
        )

        if atom.GetAtomicNum() == 6 and atom.GetIsAromatic() and n_retained_bonds == 1:
            # This is likely a cap carbon which was retained from an existing ring. Its
            # aromaticity needs to be cleared before calling ``MolFragmentToSmiles``,
            # which otherwise will (understandably) be confused and throw an exception.
            atom.SetIsAromatic(False)

        # Add a hydrogen to the atom whose valence will change.
        n_missing_hydrogens = int(numpy.rint(old_valence - new_valence))

        for _ in range(n_missing_hydrogens):

            new_atom_index = rd_molecule.AddAtom(Chem.Atom(1))
            rd_molecule.AddBond(atom_index, new_atom_index)

            new_bond = rd_molecule.GetBondBetweenAtoms(atom_index, new_atom_index)
            new_bond.SetBondType(Chem.BondType.SINGLE)
            new_bond.SetIsAromatic(False)

            atoms_to_use.append(new_atom_index)
            bonds_to_use.append(new_bond.GetIdx())

    fragment_smiles = Chem.MolFragmentToSmiles(rd_molecule, atoms_to_use, bonds_to_use)

    return Molecule.from_smiles(fragment_smiles, allow_undefined_stereo=True)
def __init__(self,
             protein_filename,
             mutation_chain_id,
             mutation_residue_id,
             proposed_residue,
             phase='complex',
             conduct_endstate_validation=True,
             ligand_input=None,
             ligand_index=0,
             water_model='tip3p',
             ionic_strength=0.15 * unit.molar,
             forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
             barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
             forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
             periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
             nonperiodic_forcefield_kwargs=None,
             small_molecule_forcefields='gaff-2.11',
             complex_box_dimensions=None,
             apo_box_dimensions=None,
             flatten_torsions=False,
             flatten_exceptions=False,
             repartitioned_endstate=None,
             **kwargs):
    """
    Build the hybrid topology factories for a protein point mutation.

    The apo protein is always processed; if ``ligand_input`` is given, a protein:ligand
    complex is built and processed as well. The results are stored on
    ``self.apo_htf`` and ``self.complex_htf`` (``None`` when there is no ligand).

    arguments
        protein_filename : str
            path to protein (to mutate); .pdb
        mutation_chain_id : str
            name of the chain to be mutated
        mutation_residue_id : str
            residue id to change
        proposed_residue : str
            three letter code of the residue to mutate to
        phase : str, default complex
            forwarded to ``self._solvate``; if phase == vacuum, then the complex will not
            be solvated with water; else, it will be solvated with tip3p
        conduct_endstate_validation : bool, default True
            whether to conduct an endstate validation of the HybridTopologyFactory. If using the
            RepartitionedHybridTopologyFactory, endstate validation cannot and will not be conducted.
        ligand_input : str or oechem.OEMol, default None
            ligand of interest (i.e. small molecule or protein): either a path to a
            .sdf or .pdb file, or an OEMol object
        ligand_index : int, default 0
            which ligand to use (only used when reading a .sdf file)
        water_model : str, default 'tip3p'
            solvent model to use for solvation
        ionic_strength : float * unit.molar, default 0.15 * unit.molar
            the total concentration of ions (both positive and negative) to add using Modeller.
            This does not include ions that are added to neutralize the system.
            Note that only monovalent ions are currently supported.
        forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield files for proteins and solvent
        barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
            barostat to use; ``temperature`` is the module-level constant
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs for system parametrization
        periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
            periodic forcefield kwargs for system parametrization
        nonperiodic_forcefield_kwargs : dict, default None
            non-periodic forcefield kwargs for system parametrization
        small_molecule_forcefields : str, default 'gaff-2.11'
            the forcefield string for small molecule parametrization
        complex_box_dimensions : Vec3, default None
            define box dimensions of complex phase;
            if None, padding is 1nm
        apo_box_dimensions : Vec3, default None
            define box dimensions of apo phase;
            if None, padding is 1nm
        flatten_torsions : bool, default False
            in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
        flatten_exceptions : bool, default False
            in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
        repartitioned_endstate : int, default None
            the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
            meaning a vanilla HybridTopologyFactory will be built.
        **kwargs
            accepted for call compatibility; not read by this method

    raises
        ValueError
            if ``repartitioned_endstate`` is not None, 0 or 1, or if ``ligand_input`` is
            a path that ends with neither '.sdf' nor 'pdb'

    NOTE : the list / dict / barostat defaults are created once, when the method is defined,
    and are therefore shared between calls. They are only passed on (never modified) here.

    TODO : allow argument for spectator ligands besides the 'ligand_input'

    """
    # Reject a bad endstate before any expensive work; previously this only surfaced as a
    # NameError on ``factory`` deep inside the pipeline loop.
    if repartitioned_endstate is not None and repartitioned_endstate not in [0, 1]:
        raise ValueError(f"repartitioned_endstate must be None, 0 or 1 (got {repartitioned_endstate!r})")

    # First thing to do is load the apo protein to mutate...
    with open(protein_filename, 'r') as protein_pdbfile:
        protein_pdb = app.PDBFile(protein_pdbfile)
    protein_positions = protein_pdb.positions
    # The openmm topology that is used downstream is the one round-tripped through mdtraj.
    protein_md_topology = md.Topology.from_openmm(protein_pdb.topology)
    protein_topology = protein_md_topology.to_openmm()
    protein_n_atoms = protein_md_topology.n_atoms

    # Load the ligand, if present
    molecules = []
    if ligand_input:
        if isinstance(ligand_input, str):
            if ligand_input.endswith('.sdf'):  # small molecule
                ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                ligand_positions = extractPositionsFromOEMol(ligand_mol)
                ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)
            elif ligand_input.endswith('pdb'):  # protein
                with open(ligand_input, 'r') as ligand_pdbfile:
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                ligand_positions = ligand_pdb.positions
                ligand_md_topology = md.Topology.from_openmm(ligand_pdb.topology)
            else:
                # Previously fell through to a NameError on ``ligand_md_topology``.
                raise ValueError(f"ligand file {ligand_input!r} not recognised. Please provide a path to a .pdb or .sdf file")
            ligand_n_atoms = ligand_md_topology.n_atoms

        elif isinstance(ligand_input, oechem.OEMol):  # oemol object
            molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
            ligand_positions = extractPositionsFromOEMol(ligand_input)
            ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_input)
            ligand_md_topology = md.Topology.from_openmm(ligand_topology)
            ligand_n_atoms = ligand_md_topology.n_atoms

        else:
            _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
            # NOTE(review): this returns before ``apo_htf`` / ``complex_htf`` are set,
            # leaving the instance only partially initialised.
            return

        # Now create a complex
        complex_md_topology = protein_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
        complex_positions[:protein_n_atoms, :] = protein_positions
        complex_positions[protein_n_atoms:, :] = ligand_positions

    # Now for a system_generator
    self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                            barostat=barostat,
                                            forcefield_kwargs=forcefield_kwargs,
                                            periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                            nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                            small_molecule_forcefield=small_molecule_forcefields,
                                            molecules=molecules,
                                            cache=None)

    # Solvate apo and complex...
    apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
    inputs = [apo_input]
    if ligand_input:
        inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=True)

    # The factory class only depends on the requested endstate, so pick it once.
    if repartitioned_endstate is None:
        factory = HybridTopologyFactory
    else:
        factory = RepartitionedHybridTopologyFactory

    # Run pipeline...
    htfs = []
    for (top, pos, system) in inputs:
        point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                    system_generator=self.system_generator,
                                                    chain_id=mutation_chain_id,  # Denote the chain id allowed to mutate (it's always a string variable)
                                                    max_point_mutants=1,
                                                    residues_allowed_to_mutate=[mutation_residue_id],  # The residue ids allowed to mutate
                                                    allowed_mutations=[(mutation_residue_id, proposed_residue)],  # The residue ids allowed to mutate with the three-letter code allowed to change
                                                    aggregate=True)  # Always allow aggregation

        topology_proposal = point_mutation_engine.propose(system, top)

        # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
        old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
        validate_bool = not (old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids)
        new_positions, _ = geometry_engine.propose(topology_proposal, pos, beta, validate_energy_bookkeeping=validate_bool)
        # The return value is unused; the call is kept because the ``reverse_*`` reduced
        # potentials read below are presumably populated by it (TODO confirm).
        geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta, validate_energy_bookkeeping=validate_bool)

        forward_htf = factory(topology_proposal=topology_proposal,
                              current_positions=pos,
                              new_positions=new_positions,
                              use_dispersion_correction=False,
                              functions=None,
                              softcore_alpha=None,
                              bond_softening_constant=1.0,
                              angle_softening_constant=1.0,
                              soften_only_new=False,
                              neglected_new_angle_terms=[],
                              neglected_old_angle_terms=[],
                              softcore_LJ_v2=True,
                              softcore_electrostatics=True,
                              softcore_LJ_v2_alpha=0.85,
                              softcore_electrostatics_alpha=0.3,
                              softcore_sigma_Q=1.0,
                              interpolate_old_and_new_14s=flatten_exceptions,
                              omitted_terms=None,
                              endstate=repartitioned_endstate,
                              flatten_torsions=flatten_torsions)

        # The assertion messages must use the local ``geometry_engine``: this method never
        # sets ``self._geometry_engine``, so referencing it would raise an AttributeError
        # in place of the intended AssertionError.
        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
            # Mirror the reverse branch; without this the name is unbound (or stale from
            # the previous iteration) when the endstates are validated.
            added_valence_energy = 0.0
        else:
            added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

        # Endstate validation is skipped for the repartitioned factory.
        if conduct_endstate_validation and repartitioned_endstate is None:
            zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal, forward_htf, added_valence_energy, subtracted_valence_energy, beta=beta, ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            if zero_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
            if one_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")

        htfs.append(forward_htf)

    # ``inputs`` is [apo] or [apo, complex], so the factories come out in that order.
    self.apo_htf = htfs[0]
    self.complex_htf = htfs[1] if ligand_input else None
def __init__(self, metadata=None, name="", *args, **kwargs):
    """Initialise the ``_OpenForceFieldMolecule`` and ``BaseLigand`` parts of this object.

    Parameters
    ----------
    metadata
        Forwarded to ``BaseLigand.__init__``.
    name
        Forwarded to ``BaseLigand.__init__``.
    *args, **kwargs
        Forwarded unchanged to ``_OpenForceFieldMolecule.__init__``.
    """
    # NOTE: ``metadata`` and ``name`` come first in the signature, so the first two
    # positional arguments bind to them and only the remainder reaches ``*args``.
    _OpenForceFieldMolecule.__init__(self, *args, **kwargs)
    # The two initialisers are called explicitly, each with its own subset of the
    # arguments.
    BaseLigand.__init__(self, name=name, metadata=metadata)