def test_parent_fragment_mapping(molecules):
    """
    Check that the fragment-to-parent atom mapping equals the expected map.

    ``molecules`` supplies the parent file name, the fragment file name and
    the expected atom map, in that order.
    """
    parent_file, fragment_file, expected_map = molecules
    parent = Molecule.from_file(get_data(parent_file), "sdf")
    fragment = Molecule.from_file(get_data(fragment_file), "sdf")
    computed_map = FragmentEngine._get_fragment_parent_mapping(
        fragment=fragment, parent=parent
    )
    assert computed_map == expected_map
def setUp(self):
    """Create the molecules shared by the tests of this case."""

    def _with_virtual_sites(smiles):
        # Build a molecule from SMILES, then attach one bond-charge virtual
        # site and one monovalent lone-pair virtual site that are anchored on
        # the first two carbons (and the first hydrogen of the first carbon).
        base = Molecule.from_smiles(smiles)
        carbon_atoms = [a for a in base.atoms if a.atomic_number == 6]
        first_carbon = carbon_atoms[0]
        second_carbon = carbon_atoms[1]
        first_carbon_hydrogens = [
            a for a in first_carbon.bonded_atoms if a.atomic_number == 1
        ]
        base.add_bond_charge_virtual_site(
            (first_carbon, second_carbon),
            0.1 * unit.angstrom,
            charge_increments=[0.1, 0.05] * unit.elementary_charge,
        )
        base.add_monovalent_lone_pair_virtual_site(
            (first_carbon_hydrogens[0], first_carbon, second_carbon),
            0.2 * unit.angstrom,
            20 * unit.degree,
            25 * unit.degree,
            charge_increments=[0.01, 0.02, 0.03] * unit.elementary_charge,
        )
        # Store a copy rather than the working molecule itself.
        return Molecule(base)

    self.empty_molecule = Molecule()
    self.ethane_from_smiles = Molecule.from_smiles("CC")
    self.ethene_from_smiles = Molecule.from_smiles("C=C")
    self.propane_from_smiles = Molecule.from_smiles("CCC")

    toluene_sdf = get_data_file_path("molecules/toluene.sdf")
    self.toluene_from_sdf = Molecule.from_file(toluene_sdf)
    if OpenEyeToolkitWrapper.is_available():
        # TODO: This will require openeye to load
        toluene_mol2 = get_data_file_path("molecules/toluene_charged.mol2")
        self.toluene_from_charged_mol2 = Molecule.from_file(toluene_mol2)
    self.charged_methylamine_from_smiles = Molecule.from_smiles(
        "[H]C([H])([H])[N+]([H])([H])[H]"
    )

    # Ethane and propane variants carrying virtual sites.
    self.ethane_from_smiles_w_vsites = _with_virtual_sites("CC")
    self.propane_from_smiles_w_vsites = _with_virtual_sites("CCC")
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare an abinitio target for fitting with gradient fitting disabled and
    check the files that are written for the single torsiondrive.
    """
    target = AbInitio_SMIRNOFF()
    target.fit_gradient = False
    schema = biphenyl_target(target=target)
    reference_mol = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    # load a previously saved scan result and attach it to the schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    schema.update_with_results(results=scan_results)
    assert schema.ready_for_fitting is True

    # everything below reads files relative to the temporary directory
    with temp_directory():
        target.prep_for_fitting(fitting_target=schema)

        # exactly one torsiondrive folder is expected
        folders = os.listdir(".")
        assert len(folders) == 1
        task_dir = folders[0]
        written_files = os.listdir(task_dir)
        for expected_name in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
        ):
            assert expected_name in written_files

        # the atom order in the pdb must match the input molecule
        pdb_mol = Molecule.from_file(
            os.path.join(task_dir, "molecule.pdb"), file_format="pdb"
        )
        isomorphic, atom_map = Molecule.are_isomorphic(
            reference_mol, pdb_mol, return_atom_map=True
        )
        assert isomorphic is True
        assert atom_map == {i: i for i in range(reference_mol.n_atoms)}

        # the mol2 file must carry partial charges
        mol2_mol = Molecule.from_file(
            os.path.join(task_dir, "molecule.mol2"), "mol2"
        )
        assert mol2_mol.partial_charges is not None

        # scan coordinates and energies must match the reference data
        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(task_dir, "qdata.txt")
        )
        # no gradients should have been written
        assert not gradients
        reference_data = schema.tasks[0].reference_data()
        for index, (coord, energy) in enumerate(zip(coords, energies)):
            reference_point = reference_data[index]
            assert reference_point.energy == energy
            assert coord == reference_point.molecule.geometry.flatten().tolist()
def test_collecting_results():
    """
    Check that an executor collects task results from a QCArchive instance.
    """
    # a client built with no arguments; the original comment describes this
    # as connecting to the public database
    qc_client = FractalClient()
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=biphenyl)

    executor = Executor()
    # point the executor at the dataset that should be searched
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"

    # hand-built collection dict holding a single torsion1d entry
    torsion_index = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    to_collect = {
        "torsion1d": {"default": [torsion_index]},
        "optimization": {},
        "hessian": {},
    }

    first_task = fitting_schema.tasks[0]
    executor.collect_task_results(
        task=first_task, collection_dict=to_collect, client=qc_client
    )
    # the task must now be flagged as ready
    assert fitting_schema.tasks[0].ready_for_fitting is True
def test_optimizer_explicit():
    """
    Call the executor's optimizer directly (no server) and check that the
    final smirks terms no longer carry the ``1e-5`` value.
    """
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=biphenyl)
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_schema.update_with_results(results=scan_results)

    # no server is needed: queue a single task for the optimizer by hand
    executor = Executor()
    executor.total_tasks = 1
    executor.opt_queue.put(fitting_schema.tasks[0])

    with temp_directory():
        executor.optimizer()
        # pull the finished task back out and fold it into the schema
        finished_task = executor.finished_tasks.get()
        updated_schema = executor.update_fitting_schema(
            task=finished_task, fitting_schema=fitting_schema
        )
        final_smirks = updated_schema.tasks[0].final_smirks
        # every term's k value must differ from 1e-5
        all_terms = (
            term for smirk in final_smirks for term in smirk.terms.values()
        )
        for term in all_terms:
            assert float(term.k.split()[0]) != 1e-5
def test_load_one_mol_mol2_with_charge(self, toolkit):
    """
    Test loading one molecule from a .mol2 file WITHOUT charges

    Despite the test name, the input used here is
    ``molecules/ethanol_no_charges.mol2``.

    .. note ::

        This file was generated via the one-liner below and has nan charges,
        which may or may not be valid.

    ```
    Molecule.from_smiles('CCO').to_file(
        'ethanol_no_charges.mol2', file_format='mol2',
    )
    ```
    """
    registry = make_registry(toolkit)
    mol2_path = get_data_file_path("molecules/ethanol_no_charges.mol2")
    # charges as read straight from the input file
    input_charges = Molecule.from_file(mol2_path).partial_charges

    conformer_mols = generate_conformers(
        molecule=mol2_path,
        forcefield="openff-1.0.0.offxml",
        registry=registry,
    )

    assert len(conformer_mols) == 1
    assert not input_charges
    assert not conformer_mols[0].partial_charges
def get_stereoisomers():
    """
    Load the molecules in ``stereoisomers.smi``, allowing undefined
    stereochemistry while reading.
    """
    smi_path = get_data("stereoisomers.smi")
    return Molecule.from_file(smi_path, allow_undefined_stereo=True)
def from_directory(
    cls: Type["TorsionTarget"], directory: str, name: str, options: Dict[str, Any]
) -> "TorsionTarget":
    """
    Build a target from a directory holding a molecule file and
    ``metadata.json``.

    The molecule file name is read from ``options["mol2"]``. The single
    dihedral listed under ``"dihedrals"`` in the metadata is tagged with atom
    map numbers 1..N, and the resulting mapped SMILES is stored on the target.

    Raises
    ------
    NotImplementedError
        If the metadata does not list exactly one dihedral.
    """
    from openforcefield.topology import Molecule

    molecule_path = os.path.join(directory, options["mol2"])
    off_molecule = Molecule.from_file(molecule_path, allow_undefined_stereo=True)

    # Store a SMILES pattern with the driven torsion tagged with map indices.
    metadata_path = os.path.join(directory, "metadata.json")
    with open(metadata_path) as file:
        dihedrals = json.load(file)["dihedrals"]

    if len(dihedrals) != 1:
        raise NotImplementedError()

    rd_molecule = off_molecule.to_rdkit()
    for map_number, atom_index in enumerate(dihedrals[0], start=1):
        rd_molecule.GetAtomWithIdx(atom_index).SetAtomMapNum(map_number)

    mapped_smiles = Chem.MolToSmiles(rd_molecule)
    return cls(name=name, molecule=mapped_smiles, options=options)
def test_chemical_environments_matches_OE(self):
    """Check Topology.chemical_environment_matches with the OpenEye wrapper."""
    from simtk.openmm import app

    oe_wrapper = OpenEyeToolkitWrapper()
    box_path = get_data_file_path(
        "systems/packmol_boxes/cyclohexane_ethanol_0.4_0.6.pdb"
    )
    pdbfile = app.PDBFile(box_path)

    unique_molecules = []
    for name in ("molecules/ethanol.mol2", "molecules/cyclohexane.mol2"):
        unique_molecules.append(Molecule.from_file(get_data_file_path(name)))
    topology = Topology.from_openmm(
        pdbfile.topology, unique_molecules=unique_molecules
    )

    # Substructure match
    substructure_hits = topology.chemical_environment_matches(
        "[C:1]-[C:2]-[O:3]", toolkit_registry=oe_wrapper
    )
    assert len(substructure_hits) == 143
    assert substructure_hits[0].topology_atom_indices == (1728, 1729, 1730)

    # Whole-molecule match
    whole_molecule_hits = topology.chemical_environment_matches(
        "[H][C:1]([H])([H])-[C:2]([H])([H])-[O:3][H]",
        toolkit_registry=oe_wrapper,
    )
    # 143 * 12 (there are 12 possible hydrogen mappings)
    assert len(whole_molecule_hits) == 1716
    assert whole_molecule_hits[0].topology_atom_indices == (1728, 1729, 1730)

    # A substructure that is not present must give no matches
    absent_hits = topology.chemical_environment_matches(
        "[C][C:1]-[C:2]-[O:3]", toolkit_registry=oe_wrapper
    )
    assert len(absent_hits) == 0
def test_torsiondrive_torsion_string():
    """
    Check that the torsion the factory builds for a rotatable bond is one of
    the molecule's proper torsions (in either direction).
    """
    factory = TorsiondriveDatasetFactory()
    methanol = Molecule.from_file(get_data("methanol.sdf"), "sdf")

    rotatable_bonds = methanol.find_rotatable_bonds()
    assert len(rotatable_bonds) == 1

    torsion = factory._get_torsion_string(bond=rotatable_bonds[0])

    # express every proper torsion as a tuple of atom indices
    reference_torsions = [
        tuple(atom.molecule_atom_index for atom in proper)
        for proper in methanol.propers
    ]
    forward_match = torsion in reference_torsions
    assert forward_match or tuple(reversed(torsion)) in reference_torsions
def openff(self):
    """
    Load ``<self.lig>.mol2`` and label it with the smirnoff99Frosst force
    field, storing the force field and the labels on the instance.
    """
    # mol2 is read for now, until charges are saved on sdf
    mol2_path = self.lig + '.mol2'
    ligand_topology = Topology.from_molecules([Molecule.from_file(mol2_path)])

    # Label using the smirnoff99Frosst force field
    self.forcefield = ForceField('smirnoff99Frosst.offxml')
    labels = self.forcefield.label_molecules(ligand_topology)
    self.parameters = labels[0]
def get_tautomers():
    """
    Load the molecules in ``tautomers_small.smi``, allowing undefined
    stereochemistry while reading.
    """
    smi_path = get_data("tautomers_small.smi")
    return Molecule.from_file(smi_path, allow_undefined_stereo=True)
def build_context(offxml, molfile):
    """
    Create an OpenMM Context on the Reference platform for the molecule in
    ``molfile``, parameterised with the force field in ``offxml``.
    """
    # Arguments are evaluated after the ForceField is built, so the force
    # field is still loaded before the molecule file is read.
    system = ForceField(
        offxml, allow_cosmetic_attributes=True
    ).create_openmm_system(OffMolecule.from_file(molfile).to_topology())
    integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
    reference_platform = openmm.Platform.getPlatformByName('Reference')
    return openmm.Context(system, integrator, reference_platform)
def _execute(self, directory, available_resources):
    """
    Build an OpenMM system for the substance and write it to
    ``<directory>/system.xml``.

    The topology comes from ``self.coordinate_file_path`` and the force field
    from ``self.force_field_path``. The written path is stored on
    ``self.system_path``.

    Raises
    ------
    ValueError
        If the force field source is not a SMIRNOFF source, or a component
        cannot be converted to a molecule.
    RuntimeError
        If no system is produced.
    """
    from openforcefield.topology import Molecule, Topology

    pdb_file = app.PDBFile(self.coordinate_file_path)

    source = ForceFieldSource.from_json(self.force_field_path)
    if not isinstance(source, SmirnoffForceFieldSource):
        raise ValueError(
            "Only SMIRNOFF force fields are supported by this protocol.")
    force_field = source.to_force_field()

    charged_molecules = []
    if self.apply_known_charges:
        charged_molecules = self._generate_known_charged_molecules()

    # Load in any additional, user specified charged molecules.
    for mol2_path in self.charged_molecule_paths:
        charged_molecules.append(Molecule.from_file(mol2_path, "MOL2"))

    unique_molecules = []
    for component in self.substance.components:
        component_molecule = Molecule.from_smiles(smiles=component.smiles)
        if component_molecule is None:
            raise ValueError(
                f"{component} could not be converted to a Molecule")
        unique_molecules.append(component_molecule)

    topology = Topology.from_openmm(
        pdb_file.topology, unique_molecules=unique_molecules
    )

    # Only pass charge_from_molecules when there is something to pass.
    if len(charged_molecules) == 0:
        system = force_field.create_openmm_system(topology)
    else:
        system = force_field.create_openmm_system(
            topology, charge_from_molecules=charged_molecules
        )

    if system is None:
        raise RuntimeError(
            "Failed to create a system from the specified topology and molecules."
        )

    serialized_system = openmm.XmlSerializer.serialize(system)
    self.system_path = os.path.join(directory, "system.xml")
    with open(self.system_path, "w") as file:
        file.write(serialized_system)
def setUp(self):
    """
    Load the protein/ligand test systems into ``self.testsystems`` and raise
    the log level of the parmed and matplotlib loggers to WARNING.
    """
    self.testsystems = dict()

    # TODO: Uncomment these after we fix input files
    enabled_systems = [
        #('bace', 'Bace'),
        #('cdk1', 'CDK2'),
        ('jnk1', 'Jnk1'),
        #('mcl1', 'MCL1'),
        #('p38', 'p38'),
        #('ptp1b', 'PTP1B'),
        #('thrombin', 'Thrombin'),
        #('tyk2', 'Tyk2'),
    ]

    for system_name, prefix in enabled_systems:
        system_dir = os.path.join('perses_jacs_systems', system_name)

        # Load protein
        from simtk.openmm.app import PDBFile
        pdb_filename = get_data_filename(
            os.path.join(system_dir, prefix + '_protein_fixed.pdb')
        )
        pdbfile = PDBFile(pdb_filename)

        # Load molecules
        from openforcefield.topology import Molecule
        sdf_filename = get_data_filename(
            os.path.join(system_dir, prefix + '_ligands.sdf')
        )
        molecules = Molecule.from_file(sdf_filename, allow_undefined_stereo=True)
        print(f'Read {len(molecules)} molecules from {sdf_filename}')

        # Filter molecules as appropriate
        molecules = self.filter_molecules(molecules)
        n_molecules = len(molecules)
        print(f'{n_molecules} molecules remain after filtering')
        if n_molecules == 0:
            # nothing left to store for this system
            continue

        # Create structures: the protein combined with each ligand in turn
        import parmed
        protein_structure = parmed.load_file(pdb_filename)
        molecules_structure = parmed.load_file(sdf_filename)
        complex_structures = [
            protein_structure + molecules_structure[index]
            for index in range(n_molecules)
        ]

        # Store
        self.testsystems[system_name] = {
            'name' : system_name,
            'protein_pdbfile' : pdbfile,
            'molecules' : molecules,
            'complex_structures' : complex_structures,
        }

    # TODO: Create other test topologies
    # TODO: Protein-only
    # TODO: Protein-ligand topology
    # TODO: Solvated protein-ligand topology
    # TODO: Host-guest topology

    # Suppress DEBUG logging from various packages
    import logging
    for logger_name in ['parmed', 'matplotlib']:
        logging.getLogger(logger_name).setLevel(logging.WARNING)
def test_jacs_ligands(self):
    """Use template generator to parameterize the Schrodinger JACS set of ligands"""
    from simtk.openmm.app import ForceField, NoCutoff

    jacs_systems = {
        #'bace' : { 'prefix' : 'Bace' },
        #'cdk2' : { 'prefix' : 'CDK2' },
        'jnk1' : { 'prefix' : 'Jnk1' },
        'mcl1' : { 'prefix' : 'MCL1' },
        #'p38' : { 'prefix' : 'p38' },
        'ptp1b' : { 'prefix' : 'PTP1B' },
        'thrombin' : { 'prefix' : 'Thrombin' },
        #'tyk2' : { 'prefix' : 'Tyk2' },
    }

    for system_name, system_info in jacs_systems.items():
        prefix = system_info['prefix']
        system_dir = os.path.join('perses_jacs_systems', system_name)

        # Load molecules
        ligand_sdf_filename = get_data_filename(
            os.path.join(system_dir, prefix + '_ligands.sdf')
        )
        print(f'Reading molecules from {ligand_sdf_filename} ...')
        from openforcefield.topology import Molecule
        molecules = Molecule.from_file(ligand_sdf_filename, allow_undefined_stereo=True)

        # Ensure this is a list: an object without len() is wrapped in one
        try:
            len(molecules)
        except TypeError:
            molecules = [molecules]

        print(f'Read {len(molecules)} molecules from {ligand_sdf_filename}')
        #molecules = self.filter_molecules(molecules)
        # Keep only the first three molecules when TRAVIS is set
        MAX_MOLECULES = 3 if 'TRAVIS' in os.environ else len(molecules)
        molecules = molecules[:MAX_MOLECULES]
        print(f'{len(molecules)} molecules remain after filtering')

        # Create template generator with local cache
        cache = os.path.join(get_data_filename(system_dir), 'cache.json')
        generator = self.TEMPLATE_GENERATOR(molecules=molecules, cache=cache)

        # Create a ForceField and register the template generator
        forcefield = ForceField()
        forcefield.registerTemplateGenerator(generator.generator)

        # Parameterize all molecules, counting successes and failures
        print(f'Caching all molecules for {system_name} at {cache} ...')
        n_success = 0
        n_failure = 0
        for molecule in molecules:
            openmm_topology = molecule.to_topology().to_openmm()
            try:
                forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff)
            except Exception as e:
                # failures are reported, not raised
                n_failure += 1
                print(e)
            else:
                n_success += 1
        print(f'{n_failure}/{n_success+n_failure} ligands failed to parameterize for {system_name}')
def main(sdf_path):
    """
    Convert the molecules in an SDF file to SMILES.

    The SMILES strings are written, one per line, to a ``.smi`` file that
    sits next to the input (same path with the extension replaced).

    Parameters
    ----------
    sdf_path : str
        Path to the SDF file to read.

    Returns
    -------
    list of str
        The SMILES string of every molecule read, in file order.

    Raises
    ------
    ValueError
        If ``sdf_path`` is not an existing file.
    """
    if not os.path.isfile(sdf_path):
        raise ValueError(f"File {sdf_path} is not available.")

    molecules = Molecule.from_file(sdf_path, allow_undefined_stereo=True)
    # from_file hands back a bare Molecule rather than a list when the file
    # holds a single record; wrap it so the iteration below still works.
    if isinstance(molecules, Molecule):
        molecules = [molecules]

    smiles = [m.to_smiles() for m in molecules]

    base, _ = os.path.splitext(sdf_path)
    with open(f"{base}.smi", "w") as f:
        f.write("\n".join(smiles))
    return smiles
def main():
    """
    Read the compounds named on the command line and return them after
    filtering against what is currently submitted.

    Paths ending in ``.sdf`` are loaded as molecules; anything else is read
    with ``smiles_from_txt``.
    """
    args = parse_cli()
    if not args.compounds.endswith(".sdf"):
        my_compounds = smiles_from_txt(args.compounds)
    else:
        print("Building from SDF...")
        my_compounds = Molecule.from_file(
            args.compounds, allow_undefined_stereo=True
        )
    already_submitted = currently_submitted()
    return filter_submitted(my_compounds, already_submitted)
def test_pre_run_check_no_opt():
    """
    Building a fitting schema from a workflow with no optimiser set must
    raise OptimizerError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=ethane)
def get_molecule(self):
    """
    Get molecule object with coordinates of the docked ligand

    The molecule is read from the coordinate file (as "sdf") on first use and
    cached on ``self._molecule`` for later calls.

    :return: the cached molecule object
    """
    if self._molecule is not None:
        return self._molecule
    sdf_path = self.get_coordinate_file_path()
    self._molecule = Molecule.from_file(sdf_path, "sdf")
    return self._molecule
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.

    An ethane optimization is submitted with one dihedral set to 60 and one
    distance frozen. Both are then measured on the final geometry.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets",
    )
    # add just mm spec
    dataset.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="default",
        spec_description="mm default spec",
        overwrite=True,
    )
    # build some constraints
    constraints = Constraints()
    constraints.add_set_constraint(
        constraint_type="dihedral", indices=[2, 0, 1, 5], value=60, bonded=True
    )
    constraints.add_freeze_constraint(
        constraint_type="distance", indices=[0, 1], bonded=True
    )
    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(
        index=index,
        molecule=ethane,
        attributes=attributes,
        constraints=constraints,
    )
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    # NOTE: ``assert pytest.approx(a, b)`` only tests the truthiness of the
    # approx object (and treats ``b`` as the relative tolerance), so it can
    # never fail; the values must be compared with ``==``.
    # NOTE(review): the absolute tolerances below are deliberately loose
    # guesses for a converged constrained optimization; tighten or relax
    # them to match the optimizer's actual constraint convergence.
    final_molecule = record.get_final_molecule()
    initial_molecule = record.get_initial_molecule()
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60, abs=1e-2)
    assert final_molecule.measure((0, 1)) == pytest.approx(
        initial_molecule.measure((0, 1)), abs=1e-3
    )
def test_normal_fragmentation():
    """
    Fragment the bace parent molecule and check the number of fragments, the
    expected fragment molecules and that the parent torsions are distinct.
    """
    # bace can be fragmented into 3 parts 2 of which are the same
    fragmenter = WBOFragmenter()
    fragmenter.keep_non_rotor_ring_substituents = False
    bace = Molecule.from_file(
        file_path=get_data("bace_parent.sdf"), file_format="sdf"
    )
    fragment_data = fragmenter.fragment(molecule=bace)
    assert len(fragment_data) == 3

    fragment_molecules = [entry.fragment_molecule for entry in fragment_data]
    # both reference fragments must be among the generated ones
    for reference_file in ["bace_frag1.sdf", "bace_frag2.sdf"]:
        reference_fragment = Molecule.from_file(
            file_path=get_data(reference_file), file_format="sdf"
        )
        assert reference_fragment in fragment_molecules

    # all of the central bonds must be different
    unique_torsions = {entry.parent_torsion for entry in fragment_data}
    assert len(unique_torsions) == 3
def test_label_molecule():
    """
    Label ethane with the force field editor and check that every expected
    parameter type appears in the labels.
    """
    editor = ForceFieldEditor(forcefield_name="openff-1.0.0.offxml")
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    labels = editor.label_molecule(molecule=ethane)

    expected_types = [
        "Bonds",
        "Angles",
        "ProperTorsions",
        "ImproperTorsions",
        "vdW",
    ]
    for expected in expected_types:
        assert expected in labels
def test_pre_run_check_no_target():
    """
    Building a fitting schema with an optimiser that has no targets set up
    must raise OptimizerError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    # optimiser is attached without any optimization target
    optimizer = ForceBalanceOptimizer()
    factory.set_optimizer(optimizer=optimizer)
    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=ethane)
def test_torsiondrive_linear_torsion():
    """
    Check that the torsiondrive factory detects linear torsions in every
    molecule of ``linear_molecules.sdf``.
    """
    factory = TorsiondriveDatasetFactory()
    linear_molecules = Molecule.from_file(
        get_data("linear_molecules.sdf"), "sdf", allow_undefined_stereo=True
    )
    for linear_molecule in linear_molecules:
        detected = factory._detect_linear_torsions(linear_molecule)
        assert bool(detected) is True
def test_missing_task_type():
    """
    Generating a fitting task for an unknown collection workflow must raise
    NotImplementedError.
    """
    target = DummyTarget()
    target.collection_workflow = "test"
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    with pytest.raises(NotImplementedError):
        # the cmiles lookup stays inside the context, as in the original call
        target.generate_fitting_task(
            molecule=ethanol,
            fragment=False,
            attributes=get_molecule_cmiles(ethanol),
            dihedrals=[(8, 2, 1, 0)],
        )
def test_pre_run_check_no_smirks():
    """
    Building a fitting schema with an empty ``target_smirks`` list must raise
    TargetNotSetError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # no target smirks supplied
    factory.target_smirks = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=ethane)
def test_pre_run_check_no_params():
    """
    Building a fitting schema with an empty ``target_parameters`` list must
    raise TargetNotSetError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # no parameters selected for optimisation
    factory.target_parameters = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=ethane)
def test_generate_fitting_task(collection_workflow):
    """
    Check that the generated task's ``task_type`` equals the target's
    collection workflow.
    """
    target = DummyTarget()
    target.collection_workflow = collection_workflow
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    cmiles = get_molecule_cmiles(ethanol)
    fitting_task = target.generate_fitting_task(
        molecule=ethanol,
        fragment=False,
        attributes=cmiles,
        dihedrals=[(8, 2, 1, 0)],
    )
    assert fitting_task.task_type == collection_workflow
def test_pre_run_check_no_frag():
    """
    Building a fitting schema with no fragmentation engine set must raise
    FragmenterError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # remove the fragmentation engine
    factory.fragmentation_engine = None
    with pytest.raises(FragmenterError):
        factory.fitting_schema_from_molecules(molecules=ethane)