def test_make_fitting_schema_from_results():
    """
    Build a fitting schema from downloaded torsiondrive results and check that
    it holds the expected number of molecules and tasks, all ready for fitting.
    """
    # assemble a workflow which optimises with ForceBalance on an ab initio target
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    # pull the selected small-fragment entries from the server
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )
    fitting_schema = factory.fitting_schema_from_results(
        results=torsiondrives, combine=True
    )

    # combining the results leaves 2 molecules ...
    assert fitting_schema.n_molecules == 2
    # ... which between them cover 3 torsiondrives
    assert fitting_schema.n_tasks == 3

    # every task must already carry its results
    for fitting_task in fitting_schema.tasks:
        assert fitting_task.ready_for_fitting is True
def test_optimizer_explicit():
    """
    Drive the executor's optimizer step directly in the main thread and check
    that the fitted smirks no longer carry the 1e-5 force constant.
    """
    molecule = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )

    # build the schema for biphenyl and attach the stored scan results
    fitting_schema = get_fitting_schema(molecules=molecule)
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_schema.update_with_results(results=scan_results)

    # no server is needed: feed a single task straight into the optimization queue
    executor = Executor()
    executor.total_tasks = 1
    executor.opt_queue.put(fitting_schema.tasks[0])

    with temp_directory():
        executor.optimizer()

        # collect the completed task and fold it back into the schema
        finished = executor.finished_tasks.get()
        updated_schema = executor.update_fitting_schema(
            task=finished, fitting_schema=fitting_schema
        )
        final_smirks = updated_schema.tasks[0].final_smirks

        # every term's force constant must have moved away from 1e-5
        for smirk in final_smirks:
            for term in smirk.terms.values():
                k_value = float(term.k.split()[0])
                assert k_value != 1e-5
def test_torsiondrivedataset_new_optimization(public_client):
    """
    Test making a new optimization dataset of constrained optimizations from
    the results of a torsiondrive dataset.
    """
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=["[ch2:3]([ch2:2][oh:4])[oh:1]_12"],
    )

    # convert the torsiondrive results into a new optimization dataset
    opt_dataset = torsiondrives.create_optimization_dataset(
        dataset_name="new optimization dataset",
        description="a test optimization dataset",
        tagline="a test optimization dataset.",
    )

    assert opt_dataset.dataset_name == "new optimization dataset"
    assert opt_dataset.n_molecules == 1
    assert opt_dataset.n_records == 24

    # each entry must carry exactly one "set" constraint; gather its value
    found_angles = set()
    for record in opt_dataset.dataset.values():
        constraints = record.constraints
        assert constraints.has_constraints is True
        assert len(constraints.set) == 1
        found_angles.add(constraints.set[0].value)

    # the collected values must be exactly -165 to 180 in steps of 15
    expected_angles = list(range(-165, 195, 15))
    assert sorted(found_angles) == expected_angles
def test_task_from_results():
    """
    Build a single optimization task from one torsiondrive result and check
    that it mirrors the workflow settings and is ready for fitting.
    """
    # download just the first of the selected small-fragment entries
    collection = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries[:1],
    )
    # take the first (and only requested) torsiondrive from the collection
    td_result = list(collection.collection.values())[0]

    # workflow using ForceBalance with an ab initio target
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    task_schema = factory._task_from_results(results=[td_result], index=1)

    # settings copied over from the workflow and optimizer
    assert task_schema.initial_forcefield == factory.initial_forcefield
    assert task_schema.optimizer_name == optimizer.optimizer_name
    # the job id is derived from the index passed in
    assert task_schema.job_id == "bespoke_task_1"
    assert bool(task_schema.target_smirks) is True
    assert task_schema.target_parameters == factory.target_parameters
    assert td_result.molecule == task_schema.target_molecule.molecule
    # one result in, one task and one target out, already holding its data
    assert task_schema.n_tasks == 1
    assert task_schema.n_targets == 1
    assert task_schema.ready_for_fitting is True
def test_torsionprofile_metadata():
    """
    Make sure that when using the torsionprofile target we make the
    metadata.json file and that its contents match the task.
    """
    from openff.qcsubmit.serializers import deserialize

    target = TorsionProfile_SMIRNOFF()
    schema = biphenyl_target(target=target)

    # attach the saved biphenyl scan to the target schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    schema.update_with_results(results=scan_results)
    assert schema.ready_for_fitting is True

    with temp_directory():
        # write the fitting inputs into the scratch directory
        target.prep_for_fitting(fitting_target=schema)

        # a single torsiondrive should give a single target folder
        created = os.listdir(".")
        assert len(created) == 1
        target_dir = created[0]

        written_files = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
            "metadata.json",
        ):
            assert expected_file in written_files

        metadata = deserialize(
            file_name=os.path.join(target_dir, "metadata.json")
        )

        # the json must agree with the task it was generated from
        task = schema.tasks[0]
        assert task.dihedrals[0] == tuple(metadata["dihedrals"][0])
        grid_ids = metadata["torsion_grid_ids"]
        for reference in task.reference_data():
            assert reference.extras["dihedral_angle"] in grid_ids
def test_torsiondrivedataset_export(public_client):
    """
    Export a torsiondrive result collection to file and check that it can be
    parsed back with the same settings and entries.
    """
    with temp_directory():
        downloaded = TorsionDriveCollectionResult.from_server(
            client=public_client,
            spec_name="default",
            dataset_name="TorsionDrive Paper",
            include_trajectory=False,
            final_molecule_only=True,
        )

        # round trip through a json file in the scratch directory
        downloaded.export_results("dataset.json")
        reloaded = TorsionDriveCollectionResult.parse_file("dataset.json")

        # everything apart from the collection itself is compared directly
        original_fields = downloaded.dict(exclude={"collection"})
        reloaded_fields = reloaded.dict(exclude={"collection"})
        assert original_fields == reloaded_fields

        # every entry of the original collection must be in the reloaded one
        for entry_name in downloaded.collection:
            assert entry_name in reloaded.collection
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare the abinitio target for fitting with gradient fitting turned off
    and check the files which are written out.
    """
    target = AbInitio_SMIRNOFF()
    target.fit_gradient = False
    schema = biphenyl_target(target=target)
    reference_molecule = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )

    # attach the saved biphenyl scan to the target schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    schema.update_with_results(results=scan_results)
    assert schema.ready_for_fitting is True

    with temp_directory():
        # write the fitting inputs into the scratch directory
        target.prep_for_fitting(fitting_target=schema)

        # a single torsiondrive should give a single target folder
        created = os.listdir(".")
        assert len(created) == 1
        target_dir = created[0]

        written_files = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
        ):
            assert expected_file in written_files

        # the atom ordering in the pdb must be unchanged (identity atom map)
        pdb_molecule = Molecule.from_file(
            os.path.join(target_dir, "molecule.pdb"), file_format="pdb"
        )
        isomorphic, atom_map = Molecule.are_isomorphic(
            reference_molecule, pdb_molecule, return_atom_map=True
        )
        assert isomorphic is True
        assert atom_map == {
            index: index for index in range(reference_molecule.n_atoms)
        }

        # the mol2 file must carry partial charges
        mol2_molecule = Molecule.from_file(
            os.path.join(target_dir, "molecule.mol2"), "mol2"
        )
        assert mol2_molecule.partial_charges is not None

        # read back the qdata file
        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(target_dir, "qdata.txt")
        )
        # gradient fitting is off so none should have been written
        assert not gradients

        # coordinates and energies must match the reference data in order
        reference_data = schema.tasks[0].reference_data()
        for index, (coord, energy) in enumerate(zip(coords, energies)):
            reference = reference_data[index]
            assert reference.energy == energy
            assert coord == reference.molecule.geometry.flatten().tolist()
def test_torsiondrivedataset_final_result_only(public_client):
    """
    Check that requesting final_molecule_only leaves a single molecule in
    every optimization trajectory.
    """
    downloaded = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=False,
        final_molecule_only=True,
    )

    # walk every optimization of every torsiondrive in the collection
    for td_result in downloaded.collection.values():
        for opt_result in td_result.optimization.values():
            n_frames = len(opt_result.trajectory)
            assert n_frames == 1
def test_torsiondrivedataset_traj_subset(public_client):
    """
    Check that full trajectories are downloaded when requested for a subset of
    the entries in a collection.
    """
    entry_name = "[ch2:3]([ch2:2][oh:4])[oh:1]_12"
    downloaded = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=[entry_name],
    )

    # only the requested torsiondrive should have been pulled down
    assert len(downloaded.collection) == 1

    # each optimization must hold more than two trajectory frames
    td_result = downloaded.collection[entry_name]
    for opt_result in td_result.optimization.values():
        n_frames = len(opt_result.trajectory)
        assert n_frames > 2
def test_torsiondrivedataset_result_default(public_client):
    """
    Download a basic torsiondrive dataset from the archive and exercise the
    utility methods of a single torsiondrive result.
    """
    import numpy as np
    from simtk import unit

    downloaded = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=False,
        final_molecule_only=False,
    )

    # with both flags off every optimization trajectory should hold exactly
    # two entries (presumably the initial and final geometries)
    for td_result in downloaded.collection.values():
        for opt_result in td_result.optimization.values():
            assert len(opt_result.trajectory) == 2

    # all three torsiondrives of the dataset should be present
    assert len(downloaded.collection) == 3

    # now check the utility functions on one entry
    td_result = downloaded.collection["[ch2:3]([ch2:2][oh:4])[oh:1]_12"]
    assert td_result.final_energies is not None
    # there must be one energy per optimization
    n_optimizations = len(td_result.optimization)
    assert len(td_result.final_energies) == n_optimizations

    base_molecule = td_result.molecule
    scan_molecule = td_result.get_torsiondrive()
    assert base_molecule == scan_molecule
    # one conformer should be loaded per optimization
    assert scan_molecule.n_conformers == len(td_result.optimization)

    # conformers, converted to bohr, must match the ordered result geometries
    ordered = td_result.get_ordered_results()
    for conformer, ordered_entry in zip(scan_molecule.conformers, ordered):
        conformer_bohr = conformer.in_units_of(unit.bohr).tolist()
        reference_geometry = ordered_entry[1].molecule.geometry.tolist()
        assert np.allclose(conformer_bohr, reference_geometry)

    # the lowest energy optimization must have the minimum final energy
    lowest = td_result.get_lowest_energy_optimisation()
    minimum_energy = min(td_result.final_energies.values())
    assert lowest.final_energy == minimum_energy
def test_forcebalance_optimize(optimization_target):
    """
    Run the full ForceBalance optimization stage for a simple biphenyl system
    with the supplied target, using scan data previously extracted from
    qcarchive.
    """
    from openff.qcsubmit.results import TorsionDriveCollectionResult

    schema = biphenyl_workflow(target=optimization_target)

    with temp_directory():
        # attach the stored scan results to the workflow
        scan_results = TorsionDriveCollectionResult.parse_file(
            get_data("biphenyl.json.xz")
        )
        schema.update_with_results(results=scan_results)

        # run the optimizer on the populated schema
        optimizer = ForceBalanceOptimizer()
        outcome = optimizer.optimize(schema=schema)
        assert outcome.status == Status.Complete

        # no term of the fitted smirks may still hold the 1e-05 force constant
        for smirk in outcome.target_smirks:
            for term in smirk.terms.values():
                assert term.k != "1e-05 * mole**-1 * kilocalorie"
def test_sort_results(combine):
    """
    Sort downloaded results ahead of building a fitting schema, both with and
    without combination, and check how many groups come back.
    """
    # download the selected bace fragment entries
    downloaded = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )
    factory = WorkflowFactory()

    sorted_results = factory._sort_results(results=downloaded, combine=combine)

    # combining yields 2 groups, otherwise 3 are expected
    expected_groups = 2 if combine else 3
    assert len(sorted_results) == expected_groups
def test_torsiondrive_new_basicdataset(public_client):
    """
    Create a new basic dataset from the current torsiondrive results and check
    its name, size and driver.
    """
    downloaded = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=["[ch2:3]([ch2:2][oh:4])[oh:1]_12"],
    )

    basic_dataset = downloaded.create_basic_dataset(
        dataset_name="new basicdataset",
        description="new basicdataset",
        tagline="new basicdataset",
        driver="gradient",
    )

    assert basic_dataset.dataset_name == "new basicdataset"
    assert basic_dataset.n_molecules == 1
    # every geometry of the molecule should be unpacked into its own record
    assert basic_dataset.n_records == 24
    assert basic_dataset.driver.value == "gradient"
def test_torsiondrivedataset_new_torsiondrive(public_client):
    """
    Create a new torsiondrive dataset from the results of an existing one and
    check its name, size and starting molecules.
    """
    entry_name = "[ch2:3]([ch2:2][oh:4])[oh:1]_12"
    downloaded = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=[entry_name],
    )

    # convert the results into a fresh torsiondrive dataset
    td_dataset = downloaded.create_torsiondrive_dataset(
        dataset_name="new torsiondrive dataset",
        description="a test torsiondrive dataset",
        tagline="a test torsiondrive dataset.",
    )

    assert td_dataset.dataset_name == "new torsiondrive dataset"
    assert td_dataset.n_molecules == 1
    assert td_dataset.n_records == 1

    # the single entry should start from all 24 molecules
    new_entry = td_dataset.dataset[entry_name]
    assert len(new_entry.initial_molecules) == 24