def test_grid_optimization_dataset(fractal_compute_server):
    """Submit one entry to a GridOptimizationDataset and check that its record completes."""
    client = ptl.FractalClient(fractal_compute_server)
    dataset = ptl.collections.GridOptimizationDataset("testing", client=client)

    # Register a single specification named "test" (geomeTRIC optimizer, RDKit UFF gradients).
    optimization_spec = {"program": "geometric", "keywords": {}}
    gradient_spec = {"driver": "gradient", "method": "UFF", "program": "rdkit"}
    dataset.add_specification("test", optimization_spec, gradient_spec)

    # One relative dihedral scan over atoms 0-1-2-3, no preoptimization.
    molecule = ptl.data.get_molecule("hooh.json")
    dihedral_scan = {
        "type": "dihedral",
        "indices": [0, 1, 2, 3],
        "steps": [-10, 10],
        "step_type": "relative",
    }
    dataset.add_entry("hooh1", molecule, scans=[dihedral_scan], preoptimization=False)

    # Launch the computation and let the server work through the services.
    dataset.compute("test")
    fractal_compute_server.await_services()

    dataset.query("test")
    record = dataset.get_record("hooh1", "test")
    assert record.status == "COMPLETE"
def test_torsiondrive_dataset(fractal_compute_server):
    """Exercise a TorsionDriveDataset with two entries and two identical specifications.

    The test adds two entries, computes them under ``spec1``, reloads the
    collection from the server, then adds an identical ``spec2`` and computes
    it as well. It finally checks the final energy at grid point 90 for every
    (entry, specification) pair and the dataset's status/count summaries.
    """
    client = ptl.FractalClient(fractal_compute_server)
    ds = ptl.collections.TorsionDriveDataset("testing", client=client)

    hooh1 = ptl.data.get_molecule("hooh.json")
    # Second entry: same molecule with [0, 0, 0.2] added to its geometry
    # (presumably a rigid shift of every atom along z -- confirm geometry is (natom, 3)).
    hooh2 = hooh1.copy(update={"geometry": hooh1.geometry + np.array([0, 0, 0.2])})

    ds.add_entry("hooh1", [hooh1], [[0, 1, 2, 3]], [90], attributes={"something": "hooh1"})
    ds.add_entry("hooh2", [hooh2], [[0, 1, 2, 3]], [90], attributes={"something": "hooh2"})

    optimization_spec = {
        "program": "geometric",
        "keywords": {
            "coordsys": "tric",
        },
    }
    qc_spec = {
        "driver": "gradient",
        "method": "UFF",
        "basis": "",
        "keywords": None,
        "program": "rdkit",
    }

    ds.add_specification("spec1", optimization_spec, qc_spec, description="This is a really cool spec")

    ncompute = ds.compute("spec1")
    assert ncompute == 2

    ds.save()

    fractal_compute_server.await_services(max_iter=5)

    # Re-fetch the collection from the server so the checks below use stored state.
    ds = client.get_collection("torsiondrivedataset", "testing")
    ds.query("spec1")

    # Add another fake set, should instantly return
    ds.add_specification("spec2", optimization_spec, qc_spec, description="This is a really cool spec")

    # Test subsets
    ncompute = ds.compute("spec2", subset=set())
    assert ncompute == 0

    ncompute = ds.compute("spec2")
    assert ncompute == 2
    ds.query("spec2")

    # We effectively computed the same thing twice with two duplicate specs.
    # Index with the loop variables so that every (entry, spec) record is
    # actually verified, not only ("hooh1", "spec2") four times over.
    expected_energy = 0.00015655375994799847
    for row in ["hooh1", "hooh2"]:
        for spec in ["spec1", "spec2"]:
            final_energy = ds.df.loc[row, spec].get_final_energies(90)
            assert pytest.approx(final_energy, 1.e-5) == expected_energy

    assert ds.status().loc["COMPLETE", "spec1"] == 2
    assert ds.status(collapse=False).loc["hooh1", "spec1"] == "COMPLETE"

    assert ds.counts("hooh1").loc["hooh1", "spec1"] > 5
    assert ds.counts("hooh1", specs="spec1", count_gradients=True).loc["hooh1", "spec1"] > 30
def test_service_gridoptimization_single_noopt(fractal_compute_server):
    """Run a one-dimensional grid optimization without preoptimization and check the results."""
    client = ptl.FractalClient(fractal_compute_server)

    # Reference molecule and the 1-2 distance before any computation.
    hooh = ptl.data.get_molecule("hooh.json")
    initial_distance = hooh.measure([1, 2])

    # Service options: a relative distance scan over atoms 1-2 with two steps.
    distance_scan = {
        "type": "distance",
        "indices": [1, 2],
        "steps": [-0.1, 0.0],
        "step_type": "relative",
    }
    service_options = {
        "keywords": {
            "preoptimization": False,
            "scans": [distance_scan],
        },
        "optimization_spec": {
            "program": "geometric",
            "keywords": {
                "coordsys": "tric",
            },
        },
        "qc_spec": {
            "driver": "gradient",
            "method": "UFF",
            "basis": "",
            "keywords": None,
            "program": "rdkit",
        },
        "initial_molecule": hooh,
    }
    service = GridOptimizationInput(**service_options)

    submitted = client.add_service([service])
    fractal_compute_server.await_services()
    assert len(fractal_compute_server.list_current_tasks()) == 0

    result = client.query_procedures(id=submitted.ids)[0]

    assert result.status == "COMPLETE"
    assert result.starting_grid == (1, )
    assert pytest.approx(result.get_final_energies((0, )), abs=1.e-4) == 0.00032145876568280524

    assert result.starting_molecule == result.initial_molecule

    # Check initial vs starting molecule
    assert result.initial_molecule == result.starting_molecule

    starting_mol = client.query_molecules(id=result.starting_molecule)[0]
    assert pytest.approx(starting_mol.measure([1, 2])) == initial_distance
def spin_up_test(**keyword_augments):
    """Submit a torsiondrive service built from ``torsiondrive_options`` plus overrides.

    Parameters
    ----------
    **keyword_augments
        Overrides merged recursively into a deep copy of ``torsiondrive_options``
        before constructing the ``TorsionDriveInput``. The special key
        ``run_service`` (default ``True``) is consumed by this helper and is not
        forwarded to the input model: when false, the service is only submitted
        and the caller drives the server itself.

    Returns
    -------
    The ``data`` attribute of the full ``client.add_service`` response.
    """
    # ``run_service`` controls this helper, so strip it before it can leak into
    # the TorsionDriveInput options.
    run_service = keyword_augments.pop("run_service", True)

    instance_options = copy.deepcopy(torsiondrive_options)
    recursive_dict_merge(instance_options, keyword_augments)

    inp = TorsionDriveInput(**instance_options)
    ret = client.add_service([inp], full_return=True)

    if ret.meta.n_inserted:  # In case test already submitted
        compute_key = ret.data.ids[0]
        service = client.query_services(procedure_id=compute_key)[0]
        assert 'WAITING' in service['status']

    if run_service:
        fractal_compute_server.await_services()
        assert len(fractal_compute_server.list_current_tasks()) == 0

    return ret.data
def test_torsiondrive_scan_keywords(fractal_compute_server):
    """
    Run a torsiondrive whose entry-level keywords override the dataset-wide
    grid spacing and dihedral scan range, and check the stored record keeps them.
    """
    client = FractalClient(fractal_compute_server)
    molecule = Molecule.from_smiles("CO")

    # Build a factory that carries a single OpenMM specification.
    factory = TorsiondriveDatasetFactory()
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff_unconstrained-1.1.0",
        basis="smirnoff",
        program="openmm",
        spec_description="scan range test",
        spec_name="openff-1.1.0",
    )

    dataset = factory.create_dataset(
        dataset_name="Torsiondrive scan keywords",
        molecules=molecule,
        description="Testing scan keywords which overwrite the global settings",
        tagline="Testing scan keywords which overwrite the global settings",
    )

    # A description URL is set here so the dataset can be submitted.
    dataset.metadata.long_description_url = "https://test.org"

    # Override the scan keywords on the first entry only.
    entry_names = list(dataset.dataset.keys())
    first_entry = dataset.dataset[entry_names[0]]
    first_entry.keywords = {"grid_spacing": [5], "dihedral_ranges": [(-10, 10)]}

    # Submit and let the server process the services.
    dataset.submit(client=client)
    fractal_compute_server.await_services(max_iter=50)

    # Pull the collection back and inspect the first record.
    collection = client.get_collection("TorsionDriveDataset", dataset.dataset_name)
    record = collection.get_record(collection.df.index[0], "openff-1.1.0")
    stored_keywords = record.keywords

    assert stored_keywords.grid_spacing == [5]
    assert stored_keywords.grid_spacing != dataset.grid_spacing
    assert stored_keywords.dihedral_ranges == [(-10, 10)]
    assert stored_keywords.dihedral_ranges != dataset.dihedral_ranges
def test_service_torsiondrive_service_incomplete(fractal_compute_server, torsiondrive_fixture):
    """Step a torsiondrive service forward manually and check its intermediate states."""
    # NOTE(review): this perturbed molecule is built but never passed on below.
    hooh = ptl.data.get_molecule("hooh.json")
    hooh.geometry[0] += 0.00031

    spin_up_test, client = torsiondrive_fixture

    def fetch_record():
        # Re-query the procedure so each check sees the latest server state.
        return client.query_procedures(id=ret.ids)[0]

    ret = spin_up_test(run_service=False)

    # Check the blank
    result = fetch_record()
    assert len(result.final_energy_dict) == 0
    assert len(result.optimization_history) == 0
    assert result.status == "INCOMPLETE"

    # Update the service, but no compute
    fractal_compute_server.update_services()
    result = fetch_record()
    status = result.detailed_status()
    assert result.status == "RUNNING"
    assert status["incomplete_tasks"] == 1

    fractal_compute_server.await_results()

    # Take a compute step
    fractal_compute_server.await_services(max_iter=1)
    result = fetch_record()
    status = result.detailed_status()
    assert status["total_points"] == 4
    assert status["computed_points"] == 3
    assert status["complete_tasks"] >= 3
    assert status["incomplete_tasks"] == 0
    assert len(result.final_energy_dict) == 1  # One complete
    assert len(result.optimization_history) == 3  # Three spawned
    assert result.minimum_positions["[-90]"] == 0
    assert result.status == "RUNNING"

    # Repeat compute step checking for updates
    fractal_compute_server.await_services(max_iter=1)
    result = fetch_record()
    assert len(result.final_energy_dict) == 3
    assert len(result.optimization_history) == 4
    assert result.minimum_positions["[-90]"] == 0
    assert result.status == "RUNNING"

    # Finalize
    fractal_compute_server.await_services(max_iter=6)
    result = fetch_record()
    assert len(result.final_energy_dict) == 4
    assert len(result.optimization_history) == 4
    assert result.minimum_positions["[-90]"] == 2
    assert result.status == "COMPLETE"
def test_service_gridoptimization_single_opt(fractal_compute_server):
    """Run a two-dimensional grid optimization with preoptimization in a single pass.

    NOTE(review): another function with this same name is defined later in this
    source; if both live in one module the later definition shadows this one and
    pytest will only collect that one -- confirm.
    """
    client = ptl.FractalClient(fractal_compute_server)

    # Add a HOOH
    hooh = ptl.data.get_molecule("hooh.json")
    initial_distance = hooh.measure([1, 2])
    mol_ret = client.add_molecules([hooh])

    # Options: one relative distance scan and one absolute dihedral scan.
    scans = [
        {
            "type": "distance",
            "indices": [1, 2],
            "steps": [-0.1, 0.0],
            "step_type": "relative",
        },
        {
            "type": "dihedral",
            "indices": [0, 1, 2, 3],
            "steps": [-90, 0],
            "step_type": "absolute",
        },
    ]
    service_options = {
        "keywords": {
            "preoptimization": True,
            "scans": scans,
        },
        "optimization_spec": {
            "program": "geometric",
            "keywords": {
                "coordsys": "tric",
            },
        },
        "qc_spec": {
            "driver": "gradient",
            "method": "UFF",
            "basis": "",
            "keywords": None,
            "program": "rdkit",
        },
        "initial_molecule": mol_ret[0],
    }
    service = GridOptimizationInput(**service_options)

    submitted = client.add_service([service], tag="gridopt", priority="low")
    fractal_compute_server.await_services()
    assert len(fractal_compute_server.list_current_tasks()) == 0

    result = client.query_procedures(id=submitted.ids)[0]

    assert result.status == "COMPLETE"
    assert result.starting_grid == (1, 0)
    assert pytest.approx(result.get_final_energies((0, 0)), abs=1.e-4) == 0.0010044105443485617
    assert pytest.approx(result.get_final_energies((1, 1)), abs=1.e-4) == 0.0026440964897817623

    assert result.starting_molecule != result.initial_molecule

    # Check initial vs starting molecule
    assert result.initial_molecule == mol_ret[0]
    starting_mol = client.query_molecules(id=result.starting_molecule)[0]
    starting_distance = starting_mol.measure([1, 2])
    assert pytest.approx(starting_distance) != initial_distance
    assert pytest.approx(starting_distance) == 2.488686479260597

    # Check tags on individual procedures
    first_proc_id = list(result.grid_optimizations.values())[0]
    optimization = client.query_procedures(id=first_proc_id)[0]

    task = client.query_tasks(id=optimization.task_id)[0]
    assert task.priority == 0
    assert task.tag == "gridopt"
def test_torsiondrive_submissions(fractal_compute_server, specification):
    """
    Submit a torsiondrive dataset, let it compute, and compare what the server
    stored against the local dataset.
    """
    client = FractalClient(fractal_compute_server)

    spec_options, driver = specification
    program = spec_options["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecule = Molecule.from_smiles("CO")

    factory = TorsiondriveDatasetFactory(driver=driver)
    factory.add_qc_spec(**spec_options, spec_name="default", spec_description="test", overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test torsiondrives info {program}, {driver}",
        molecules=molecule,
        description="Test torsiondrive dataset",
        tagline="Testing torsiondrive datasets",
    )

    # Submitting at this point is expected to raise DatasetInputError.
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_services(max_iter=50)

    # fetch the collection back from the server
    ds = client.get_collection("TorsionDriveDataset", dataset.dataset_name)

    # check the metadata
    stored_metadata = Metadata(**ds.data.metadata)
    assert stored_metadata == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for local_spec in dataset.qc_specifications.values():
        stored_spec = ds.data.specs[local_spec.spec_name]
        stored_qc = stored_spec.qc_spec

        assert stored_spec.description == local_spec.spec_description
        assert stored_qc.driver == dataset.driver
        assert stored_qc.method == local_spec.method
        assert stored_qc.basis == local_spec.basis
        assert stored_qc.program == local_spec.program

        # check the keywords
        keywords = client.query_keywords(stored_qc.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(local_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            # this will take some time so make sure it is running with no error
            assert record.status.value == "COMPLETE", print(record.dict())
            assert record.error is None
            assert len(record.final_energy_dict) == 24
def test_adding_compute(fractal_compute_server, dataset_data):
    """
    Add a second compute specification to an already-submitted dataset, for each
    dataset type, using programs other than psi4.
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")
    factory_type, dataset_type = dataset_data

    # make and clear out the qc specs
    factory = factory_type()
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="default",
        spec_description="default spec for openff",
    )

    dataset = factory.create_dataset(
        dataset_name=f"Test adding compute to {factory_type}",
        molecules=mol,
        description=f"Testing adding compute to a {dataset_type} dataset",
        tagline="tests for adding compute.",
    )

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # first submission
    dataset.submit(client=client, await_result=False)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # now make a dataset carrying only new compute and submit it;
    # the metadata is transferred so the elements can be compared
    compute_dataset = dataset_type(dataset_name=dataset.dataset_name, metadata=dataset.metadata)
    compute_dataset.clear_qcspecs()
    # now add the new compute spec
    compute_dataset.add_qc_spec(
        method="uff",
        basis=None,
        program="rdkit",
        spec_name="rdkit",
        spec_description="rdkit basic spec",
    )

    # make sure the dataset has no molecules and submit it
    assert compute_dataset.dataset == {}
    compute_dataset.submit(client=client)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # fetch the collection back from the server
    ds = client.get_collection(dataset.dataset_type, dataset.dataset_name)

    # check the metadata
    stored_metadata = Metadata(**ds.data.metadata)
    assert stored_metadata == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # update all specs into one dataset
    dataset.add_qc_spec(**compute_dataset.qc_specifications["rdkit"].dict())

    # get the last ran spec
    if dataset.dataset_type == "DataSet":
        for history_entry in ds.data.history:
            driver, program, method, basis, spec_name = history_entry
            spec = dataset.qc_specifications[spec_name]
            assert driver == dataset.driver
            assert program == spec.program
            assert method == spec.method
            assert basis == spec.basis

        for spec in dataset.qc_specifications.values():
            query = ds.get_records(
                method=spec.method,
                basis=spec.basis,
                program=spec.program,
            )
            for index in query.index:
                result = query.loc[index].record
                assert result.status.value.upper() == "COMPLETE"
                assert result.error is None
                assert result.return_result is not None
    else:
        # check the qc spec
        for local_spec in dataset.qc_specifications.values():
            stored_spec = ds.data.specs[local_spec.spec_name]
            stored_qc = stored_spec.qc_spec

            assert stored_spec.description == local_spec.spec_description
            assert stored_qc.driver == dataset.driver
            assert stored_qc.method == local_spec.method
            assert stored_qc.basis == local_spec.basis
            assert stored_qc.program == local_spec.program

            # check the keywords
            keywords = client.query_keywords(stored_qc.keywords)[0]

            assert keywords.values["maxiter"] == dataset.maxiter
            assert keywords.values["scf_properties"] == dataset.scf_properties

            # query the dataset
            ds.query(local_spec.spec_name)

            for index in ds.df.index:
                record = ds.df.loc[index].default
                # this will take some time so make sure it is running with no error
                assert record.status.value == "COMPLETE", print(record.dict())
                assert record.error is None
def test_adding_specifications(fractal_compute_server):
    """
    Test adding specifications to datasets.

    Scenarios covered:
    1) Adding an identical specification to a dataset
    2) Adding a spec with the same name as another but with different options
    3) Overwriting a spec which was added but never used.
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")

    # make a dataset
    factory = OptimizationDatasetFactory()
    opt_dataset = factory.create_dataset(
        dataset_name="Specification error check",
        molecules=mol,
        description="test adding new compute specs to datasets",
        tagline="test adding new compute specs",
    )
    opt_dataset.clear_qcspecs()

    # add a new mm spec
    opt_dataset.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_description="default openff spec",
        spec_name="openff-1.0.0",
    )
    opt_dataset.metadata.long_description_url = "https://test.org"

    # submit the optimizations and let the compute run
    opt_dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()
    fractal_compute_server.await_services()

    # grab the collection
    ds = client.get_collection(opt_dataset.dataset_type, opt_dataset.dataset_name)

    # 1) adding the identical specification again should return True
    added = opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["openff-1.0.0"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds,
    )
    assert added is True

    # 2) change part of the spec but keep the name the same
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="openff-1.2.1",
        basis="smirnoff",
        spec_name="openff-1.0.0",
        program="openmm",
        spec_description="openff-1.2.1 with wrong name.",
    )

    # same name with different settings must be rejected
    with pytest.raises(QCSpecificationError):
        opt_dataset.add_dataset_specification(
            spec=opt_dataset.qc_specifications["openff-1.0.0"],
            opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
            collection=ds,
        )

    # 3) add a new specification but no compute and make sure it is overwritten
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1x",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    added = opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["ani"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds,
    )
    assert added is True

    # now change the spec slightly and add again
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1ccx",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    added = opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["ani"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds,
    )
    assert added is True
def test_compute_openffworkflow(fractal_compute_server):
    """
    Exercise the OpenFFWorkflow collection: add torsiondrive and optimization
    fragments and check the reported final energies and molecules.
    """
    # Obtain a client
    client = ptl.FractalClient(fractal_compute_server)

    openff_workflow_options = {
        # Blank Fragmenter options
        "enumerate_states": {},
        "enumerate_fragments": {},
        "torsiondrive_input": {},

        # TorsionDriveRecord, Geometric, and QC options
        "torsiondrive_static_options": {
            "keywords": {},
            "optimization_spec": {
                "program": "geometric",
                "keywords": {
                    "coordsys": "tric",
                },
            },
            "qc_spec": {
                "driver": "gradient",
                "method": "UFF",
                "basis": "",
                "keywords": None,
                "program": "rdkit",
            },
        },
        "optimization_static_options": {
            "program": "geometric",
            "keywords": {
                "coordsys": "tric",
            },
            "qc_spec": {
                "driver": "gradient",
                "method": "UFF",
                "basis": "",
                "keywords": None,
                "program": "rdkit",
            },
        },
    }
    workflow = ptl.collections.OpenFFWorkflow("Workflow1", client=client, **openff_workflow_options)

    # Add a torsiondrive fragment and wait for the compute
    hooh = ptl.data.get_molecule("hooh.json")
    torsion_fragment = {
        "label1": {
            "type": "torsiondrive_input",
            "initial_molecule": hooh.json_dict(),
            "grid_spacing": [90],
            "dihedrals": [[0, 1, 2, 3]],
        },
    }
    workflow.add_fragment("HOOH", torsion_fragment)
    assert set(workflow.list_fragments()) == {"HOOH"}
    fractal_compute_server.await_services(max_iter=5)

    energies = workflow.list_final_energies()
    assert energies.keys() == {"HOOH"}
    assert energies["HOOH"].keys() == {"label1"}

    molecules = workflow.list_final_molecules()
    assert molecules.keys() == {"HOOH"}
    assert molecules["HOOH"].keys() == {"label1"}

    # Add a constrained optimization under the same fragment name
    dihedral_constraint = {"type": 'dihedral', "indices": [0, 1, 2, 3], "value": 0}
    optimization_input = {
        "label2": {
            "type": "optimization_input",
            "initial_molecule": hooh.json_dict(),
            "constraints": {
                'set': [dihedral_constraint],
            },
        },
    }
    workflow.add_fragment("HOOH", optimization_input)
    fractal_compute_server.await_services(max_iter=5)

    energies = workflow.list_final_energies()
    assert energies["HOOH"].keys() == {"label1", "label2"}
    assert pytest.approx(0.00259754, 1.e-4) == energies["HOOH"]["label2"]

    molecules = workflow.list_final_molecules()
    assert molecules.keys() == {"HOOH"}
    assert molecules["HOOH"].keys() == {"label1", "label2"}

    # Add a second fragment
    butane = ptl.data.get_molecule("butane.json")
    butane_id = butane.identifiers.canonical_isomeric_explicit_hydrogen_mapped_smiles

    butane_fragment = {
        "label1": {
            "type": "torsiondrive_input",
            "initial_molecule": butane.json_dict(),
            "grid_spacing": [90],
            "dihedrals": [[0, 2, 3, 1]],
        },
    }
    workflow.add_fragment(butane_id, butane_fragment)
    assert set(workflow.list_fragments()) == {butane_id, "HOOH"}

    # No services are awaited after adding butane; its energy entry is expected to be empty.
    energies = workflow.list_final_energies()
    assert energies.keys() == {butane_id, "HOOH"}
    assert energies[butane_id].keys() == {"label1"}
    assert energies[butane_id]["label1"] == {}
def test_service_gridoptimization_single_opt(fractal_compute_server):
    """Step a preoptimized two-dimensional grid optimization forward and check each stage."""
    client = ptl.FractalClient(fractal_compute_server)

    # Add a HOOH
    hooh = ptl.data.get_molecule("hooh.json")
    initial_distance = hooh.measure([1, 2])
    mol_ret = client.add_molecules([hooh])

    # Options: one relative distance scan and one absolute dihedral scan.
    scans = [
        {"type": "distance", "indices": [1, 2], "steps": [-0.1, 0.0], "step_type": "relative"},
        {"type": "dihedral", "indices": [0, 1, 2, 3], "steps": [-90, 0], "step_type": "absolute"},
    ]
    service_options = {
        "keywords": {"preoptimization": True, "scans": scans},
        "optimization_spec": {"program": "geometric", "keywords": {"coordsys": "tric"}},
        "qc_spec": {"driver": "gradient", "method": "UFF", "basis": "", "keywords": None, "program": "rdkit"},
        "initial_molecule": mol_ret[0],
    }
    service = GridOptimizationInput(**service_options)

    submitted = client.add_service([service], tag="gridopt", priority="low")

    def fetch_record():
        # Re-query the procedure so each stage sees the latest server state.
        return client.query_procedures(id=submitted.ids)[0]

    # First iteration: only the preoptimization has been registered.
    fractal_compute_server.await_services(max_iter=1)
    result = fetch_record()
    assert result.grid_optimizations.keys() == {'"preoptimization"'}
    assert result.status == "RUNNING"

    # Second iteration: the starting grid point has been added.
    fractal_compute_server.await_services(max_iter=1)
    result = fetch_record()
    status = result.detailed_status()
    assert status["total_points"] == 5
    assert status["complete_tasks"] == 2
    assert result.grid_optimizations.keys() == {'"preoptimization"', "[1, 0]"}
    assert result.status == "RUNNING"

    # Third iteration: two more grid points have been added.
    fractal_compute_server.await_services(max_iter=1)
    result = fetch_record()
    assert result.grid_optimizations.keys() == {'"preoptimization"', "[1, 0]", "[0, 0]", "[1, 1]"}
    assert result.status == "RUNNING"

    # Run to completion.
    fractal_compute_server.await_services(max_iter=6)
    result = fetch_record()
    status = result.detailed_status()
    assert status["complete_tasks"] == 5
    assert result.status == "COMPLETE"

    assert result.starting_grid == (1, 0)
    assert pytest.approx(result.get_final_energies((0, 0)), abs=1.0e-4) == 0.0010044105443485617
    assert pytest.approx(result.get_final_energies((1, 1)), abs=1.0e-4) == 0.0026440964897817623

    assert result.starting_molecule != result.initial_molecule

    # Check initial vs starting molecule
    assert result.initial_molecule == mol_ret[0]
    starting_mol = client.query_molecules(id=result.starting_molecule)[0]
    starting_distance = starting_mol.measure([1, 2])
    assert pytest.approx(starting_distance) != initial_distance
    assert pytest.approx(starting_distance) == 2.488686479260597

    # Check tags on individual procedures
    proc_id = result.grid_optimizations["[0, 0]"]

    # completed tasks should be deleted
    remaining_tasks = client.query_tasks(base_result=proc_id)
    assert not remaining_tasks

    # Check final ResultRecords
    final_result_records = result.get_final_results()
    assert len(final_result_records) == 5
    assert final_result_records["preoptimization"].molecule == result.starting_molecule

    # Pull the full history
    preopt = result.get_history()["preoptimization"]
    assert preopt.initial_molecule == result.initial_molecule
    assert preopt.final_molecule == result.starting_molecule