def test_expanding_compute(fractal_compute_server, factory_type):
    """
    Make sure that if we expand the compute of a dataset, tasks are generated.
    """
    client = FractalClient(fractal_compute_server)
    molecule = Molecule.from_smiles("CC")
    molecule.generate_conformers(n_conformers=1)
    factory = factory_type()
    scan_enum = workflow_components.ScanEnumerator()
    scan_enum.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")
    factory.add_workflow_components(scan_enum)
    factory.clear_qcspecs()
    # add only mm specs
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="standard parsley spec")
    dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=molecule,
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )

    # make sure all expected indices get submitted
    dataset.submit(client=client)
    # grab the dataset and check the history
    ds = client.get_collection(dataset.type, dataset.dataset_name)
    assert ds.data.history == {"default"}

    # now make another dataset to expand the compute
    factory.clear_qcspecs()
    # add only mm specs
    factory.add_qc_spec(method="openff-1.2.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="parsley2",
                        spec_description="standard parsley spec")
    dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=[],
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )
    # now submit again
    dataset.submit(client=client)

    # now grab the dataset again and check the tasks list
    ds = client.get_collection(dataset.type, dataset.dataset_name)
    assert ds.data.history == {"default", "parsley2"}
    # make sure a record has been made
    entry = ds.get_entry(ds.df.index[0])
    assert "parsley2" in entry.object_map

def get_optimization_tracebacks(self,
                                fractal_uri,
                                dataset_name,
                                client=None,
                                compute_specs=None,
                                molids=None):
    """Get error tracebacks for optimizations that errored."""
    if client is None:
        client = FractalClient(fractal_uri, verify=False)

    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    df = optds.df.sort_index(ascending=True)

    if (molids is not None) and (len(molids) != 0):
        df = df.loc[list(molids)]

    if compute_specs is not None:
        df = df[compute_specs]

    errors = df.applymap(
        lambda x: x.get_error().error_message if x.status == 'ERROR' else None)

    # filter down to only those rows with errors
    errors = errors.dropna(how='all')

    return errors

def get_optimization_from_server(self,
                                 fractal_uri,
                                 dataset_name,
                                 client=None,
                                 compute_specs=None,
                                 molids=None):
    """Get full optimization data from the given molecules.

    """
    if client is None:
        client = FractalClient(fractal_uri, verify=False)

    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    df = optds.df

    if (molids is not None) and (len(molids) != 0):
        df = df.loc[list(molids)]

    if compute_specs is not None:
        df = df[compute_specs]

    out = []
    for opt in df.values.flatten():
        if opt.status != 'COMPLETE':
            continue
        optd = self._get_complete_optimization_result(opt, client)
        out.append(optd)

    return out

def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets")
    # add just mm spec
    dataset.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="mm default spec",
                        overwrite=True)

    # build some constraints
    constraints = Constraints()
    constraints.add_set_constraint(constraint_type="dihedral",
                                   indices=[2, 0, 1, 5],
                                   value=60,
                                   bonded=True)
    constraints.add_freeze_constraint(constraint_type="distance",
                                      indices=[0, 1],
                                      bonded=True)

    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(index=index,
                         molecule=ethane,
                         attributes=attributes,
                         constraints=constraints)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    final_molecule = record.get_final_molecule()
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60)
    assert final_molecule.measure((0, 1)) == pytest.approx(
        record.get_initial_molecule().measure((0, 1)))

def set_optimization_tag(self, fractal_uri, tag, dataset_name):
    """Set the compute tag for all incomplete optimizations in the dataset."""
    client = FractalClient(fractal_uri, verify=False)
    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    opts = optds.df.values.flatten()
    optids = [opt.id for opt in opts if opt.status != 'COMPLETE']

    for id in optids:
        client.modify_tasks(operation='modify', base_result=id, new_tag=tag)

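# The sketch below is illustrative only and is not part of the original module;
# it shows how the set_optimization_tag helper above might be called. The
# `manager` object, server URI, tag, and dataset name are assumed placeholders.
def _example_set_tag_usage(manager):
    """Hypothetical usage sketch for set_optimization_tag."""
    manager.set_optimization_tag(
        fractal_uri="localhost:7777",          # hypothetical local QCFractal server
        tag="openff-benchmark",                # hypothetical compute tag
        dataset_name="OpenFF Test Dataset",    # hypothetical dataset name
    )
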
def test_index_not_changed(fractal_compute_server, factory_type):
    """
    Make sure that when we submit molecules from a dataset/optimizationdataset
    with one input conformer that the index is not changed.
    """
    factory = factory_type()
    factory.clear_qcspecs()
    client = FractalClient(fractal_compute_server)
    # add only mm specs
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="parsley",
                        spec_description="standard parsley spec")
    molecule = Molecule.from_smiles("C")
    # make sure we only have one conformer
    molecule.generate_conformers(n_conformers=1)
    dataset = factory.create_dataset(
        dataset_name=f"Test index change for {factory.factory_type}",
        molecules=molecule,
        description="Test index change dataset",
        tagline="Testing index changes datasets",
    )
    dataset.metadata.long_description_url = "https://test.org"

    # now change the index name to something unique
    entry = dataset.dataset.pop(list(dataset.dataset.keys())[0])
    entry.index = "my_unique_index"
    dataset.dataset[entry.index] = entry

    dataset.submit(client=client)

    # pull the dataset and make sure our index is present
    ds = client.get_collection(dataset.dataset_type, dataset.dataset_name)

    if dataset.dataset_type == "DataSet":
        query = ds.get_records(method="openff-1.0.0",
                               basis="smirnoff",
                               program="openmm")
        assert "my_unique_index" in query.index
    else:
        assert "my_unique_index" in ds.df.index

def set_optimization_priority(self, fractal_uri, priority, dataset_name):
    from qcportal.models.task_models import PriorityEnum

    client = FractalClient(fractal_uri, verify=False)
    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    opts = optds.df.values.flatten()

    priority_map = {
        "high": PriorityEnum.HIGH,
        "normal": PriorityEnum.NORMAL,
        "low": PriorityEnum.LOW
    }

    optids = [opt.id for opt in opts if opt.status != 'COMPLETE']

    for id in optids:
        client.modify_tasks(operation='modify',
                            base_result=id,
                            new_priority=priority_map[priority])

def test_torsiondrive_scan_keywords(fractal_compute_server):
    """
    Test running torsiondrives with unique keyword settings which overwrite the
    global grid spacing and scan range.
    """
    client = FractalClient(fractal_compute_server)
    molecules = Molecule.from_smiles("CO")
    factory = TorsiondriveDatasetFactory()
    scan_enum = workflow_components.ScanEnumerator()
    scan_enum.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(scan_enum)
    factory.clear_qcspecs()
    factory.add_qc_spec(method="openff_unconstrained-1.1.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_description="scan range test",
                        spec_name="openff-1.1.0")
    dataset = factory.create_dataset(
        dataset_name="Torsiondrive scan keywords",
        molecules=molecules,
        description="Testing scan keywords which overwrite the global settings",
        tagline="Testing scan keywords which overwrite the global settings")

    # now set the keywords
    keys = list(dataset.dataset.keys())
    entry = dataset.dataset[keys[0]]
    entry.keywords = {"grid_spacing": [5], "dihedral_ranges": [(-10, 10)]}

    # now submit
    dataset.submit(client=client)
    fractal_compute_server.await_services(max_iter=50)

    # make sure all the results are complete
    ds = client.get_collection("TorsionDriveDataset", dataset.dataset_name)

    # get the entry
    record = ds.get_record(ds.df.index[0], "openff-1.1.0")
    assert record.keywords.grid_spacing == [5]
    assert record.keywords.grid_spacing != dataset.grid_spacing
    assert record.keywords.dihedral_ranges == [(-10, 10)]
    assert record.keywords.dihedral_ranges != dataset.dihedral_ranges

def errorcycle_optimizations(self,
                             fractal_uri,
                             dataset_name,
                             client=None,
                             compute_specs=None,
                             molids=None):
    """Restart optimizations that have failed.

    Parameters
    ----------
    compute_specs : iterable
        Iterable of compute spec names to error cycle only.
    molids : iterable
        Iterable of molecule ids to error cycle only.

    """
    if client is None:
        client = FractalClient(fractal_uri, verify=False)

    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    df = optds.df

    if (molids is not None) and (len(molids) != 0):
        df = df.loc[list(molids)]

    if compute_specs is not None:
        df = df[compute_specs]

    for opt in df.values.flatten():
        if opt.status == 'ERROR':
            client.modify_tasks(operation='restart', base_result=opt.id)
            print(f"Restarted ERRORed optimization `{opt.id}`")
        if opt.status == 'INCOMPLETE' and (opt.final_molecule is not None):
            client.modify_tasks(operation='regenerate', base_result=opt.id)
            print(f"Regenerated INCOMPLETE optimization `{opt.id}`")

def get_optimization_status(self,
                            fractal_uri,
                            dataset_name,
                            client=None,
                            compute_specs=None,
                            molids=None):
    """Get status of optimization for each molecule ID.

    """
    if client is None:
        client = FractalClient(fractal_uri, verify=False)

    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    df = optds.df.sort_index(ascending=True)

    if (molids is not None) and (len(molids) != 0):
        df = df.loc[list(molids)]

    if compute_specs is not None:
        df = df[compute_specs]

    return df

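# The sketch below is illustrative only and is not part of the original module;
# it shows how the status, error-cycling, and traceback helpers above might be
# combined in one pass. The `manager` object, server URI, dataset name, and
# spec name are assumed placeholders.
def _example_status_and_errorcycle_usage(manager):
    """Hypothetical usage sketch for the optimization status/error helpers."""
    # pull the per-molecule, per-spec status DataFrame for one compute spec
    status = manager.get_optimization_status(
        fractal_uri="localhost:7777",                 # hypothetical server URI
        dataset_name="OpenFF Test Optimizations",     # hypothetical dataset name
        compute_specs=["default"],                    # hypothetical spec name
    )
    print(status.applymap(lambda opt: opt.status.value))

    # restart anything that errored, then inspect the tracebacks that remain
    manager.errorcycle_optimizations(
        fractal_uri="localhost:7777",
        dataset_name="OpenFF Test Optimizations",
    )
    errors = manager.get_optimization_tracebacks(
        fractal_uri="localhost:7777",
        dataset_name="OpenFF Test Optimizations",
    )
    print(errors)
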
def test_optimization_submissions(fractal_compute_server, specification):
    """Test submitting an Optimization dataset to a snowflake server."""
    client = FractalClient(fractal_compute_server)
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="test",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=molecules[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]
        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1
            # if we used psi4 make sure the properties were captured
            if program == "psi4":
                result = record.get_trajectory()[0]
                assert "CURRENT DIPOLE X" in result.extras["qcvars"].keys()
                assert "SCF QUADRUPOLE XX" in result.extras["qcvars"].keys()

def test_basic_submissions_wavefunction(fractal_compute_server):
    """
    Test submitting a basic dataset with a wavefunction protocol and make sure it is executed.
    """
    # only a psi4 test
    if not has_program("psi4"):
        pytest.skip("Program psi4 not found.")

    client = FractalClient(fractal_compute_server)

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    factory.add_qc_spec(method="hf",
                        basis="sto-6g",
                        program="psi4",
                        spec_name="default",
                        spec_description="wavefunction spec",
                        store_wavefunction="orbitals_and_eigenvalues")

    dataset = factory.create_dataset(
        dataset_name="Test single points with wavefunction",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets with wavefunction",
    )

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            basis = result.get_wavefunction("basis")
            assert basis.name.lower() == "sto-6g"
            orbitals = result.get_wavefunction("orbitals_a")
            assert orbitals.shape is not None

def test_basic_submissions_single_spec(fractal_compute_server, specification):
    """Test submitting a basic dataset to a snowflake server."""
    client = FractalClient(fractal_compute_server)
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="testing the single points",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test single points info {program}, {driver}",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None

def test_adding_compute(fractal_compute_server, dataset_data):
    """
    Test adding new compute to each of the dataset types using non-psi4 programs.
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")
    factory_type, dataset_type = dataset_data
    # make and clear out the qc specs
    factory = factory_type()
    factory.clear_qcspecs()
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="default spec for openff")
    dataset = factory.create_dataset(
        dataset_name=f"Test adding compute to {factory_type}",
        molecules=mol,
        description=f"Testing adding compute to a {dataset_type} dataset",
        tagline="tests for adding compute.")

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset
    dataset.submit(client=client, await_result=False)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # now lets make a dataset with new compute and submit it
    # transfer the metadata to compare the elements
    compute_dataset = dataset_type(dataset_name=dataset.dataset_name,
                                   metadata=dataset.metadata)
    compute_dataset.clear_qcspecs()
    # now add the new compute spec
    compute_dataset.add_qc_spec(method="uff",
                                basis=None,
                                program="rdkit",
                                spec_name="rdkit",
                                spec_description="rdkit basic spec")

    # make sure the dataset has no molecules and submit it
    assert compute_dataset.dataset == {}
    compute_dataset.submit(client=client)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # make sure all the results are complete
    ds = client.get_collection(dataset.dataset_type, dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # update all specs into one dataset
    dataset.add_qc_spec(**compute_dataset.qc_specifications["rdkit"].dict())

    # get the last run spec
    if dataset.dataset_type == "DataSet":
        for specification in ds.data.history:
            driver, program, method, basis, spec_name = specification
            spec = dataset.qc_specifications[spec_name]
            assert driver == dataset.driver
            assert program == spec.program
            assert method == spec.method
            assert basis == spec.basis

        for spec in dataset.qc_specifications.values():
            query = ds.get_records(
                method=spec.method,
                basis=spec.basis,
                program=spec.program,
            )
            for index in query.index:
                result = query.loc[index].record
                assert result.status.value.upper() == "COMPLETE"
                assert result.error is None
                assert result.return_result is not None
    else:
        # check the qc spec
        for qc_spec in dataset.qc_specifications.values():
            spec = ds.data.specs[qc_spec.spec_name]
            assert spec.description == qc_spec.spec_description
            assert spec.qc_spec.driver == dataset.driver
            assert spec.qc_spec.method == qc_spec.method
            assert spec.qc_spec.basis == qc_spec.basis
            assert spec.qc_spec.program == qc_spec.program

            # check the keywords
            keywords = client.query_keywords(spec.qc_spec.keywords)[0]

            assert keywords.values["maxiter"] == dataset.maxiter
            assert keywords.values["scf_properties"] == dataset.scf_properties

            # query the dataset
            ds.query(qc_spec.spec_name)

            for index in ds.df.index:
                record = ds.df.loc[index].default
                # this will take some time so make sure it is running with no error
                assert record.status.value == "COMPLETE", print(record.dict())
                assert record.error is None

def test_adding_specifications(fractal_compute_server):
    """
    Test adding specifications to datasets. Here we are testing multiple scenarios:
    1) Adding an identical specification to a dataset
    2) Adding a spec with the same name as another but with different options
    3) Overwriting a spec which was added but never used.
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")
    # make a dataset
    factory = OptimizationDatasetFactory()
    opt_dataset = factory.create_dataset(
        dataset_name="Specification error check",
        molecules=mol,
        description="test adding new compute specs to datasets",
        tagline="test adding new compute specs")
    opt_dataset.clear_qcspecs()
    # add a new mm spec
    opt_dataset.add_qc_spec(method="openff-1.0.0",
                            basis="smirnoff",
                            program="openmm",
                            spec_description="default openff spec",
                            spec_name="openff-1.0.0")

    opt_dataset.metadata.long_description_url = "https://test.org"
    # submit the optimizations and let the compute run
    opt_dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()
    fractal_compute_server.await_services()

    # grab the collection
    ds = client.get_collection(opt_dataset.dataset_type, opt_dataset.dataset_name)

    # now try and add the specification again; this should return True
    assert opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["openff-1.0.0"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds) is True

    # now change part of the spec but keep the name the same
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(method="openff-1.2.1",
                            basis="smirnoff",
                            spec_name="openff-1.0.0",
                            program="openmm",
                            spec_description="openff-1.2.1 with wrong name.")

    # now try and add this specification with the same name but different settings
    with pytest.raises(QCSpecificationError):
        opt_dataset.add_dataset_specification(
            spec=opt_dataset.qc_specifications["openff-1.0.0"],
            opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
            collection=ds)

    # now add a new specification but no compute and make sure it is overwritten
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(method="ani1x",
                            basis=None,
                            program="torchani",
                            spec_name="ani",
                            spec_description="a ani spec")
    assert opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["ani"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds) is True

    # now change the spec slightly and add again
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(method="ani1ccx",
                            basis=None,
                            program="torchani",
                            spec_name="ani",
                            spec_description="a ani spec")
    assert opt_dataset.add_dataset_specification(
        spec=opt_dataset.qc_specifications["ani"],
        opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
        collection=ds) is True

def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with pcm water in the specification."""
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.add_qc_spec(method="hf",
                        basis="sto-3g",
                        program=program,
                        spec_name="default",
                        spec_description="testing the single points with pcm",
                        implicit_solvent=PCMSettings(units="au", medium_Solvent="water"),
                        overwrite=True)

    # only use one molecule due to the time it takes to run with pcm
    dataset = factory.create_dataset(
        dataset_name="Test single points with pcm water",
        molecules=molecules[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0

def test_basic_submissions_multiple_spec(fractal_compute_server):
    """Test submitting a basic dataset to a snowflake server with multiple qcspecs."""
    client = FractalClient(fractal_compute_server)

    qc_specs = [{
        "method": "openff-1.0.0",
        "basis": "smirnoff",
        "program": "openmm",
        "spec_name": "openff"
    }, {
        "method": "gaff-2.11",
        "basis": "antechamber",
        "program": "openmm",
        "spec_name": "gaff"
    }]

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    for spec in qc_specs:
        factory.add_qc_spec(**spec, spec_description="testing the single points")

    dataset = factory.create_dataset(
        dataset_name="Test single points multiple specs",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # get the last run spec
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None

def test_optimization_submissions_with_pcm(fractal_compute_server):
    """Test submitting an Optimization dataset to a snowflake server with PCM."""
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    # use a single small molecule due to the extra time PCM takes
    molecules = Molecule.from_smiles("C")

    factory = OptimizationDatasetFactory(driver="gradient")
    factory.add_qc_spec(method="hf",
                        basis="sto-3g",
                        program=program,
                        spec_name="default",
                        spec_description="test",
                        implicit_solvent=PCMSettings(units="au", medium_Solvent="water"),
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name="Test optimizations info with pcm water",
        molecules=molecules,
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # make sure all the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]
        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1
            result = record.get_trajectory()[0]
            assert "CURRENT DIPOLE X" in result.extras["qcvars"].keys()
            assert "SCF QUADRUPOLE XX" in result.extras["qcvars"].keys()
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0

def execute_optimization_from_server(self,
                                     fractal_uri,
                                     dataset_name,
                                     output_directory=None,
                                     ncores=1,
                                     memory=2,
                                     client=None,
                                     compute_specs=None,
                                     molids=None,
                                     scf_maxiter=200,
                                     geometric_maxiter=300,
                                     geometric_coordsys='dlc',
                                     geometric_qccnv=False):
    """Execute optimization from the given molecule locally on this host.

    Will not send results back to the server; this is purely for debugging.

    TODO: make this send results back to server using same API as manager does,
    then merge with `execute_...` above.

    """
    from datetime import datetime
    import json

    if client is None:
        client = FractalClient(fractal_uri, verify=False)

    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    df = optds.df

    if (molids is not None) and (len(molids) != 0):
        df = df.loc[list(molids)]

    if compute_specs is not None:
        df = df[compute_specs]

    local_options = {"ncores": ncores, "memory": memory}

    results = []
    for spec_name in df:
        if output_directory is not None:
            os.makedirs(os.path.join(output_directory, spec_name, 'error_mols'),
                        exist_ok=True)

        print("Processing spec: '{}'".format(spec_name))
        for id, opt in df[spec_name].iteritems():

            # fix to ensure output fidelity of ids; losing 02 padding on conformer
            org, molecule, conformer = id.split('-')
            output_id = "{org}-{molecule:05}-{conformer:02}".format(
                org=org, molecule=int(molecule), conformer=int(conformer))

            # subfolders for each compute spec, files named according to molecule ids
            if output_directory is not None:
                outfile = "{}".format(
                    os.path.join(output_directory, spec_name, output_id))

            print("... '{}'".format(id))

            #task = client.query_tasks(base_result=opt.id)[0]
            inputs = self._args_from_optimizationrecord(opt, client)

            # execute optimization
            start_dt = datetime.utcnow()

            result = self._execute_qcengine(
                inputs,
                local_options=local_options,
                scf_maxiter=scf_maxiter,
                geometric_maxiter=geometric_maxiter,
                geometric_coordsys=geometric_coordsys,
                geometric_qccnv=geometric_qccnv)

            end_dt = datetime.utcnow()
            perfd = {
                'start': start_dt.isoformat(),
                'end': end_dt.isoformat()
            }

            if output_directory is not None:
                if result.success:
                    try:
                        final_molecule = self._process_optimization_result(
                            output_id, result)
                        self._execute_output_results(
                            output_id=output_id,
                            resultjson=result.json(),
                            final_molecule=final_molecule,
                            outfile=outfile,
                            success=True,
                            perfd=perfd)
                    except Exception as e:
                        print("... '{}' : export error".format(id))
                        final_molecule = None

                        error_outfile = "{}".format(
                            os.path.join(output_directory, spec_name,
                                         'error_mols', output_id))

                        try:
                            with open("{}.txt".format(error_outfile), 'w') as f:
                                f.write(str(e))
                        except:
                            pass

                        self._execute_output_results(
                            output_id=output_id,
                            resultjson=result.json(),
                            final_molecule=final_molecule,
                            outfile=error_outfile,
                            success=False,
                            perfd=perfd)
                else:
                    print("... '{}' : compute failed".format(id))
                    final_molecule = None

                    error_outfile = "{}".format(
                        os.path.join(output_directory, spec_name, 'error_mols',
                                     output_id))

                    self._execute_output_results(
                        output_id=output_id,
                        resultjson=result,
                        final_molecule=final_molecule,
                        outfile=error_outfile,
                        success=False,
                        perfd=perfd)

            results.append(result)

    return results

def test_torsiondrive_submissions(fractal_compute_server, specification):
    """
    Test submitting a torsiondrive dataset and computing it.
    """
    client = FractalClient(fractal_compute_server)
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_smiles("CO")

    factory = TorsiondriveDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="test",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test torsiondrives info {program}, {driver}",
        molecules=molecules,
        description="Test torsiondrive dataset",
        tagline="Testing torsiondrive datasets",
    )

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_services(max_iter=50)

    # make sure all the results are complete
    ds = client.get_collection("TorsionDriveDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]
        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            # this will take some time so make sure it is running with no error
            assert record.status.value == "COMPLETE", print(record.dict())
            assert record.error is None
            assert len(record.final_energy_dict) == 24

def export_molecule_data(self,
                         fractal_uri,
                         output_directory,
                         dataset_name,
                         delete_existing=False,
                         keep_existing=True):
    """Export all molecule data from target QCFractal server to the given directory.

    Parameters
    ----------
    fractal_uri : str
        Target QCFractal server URI.
    output_directory : str
        Directory path to deposit exported data.
    dataset_name : str
        Dataset name to extract from the QCFractal server.
    delete_existing : bool (False)
        If True, delete existing directory if present.
    keep_existing : bool (True)
        If True, keep existing files in export directory.
        Files corresponding to server data will not be re-exported.
        Relies *only* on filepaths of existing files for determining match.

    """
    import json

    # get dataset
    client = FractalClient(fractal_uri, verify=False)
    optds = client.get_collection("OptimizationDataset", dataset_name)
    optds.status()

    try:
        os.makedirs(output_directory)
    except OSError:
        if delete_existing:
            shutil.rmtree(output_directory)
        elif keep_existing:
            pass
        else:
            raise Exception(
                f'Output directory {output_directory} already exists. '
                'Specify `delete_existing=True` to remove, or `keep_existing=True` to tolerate'
            )

    # for each compute spec, create a folder in the output directory
    # deposit SDF giving final molecule, energy
    specs = optds.list_specifications().index.tolist()
    for spec in specs:
        print("Exporting spec: '{}'".format(spec))
        os.makedirs(os.path.join(output_directory, spec, 'error_mols'),
                    exist_ok=True)
        optentspec = optds.get_specification(spec)
        records = optds.data.dict()['records']

        for id, opt in optds.df[spec].iteritems():

            # skip incomplete cases
            if opt.final_molecule is None:
                print("... '{}' : skipping INCOMPLETE".format(id))
                continue

            # fix to ensure output fidelity of ids; losing 02 padding on conformer
            org, molecule, conformer = id.split('-')
            output_id = "{org}-{molecule:05}-{conformer:02}".format(
                org=org, molecule=int(molecule), conformer=int(conformer))

            # subfolders for each compute spec, files named according to molecule ids
            outfile = "{}".format(
                os.path.join(output_directory, spec, output_id))

            # if we did not delete everything at the start and the path already exists,
            # skip this one; reduces processing and writes to filesystem
            if (not delete_existing) and os.path.exists("{}.sdf".format(outfile)):
                print("... '{}' : skipping SDF exists".format(id))
                continue

            print("... '{}' : exporting COMPLETE".format(id))

            optd = self._get_complete_optimization_result(opt, client)
            optdjson = json.dumps(optd)

            perfd = {
                'walltime': opt.provenance.wall_time,
                'completed': opt.modified_on.isoformat()
            }

            try:
                offmol = self._mol_from_qcserver(records[id.lower()])

                # set conformer as final, optimized geometry
                final_qcmol = opt.get_final_molecule()
                final_molecule = self._process_final_mol(
                    output_id, offmol, final_qcmol, optentspec.qc_spec.method,
                    optentspec.qc_spec.basis, optentspec.qc_spec.program,
                    opt.energies)

                self._execute_output_results(output_id=output_id,
                                             resultjson=optdjson,
                                             final_molecule=final_molecule,
                                             outfile=outfile,
                                             success=True,
                                             perfd=perfd)
            except Exception as e:
                print("... '{}' : export error".format(id))
                final_molecule = None

                error_outfile = "{}".format(
                    os.path.join(output_directory, spec, 'error_mols', output_id))

                try:
                    with open("{}.txt".format(error_outfile), 'w') as f:
                        f.write(str(e))
                except:
                    pass

                self._execute_output_results(output_id=output_id,
                                             resultjson=optdjson,
                                             final_molecule=final_molecule,
                                             outfile=error_outfile,
                                             success=False,
                                             perfd=perfd)

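# The sketch below is illustrative only and is not part of the original module;
# it shows how a full export run with export_molecule_data above might look.
# The `manager` object, server URI, dataset name, and output path are assumed
# placeholders.
def _example_export_usage(manager):
    """Hypothetical usage sketch for export_molecule_data."""
    manager.export_molecule_data(
        fractal_uri="localhost:7777",                 # hypothetical server URI
        output_directory="./exported-data",           # hypothetical output path
        dataset_name="OpenFF Test Optimizations",     # hypothetical dataset name
        delete_existing=False,
        keep_existing=True,
    )
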