def test_create_dataset_atom_map():
    """Build a dataset from a molecule that carries an ``atom_map`` property.

    The test has no explicit assertion: it passes as long as
    ``create_dataset`` accepts the mapped molecule without raising.
    """
    dataset_factory = OptimizationDatasetFactory()

    mapped_molecule = Molecule.from_smiles("CCCC([O-])=O")
    # identity map over indices 1..4
    mapped_molecule.properties['atom_map'] = {
        index: index for index in range(1, 5)
    }

    dataset_factory.create_dataset(
        dataset_name="test name",
        molecules=mapped_molecule,
        description="Force field test",
        tagline="A test dataset",
    )
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.

    A dihedral is set to 60 and a bonded distance is frozen on ethane; after
    the optimization has run on the server the final geometry is measured to
    confirm that both constraints were respected.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets",
    )

    # add just mm spec
    dataset.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="default",
        spec_description="mm default spec",
        overwrite=True,
    )

    # build some constraints
    constraints = Constraints()
    constraints.add_set_constraint(
        constraint_type="dihedral", indices=[2, 0, 1, 5], value=60, bonded=True
    )
    constraints.add_freeze_constraint(
        constraint_type="distance", indices=[0, 1], bonded=True
    )

    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(
        index=index,
        molecule=ethane,
        attributes=attributes,
        constraints=constraints,
    )

    # add a mock url so the metadata is complete enough to submit
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset and wait for the compute to finish
    dataset.submit(client=client)
    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    # NOTE: `assert pytest.approx(a, b)` only builds an approx object (with b
    # as the relative tolerance) and is always truthy; the measured values
    # must be compared against the approx object with `==`.
    final_molecule = record.get_final_molecule()
    # the set dihedral is only expected to match to within the optimizer's
    # convergence, hence the explicit absolute tolerance
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60, abs=1e-2)
    # the frozen distance must be unchanged from the initial geometry
    initial_distance = record.get_initial_molecule().measure((0, 1))
    assert final_molecule.measure((0, 1)) == pytest.approx(initial_distance)
def test_optimization_driver():
    """
    Check that the driver of an optimization factory is read-only.
    """
    opt_factory = OptimizationDatasetFactory()

    # assigning a different driver has to be rejected ...
    with pytest.raises(DriverError):
        opt_factory.driver = "energy"

    # ... and the factory must still report the gradient driver afterwards
    assert opt_factory.driver == "gradient"
def _create_qcsubmit_dataset(self, dataset_name, mols, season):
    """Assemble a QCSubmit ``OptimizationDataset`` for the given molecules.

    Parameters
    ----------
    dataset_name : str
        Name given to the new dataset.
    mols : iterable
        Molecules to add; each one is indexed by ``self._mol_to_id(mol)``.
    season : str
        Key into ``SEASONS`` selecting the QC specifications to attach.

    Returns
    -------
    OptimizationDataset
        The populated dataset with the season's specifications and the
        modified optimization-procedure settings applied.
    """
    from openff.qcsubmit.factories import OptimizationDataset, OptimizationDatasetFactory

    # create OptimizationDataset with QCSubmit
    dataset = OptimizationDataset(dataset_name=dataset_name)
    metadata_factory = OptimizationDatasetFactory()

    for molecule in mols:
        dataset.add_molecule(
            index=self._mol_to_id(molecule),
            molecule=molecule,
            attributes=metadata_factory.create_cmiles_metadata(molecule),
        )

    dataset.qc_specifications = SEASONS[season]
    dataset.metadata.long_description_url = "https://localhost.local/null"

    # add in known modifications to `OptimizationDataset` defaults
    procedure = dataset.optimization_procedure
    procedure.coordsys = 'dlc'
    procedure.reset = True

    return dataset
def test_optimization_submissions_with_pcm(fractal_compute_server):
    """Submit an optimization dataset that uses PCM to a snowflake server.

    The test is skipped when psi4 is not available. It checks that
    submission fails while the long description is missing, then submits
    for real and compares the stored collection against the local dataset.
    """
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    # use a single small molecule due to the extra time PCM takes
    methane = Molecule.from_smiles("C")

    factory = OptimizationDatasetFactory(driver="gradient")
    pcm_settings = PCMSettings(units="au", medium_Solvent="water")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="test",
        implicit_solvent=pcm_settings,
        overwrite=True,
    )

    dataset = factory.create_dataset(
        dataset_name="Test optimizations info with pcm water",
        molecules=methane,
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # without a long description the submission must be refused
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # restore the description and submit for real
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)
    fractal_compute_server.await_results()

    # pull the collection back from the server
    collection = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata and provenance must round-trip unchanged
    stored_metadata = Metadata(**collection.data.metadata)
    assert stored_metadata == dataset.metadata
    assert dataset.provenance == collection.data.provenance

    for expected in dataset.qc_specifications.values():
        stored = collection.data.specs[expected.spec_name]
        stored_qc = stored.qc_spec

        # the specification itself
        assert stored.description == expected.spec_description
        assert stored_qc.driver == dataset.driver
        assert stored_qc.method == expected.method
        assert stored_qc.basis == expected.basis
        assert stored_qc.program == expected.program

        # the keywords attached to the specification
        stored_keywords = client.query_keywords(stored_qc.keywords)[0]
        assert stored_keywords.values["maxiter"] == expected.maxiter
        assert stored_keywords.values["scf_properties"] == expected.scf_properties

        # the individual optimization records
        collection.query(expected.spec_name)
        for row in collection.df.index:
            record = collection.df.loc[row].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1

            first_step = record.get_trajectory()[0]
            qcvars = first_step.extras["qcvars"]
            assert "CURRENT DIPOLE X" in qcvars.keys()
            assert "SCF QUADRUPOLE XX" in qcvars.keys()
            # make sure the PCM result was captured
            assert qcvars["PCM POLARIZATION ENERGY"] < 0
def test_optimization_submissions(fractal_compute_server, specification):
    """Submit an optimization dataset to a snowflake server and verify it.

    ``specification`` is unpacked into a dict of QC-spec keyword arguments and
    a driver. The test is skipped when the requested program is unavailable.
    """
    client = FractalClient(fractal_compute_server)

    spec_kwargs, driver = specification
    program = spec_kwargs["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    conformers = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(
        **spec_kwargs,
        spec_name="default",
        spec_description="test",
        overwrite=True,
    )

    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=conformers[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # without a long description the submission must be refused
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # restore the description and submit for real
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)
    fractal_compute_server.await_results()

    # pull the collection back from the server
    collection = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata and provenance must round-trip unchanged
    stored_metadata = Metadata(**collection.data.metadata)
    assert stored_metadata == dataset.metadata
    assert dataset.provenance == collection.data.provenance

    for expected in dataset.qc_specifications.values():
        stored = collection.data.specs[expected.spec_name]
        stored_qc = stored.qc_spec

        # the specification itself
        assert stored.description == expected.spec_description
        assert stored_qc.driver == dataset.driver
        assert stored_qc.method == expected.method
        assert stored_qc.basis == expected.basis
        assert stored_qc.program == expected.program

        # the keywords attached to the specification
        stored_keywords = client.query_keywords(stored_qc.keywords)[0]
        assert stored_keywords.values["maxiter"] == expected.maxiter
        assert stored_keywords.values["scf_properties"] == expected.scf_properties

        # the individual optimization records
        collection.query(expected.spec_name)
        for row in collection.df.index:
            record = collection.df.loc[row].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1

            # if we used psi4 make sure the properties were captured
            if program == "psi4":
                qcvars = record.get_trajectory()[0].extras["qcvars"]
                assert "CURRENT DIPOLE X" in qcvars.keys()
                assert "SCF QUADRUPOLE XX" in qcvars.keys()
def test_adding_specifications(fractal_compute_server):
    """
    Exercise ``_add_dataset_specification`` against an existing collection.

    Scenarios covered:
        1) re-adding a specification identical to one already on the dataset
        2) adding a specification that reuses a name but has different options
        3) overwriting a specification that was added but never computed
    """
    client = FractalClient(fractal_compute_server)
    methanol = Molecule.from_smiles("CO")

    # make a dataset
    factory = OptimizationDatasetFactory()
    opt_dataset = factory.create_dataset(
        dataset_name="Specification error check",
        molecules=methanol,
        description="test adding new compute specs to datasets",
        tagline="test adding new compute specs",
    )

    # start from a clean slate and add a new mm spec
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_description="default openff spec",
        spec_name="openff-1.0.0",
    )

    # submit the optimizations and let the compute run
    opt_dataset.submit(client=client)
    fractal_compute_server.await_results()
    fractal_compute_server.await_services()

    # grab the collection
    ds = client.get_collection(opt_dataset.type, opt_dataset.dataset_name)

    def push_spec(spec_name):
        # register the named local spec on the remote collection
        return opt_dataset._add_dataset_specification(
            spec=opt_dataset.qc_specifications[spec_name],
            procedure_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
            dataset=ds,
        )

    # 1) adding the identical specification again should return True
    assert push_spec("openff-1.0.0") is True

    # 2) same name, different settings: this must be rejected
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="openff-1.2.1",
        basis="smirnoff",
        spec_name="openff-1.0.0",
        program="openmm",
        spec_description="openff-1.2.1 with wrong name.",
    )
    with pytest.raises(QCSpecificationError):
        push_spec("openff-1.0.0")

    # 3) add a brand new specification without running any compute ...
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1x",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert push_spec("ani") is True

    # ... then change it slightly and make sure it is overwritten
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1ccx",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert push_spec("ani") is True
def execute_optimization_from_molecules(self,
                                        input_paths,
                                        output_directory,
                                        season,
                                        ncores=1,
                                        memory=2,
                                        delete_existing=False,
                                        keep_existing=True,
                                        recursive=False,
                                        scf_maxiter=200,
                                        geometric_maxiter=300,
                                        geometric_coordsys='dlc',
                                        geometric_qccnv=False):
    """Execute optimizations from the given SDF molecules locally on this host.

    Optimizations are performed in series for the molecules given,
    with `ncores` and `memory` setting the resource constraints for each
    optimization.

    Parameters
    ----------
    input_paths : iterable of Path-like
        Paths to SDF files or directories; if directories, all SDF files
        in them are loaded, recursively.
    output_directory : str
        Directory path to deposit exported data.
    season : str
        Benchmark season identifier (a key of `SEASONS`).
        Indicates the mix of compute specs to utilize.
    ncores : int
        Number of concurrent cores to use for each optimization.
    memory : float
        Amount of memory in GiB to allow for each optimization.
    delete_existing : bool (False)
        If True, delete existing directory if present.
    keep_existing : bool (True)
        If True, keep existing files in export directory.
        Inputs whose expected output files all exist are not re-run.
        Relies *only* on filepaths of existing files for determining match.
    recursive : bool
        If True, recursively load SDFs from any directories given in `input_paths`.
    scf_maxiter : int
        Forwarded unchanged to `self._execute_qcengine`.
    geometric_maxiter : int
        Forwarded unchanged to `self._execute_qcengine`.
    geometric_coordsys : str
        Forwarded unchanged to `self._execute_qcengine`.
    geometric_qccnv : bool
        Forwarded unchanged to `self._execute_qcengine`.

    Returns
    -------
    list
        The objects returned by `self._execute_qcengine`, one per
        (compute spec, molecule) pair, in execution order.

    Raises
    ------
    Exception
        If `output_directory` exists and neither `delete_existing` nor
        `keep_existing` is set.
    """
    from datetime import datetime

    from openff.qcsubmit.factories import OptimizationDatasetFactory

    # fail early if output_directory already exists and we aren't deleting it
    if os.path.isdir(output_directory):
        if delete_existing:
            shutil.rmtree(output_directory)
        elif not keep_existing:
            raise Exception(
                f'Output directory {output_directory} already exists. '
                'Specify `delete_existing=True` to remove, or `keep_existing=True` to tolerate'
            )

    # get paths to submit, using output directory contents to inform choice
    # for the given specs, if *any* expected output files are not present,
    # we submit corresponding input file
    if keep_existing:
        in_out_path_map = self._source_specs_output_paths(
            input_paths, SEASONS[season], output_directory, recursive=recursive)
        input_paths = [
            input_file
            for input_file, output_files in in_out_path_map.items()
            if not all(map(os.path.exists, output_files))
        ]

    # extract molecules from SDF inputs
    mols = mols_from_paths(input_paths, recursive=recursive)

    factory = OptimizationDatasetFactory()
    local_options = {"ncores": ncores, "memory": memory}

    results = []
    for spec_name, compute_spec in SEASONS[season].items():
        print("Processing spec: '{}'".format(spec_name))

        # subfolders for each compute spec; failures go to `error_mols`
        spec_directory = os.path.join(output_directory, spec_name)
        error_directory = os.path.join(spec_directory, 'error_mols')
        os.makedirs(error_directory, exist_ok=True)

        for mol in mols:
            mol_id = self._mol_to_id(mol)

            # fix to ensure output fidelity of ids; losing 02 padding on conformer
            org, molecule, conformer = mol_id.split('-')
            output_id = "{org}-{molecule:05}-{conformer:02}".format(
                org=org, molecule=int(molecule), conformer=int(conformer))

            # files named according to molecule ids
            outfile = os.path.join(spec_directory, output_id)
            error_outfile = os.path.join(error_directory, output_id)

            print("... '{}'".format(mol_id))
            input_data = self._generate_optimization_input(
                mol, compute_spec, factory)

            # execute optimization, recording wall-clock start and end
            # NOTE: naive UTC timestamps are kept on purpose; switching to
            # timezone-aware datetimes would change the exported ISO strings.
            start_dt = datetime.utcnow()
            result = self._execute_qcengine(
                input_data,
                local_options=local_options,
                scf_maxiter=scf_maxiter,
                geometric_maxiter=geometric_maxiter,
                geometric_coordsys=geometric_coordsys,
                geometric_qccnv=geometric_qccnv)
            end_dt = datetime.utcnow()

            perfd = {
                'start': start_dt.isoformat(),
                'end': end_dt.isoformat()
            }

            if result.success:
                try:
                    final_molecule = self._process_optimization_result(
                        output_id, result)
                    self._execute_output_results(
                        output_id=output_id,
                        resultjson=result.json(),
                        final_molecule=final_molecule,
                        outfile=outfile,
                        success=True,
                        perfd=perfd)
                except Exception as e:
                    print("... '{}' : export error".format(mol_id))

                    # writing the error text is best effort: a failure here
                    # is reported but must not stop the remaining molecules.
                    # `Exception` (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    try:
                        with open("{}.txt".format(error_outfile), 'w') as f:
                            f.write(str(e))
                    except Exception as write_error:
                        print("... '{}' : could not write error file: {}".format(
                            mol_id, write_error))

                    self._execute_output_results(
                        output_id=output_id,
                        resultjson=result.json(),
                        final_molecule=None,
                        outfile=error_outfile,
                        success=False,
                        perfd=perfd)
            else:
                print("... '{}' : compute failed".format(mol_id))

                # NOTE(review): the raw result object is passed here rather
                # than `result.json()` as in the other branches - confirm
                # this is what `_execute_output_results` expects on failure.
                self._execute_output_results(
                    output_id=output_id,
                    resultjson=result,
                    final_molecule=None,
                    outfile=error_outfile,
                    success=False,
                    perfd=perfd)

            results.append(result)

    return results