def test_pcm_default_string():
    """
    Check the text produced by ``PCMSettings.to_string`` for a water/au setup.

    Only ``units`` and ``medium_Solvent`` are supplied, so every other value in
    the expected text comes from the settings object itself.
    """
    # NOTE(review): the comparison is exact, so the spacing inside this literal
    # must match what to_string() emits character for character.
    expected = (
        '\n Units = au'
        '\n CODATA = 2010'
        '\n Medium {'
        '\n SolverType = IEFPCM'
        '\n Nonequilibrium = False'
        '\n Solvent = H2O'
        '\n MatrixSymm = True'
        '\n Correction = 0.0'
        '\n DiagonalScaling = 1.07'
        '\n ProbeRadius = 1.0}'
        '\n Cavity {'
        '\n Type = GePol'
        '\n Area = 0.3'
        '\n Scaling = True'
        '\n RadiiSet = Bondi'
        '\n MinRadius = 100'
        '\n Mode = Implicit}'
    )
    settings = PCMSettings(units="au", medium_Solvent="Water")
    assert settings.to_string() == expected
def test_pcm_codata(data):
    """
    Check that acceptable CODATA values are stored and others are rejected.

    ``data`` is a ``(codata, error)`` pair. When ``error`` is ``None`` the
    value must be accepted and kept unchanged; otherwise constructing the
    settings must raise ``error``.
    """
    codata, error = data

    if error is None:
        # accepted value: it should round-trip through the settings object
        settings = PCMSettings(units="AU", medium_Solvent="water", codata=codata)
        assert settings.codata == codata
        return

    # rejected value: construction itself has to fail
    with pytest.raises(error):
        PCMSettings(units="AU", medium_Solvent="water", codata=codata)
def test_pcm_units(data):
    """
    Check that the ``units`` argument is validated.

    ``data`` is a ``(unit, error)`` pair. When ``error`` is ``None`` the
    settings must build successfully; otherwise construction must raise
    ``error``.
    """
    unit, error = data

    if error is None:
        settings = PCMSettings(units=unit, medium_Solvent="Water")
        # a successful build is confirmed through the normalised solvent name
        assert settings.medium_Solvent == "H2O"
        return

    with pytest.raises(error):
        PCMSettings(units=unit, medium_Solvent="Water")
def test_pcm_solvent(solvent_data):
    """
    Check that solvents given by name or by formula end up stored as a formula.

    ``solvent_data`` is a ``(solvent, formula, error)`` triple. When ``error``
    is ``None`` the stored solvent must equal ``formula``; otherwise building
    the settings must raise ``error``.
    """
    solvent, formula, error = solvent_data

    if error is None:
        settings = PCMSettings(units="au", medium_Solvent=solvent)
        assert settings.medium_Solvent == formula
        return

    with pytest.raises(error):
        PCMSettings(units="au", medium_Solvent=solvent)
def test_pcm_cavity():
    """
    Check that GePol is the only cavity type that can be requested.
    """
    common = {"units": "au", "medium_Solvent": "Water"}

    # any other cavity type must be refused
    with pytest.raises(PCMSettingError):
        PCMSettings(cavity_Type="isosurface", **common)

    # a lower-case "gepol" is accepted and stored as "GePol"
    settings = PCMSettings(cavity_Type="gepol", **common)
    assert settings.cavity_Type == "GePol"
def test_pcm_unit_conversion_defaults():
    """
    Check that default lengths and areas are converted to the requested units.
    """
    # with atomic units the default values are left untouched
    in_au = PCMSettings(units="au", medium_Solvent="water")
    assert in_au.medium_ProbeRadius == 1.0
    assert in_au.cavity_Area == 0.3
    assert in_au.cavity_MinRadius == 100

    # with angstrom the same defaults are scaled by the bohr -> angstrom
    # factor (squared for the area)
    in_angstrom = PCMSettings(units="angstrom", medium_Solvent="water")
    factor = constants.bohr2angstroms
    assert in_angstrom.medium_ProbeRadius == in_au.medium_ProbeRadius * factor
    assert in_angstrom.cavity_Area == in_au.cavity_Area * factor**2
    assert in_angstrom.cavity_MinRadius == in_au.cavity_MinRadius * factor
def test_pcm_cavity_mode():
    """
    Check that only the implicit cavity mode is accepted.
    """
    common = {"units": "au", "medium_Solvent": "water"}

    # asking for the explicit mode must be refused
    with pytest.raises(PCMSettingError):
        PCMSettings(cavity_Mode="Explicit", **common)

    # a lower-case "implicit" is accepted and stored as "Implicit"
    settings = PCMSettings(cavity_Mode="implicit", **common)
    assert settings.cavity_Mode == "Implicit"
def test_pcm_solver(data):
    """
    Check solver-type validation (only IEFPCM and CPCM are meant to pass).

    ``data`` is a ``(solver, error)`` pair. When ``error`` is ``None`` the
    solver must be stored unchanged; otherwise construction must raise
    ``error``.
    """
    solver, error = data

    if error is None:
        settings = PCMSettings(
            units="au", medium_Solvent="water", medium_SolverType=solver
        )
        assert settings.medium_SolverType == solver
        return

    with pytest.raises(error):
        PCMSettings(units="au", medium_Solvent="water", medium_SolverType=solver)
def test_pcm_radiisets(data):
    """
    Check that only valid radii sets are accepted.

    ``data`` is a ``(radii, error)`` pair. When ``error`` is ``None`` the
    radii set must be stored unchanged; otherwise construction must raise
    ``error``.
    """
    radii, error = data

    if error is None:
        settings = PCMSettings(
            units="au", medium_Solvent="Water", cavity_RadiiSet=radii
        )
        assert settings.cavity_RadiiSet == radii
        return

    with pytest.raises(error):
        PCMSettings(units="au", medium_Solvent="Water", cavity_RadiiSet=radii)
def test_qcspec_with_solvent():
    """
    Check that PCM settings can only be attached to a PSI4 specification.
    """
    # a non-psi4 program combined with an implicit solvent must be rejected
    with pytest.raises(QCSpecificationError):
        QCSpec(
            method="ani2x",
            basis=None,
            program="torchani",
            spec_name="ani2x",
            spec_description="testing ani with solvent",
            implicit_solvent=PCMSettings(units="au", medium_Solvent="water"),
        )

    # leaving every other QCSpec field at its default must be accepted
    water = PCMSettings(units="au", medium_Solvent="water")
    spec = QCSpec(implicit_solvent=water)
    assert spec.implicit_solvent is not None
    assert spec.implicit_solvent.medium_Solvent == "H2O"
def test_pcm_unit_conversion():
    """
    Check that user-supplied values are kept while defaults are still converted.
    """
    # the probe radius is given explicitly (2, with angstrom units requested)
    settings = PCMSettings(
        units="angstrom", medium_Solvent="water", medium_ProbeRadius=2
    )

    # the explicit value must survive untouched
    assert settings.medium_ProbeRadius == 2
    # the cavity area was not supplied, so it must differ from the au default
    assert settings.cavity_Area != 0.3
def test_optimization_submissions_with_pcm(fractal_compute_server):
    """
    Test submitting an Optimization dataset to a snowflake server with PCM.

    The test is skipped when psi4 is not available. A dataset holding a single
    molecule is built with a PCM water specification, submitted, and the
    collection fetched back from the server is compared against the local
    dataset (metadata, provenance, QC specification, keywords and records).

    Parameters
    ----------
    fractal_compute_server
        Server object handed to ``FractalClient``; its ``await_results`` method
        is used to wait for the submitted calculations.
    """
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    # use a single small molecule due to the extra time PCM takes
    molecules = Molecule.from_smiles("C")

    factory = OptimizationDatasetFactory(driver="gradient")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="test",
        implicit_solvent=PCMSettings(units="au", medium_Solvent="water"),
        overwrite=True,
    )

    dataset = factory.create_dataset(
        # plain literal: the name has no placeholders, so no f-prefix is needed
        dataset_name="Test optimizations info with pcm water",
        molecules=molecules,
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # the first submission is expected to be refused with a DatasetInputError
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again and wait for the server to finish
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # pull the collection back from the server to compare with the local copy
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    for qc_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[qc_spec.spec_name]

        assert spec.description == qc_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == qc_spec.method
        assert spec.qc_spec.basis == qc_spec.basis
        assert spec.qc_spec.program == qc_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]
        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(qc_spec.spec_name)

        for index in ds.df.index:
            # records are read from the column named after the "default" spec
            record = ds.df.loc[index].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1

            # inspect the first entry of the optimization trajectory
            result = record.get_trajectory()[0]
            assert "CURRENT DIPOLE X" in result.extras["qcvars"].keys()
            assert "SCF QUADRUPOLE XX" in result.extras["qcvars"].keys()
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """
    Test submitting a basic dataset to a snowflake server with pcm water in the specification.

    The test is skipped when psi4 is not available. Only the first molecule
    loaded from ``butane_conformers.pdb`` is used, because PCM calculations are
    slow. After submission the collection fetched back from the server is
    compared with the local dataset and every record is checked for a
    captured PCM polarization energy.

    Parameters
    ----------
    fractal_compute_server
        Server object handed to ``FractalClient``; its ``await_results`` method
        is used to wait for the submitted calculations.

    Raises
    ------
    RuntimeError
        If the server-side history of the collection is empty.
    """
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="testing the single points with pcm",
        implicit_solvent=PCMSettings(units="au", medium_Solvent="water"),
        overwrite=True,
    )

    # only use one molecule due to the time it takes to run with pcm
    dataset = factory.create_dataset(
        # plain literal: the name has no placeholders, so no f-prefix is needed
        dataset_name="Test single points with pcm water",
        molecules=molecules[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    # the first submission is expected to be refused with a DatasetInputError
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again and wait for the server to finish
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    # pull the collection back from the server to compare with the local copy
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # Compare one history entry with the local specification. The unpacked
    # names are prefixed so the local ``program`` above is not rebound.
    for specification in ds.data.history:
        (
            hist_driver,
            hist_program,
            hist_method,
            hist_basis,
            hist_spec_name,
        ) = specification
        spec = dataset.qc_specifications[hist_spec_name]
        assert hist_driver == dataset.driver
        assert hist_program == spec.program
        assert hist_method == spec.method
        assert hist_basis == spec.basis
        # only the first entry yielded by the history is inspected
        break
    else:
        # reached only when the history yields no entries at all
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
            # make sure the PCM result was captured
            assert result.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
def main():
    """
    Build and export the "OpenFF BCC Refit Study COH v1.0" data set.

    Steps, in order:

    1. Turn every ESP record held by ``MoleculeESPStore`` into a molecule.
    2. Write each molecule to ``conformers/<index>.sdf``.
    3. Create a basic data set with a vacuum and a PCM-water QC specification.
    4. Check the expected molecule / record counts and attach provenance and
       submitter metadata.
    5. Export ``dataset.json.xz``, ``molecules.smi`` and ``dataset.pdf``.
    """
    # Load in the set of molecules to add to the data set.
    esp_store = MoleculeESPStore()
    molecules = []
    for smiles in esp_store.list():
        for esp_record in esp_store.retrieve(smiles):
            molecules.append(molecule_from_record(esp_record))

    # Store the conformers as SDF files.
    conformer_dir = "conformers"
    os.makedirs(conformer_dir, exist_ok=True)
    for index, molecule in enumerate(molecules):
        sdf_path = os.path.join(conformer_dir, f"{index}.sdf")
        molecule.to_file(sdf_path, "SDF")

    # Both specifications share the same level of theory and only differ in
    # whether a PCM solvent model is attached.
    method = "pw6b95"
    basis = "aug-cc-pV(D+d)Z"

    vacuum_spec = QCSpec(
        method=method,
        basis=basis,
        spec_name="resp-2-vacuum",
        spec_description=(
            "The quantum chemistry specification used in the RESP2 publication "
            "for the vacuum (i.e. no PCM) calculations."
        ),
        store_wavefunction=WavefunctionProtocolEnum.orbitals_and_eigenvalues,
    )
    water_spec = QCSpec(
        method=method,
        basis=basis,
        spec_name="resp-2-water",
        spec_description=(
            "The quantum chemistry specification used in the RESP2 publication "
            "for the aqueous (i.e. with PCM) calculations."
        ),
        store_wavefunction=WavefunctionProtocolEnum.orbitals_and_eigenvalues,
        implicit_solvent=PCMSettings(
            units="angstrom",
            cavity_Type="GePol",
            cavity_Area=0.3,
            cavity_Scaling=True,
            cavity_RadiiSet="Bondi",
            cavity_Mode="Implicit",
            medium_SolverType="CPCM",
            medium_Solvent="Water",
        ),
    )

    # Generate the data set to submit.
    factory = BasicDatasetFactory(
        qc_specifications={
            "resp-2-vacuum": vacuum_spec,
            "resp-2-water": water_spec,
        }
    )

    description = (
        "A data set curated for the initial stage of the on-going OpenFF "
        "study which aims to co-optimize the AM1BCC bond charge correction (BCC) "
        "parameters against an experimental training set of density and enthalpy of "
        "mixing data points and a QM training set of electric field data."
        "\n\n"
        "The initial data set is limited to only molecules composed of C, O, H. This "
        "limited scope significantly reduces the number of BCC parameters which must "
        "be retrained, thus allowing for easier convergence of the initial "
        "optimizations."
        "\n\n"
        "The included molecules are those included in the experimental data set as "
        "well as an additional set chosen to ensure that each BCC parameter to train "
        "has been sufficiently (at least five instances) represented and exercised."
        "\n\n"
        "The conformers included in the set where generated using version 0.0.1a4 of "
        "the openff-recharge package. The exact conformer generation settings are "
        "attached as provenance."
    )
    data_set = factory.create_dataset(
        dataset_name="OpenFF BCC Refit Study COH v1.0",
        molecules=molecules,
        description=description,
        tagline="C,H,O single point training data for BCC refits.",
    )

    # Validate that the data set matches expectations.
    assert data_set.n_molecules == 94
    assert data_set.n_records == 215

    # Attach the conformer generation provenance
    provenance = data_set.provenance
    provenance["openff-recharge"] = "0.0.1a4"
    provenance["conformer-generation"] = (
        '{"method": "omega-elf10", "sampling_mode": "dense", "max_conformers": 5}'
    )
    provenance["openeye"] = openeye.__version__

    # Correct the dataset metadata.
    metadata = data_set.metadata
    metadata.submitter = "simonboothroyd"
    metadata.long_description_url = (
        "https://github.com/openforcefield/qca-dataset-submission/tree/master/"
        "submissions/"
        "2020-10-30-OpenFF-BCC-Refit-Study-COH"
    )

    # Export the data set.
    data_set.export_dataset("dataset.json.xz")
    data_set.molecules_to_file('molecules.smi', 'smi')
    data_set.visualize("dataset.pdf", columns=8)