def test_factory_cmiles():
    """
    Check that the basic factory produces the expected cmiles attributes for a molecule.
    """
    factory = BasicDatasetFactory()
    mol = Molecule.from_smiles("CC")
    cmiles_factory = factory.create_cmiles_metadata(mol)

    def smiles(isomeric, explicit_hydrogens, mapped=False):
        # thin wrapper so each expected entry only states the flags that vary
        return mol.to_smiles(
            isomeric=isomeric,
            explicit_hydrogens=explicit_hydrogens,
            mapped=mapped,
        )

    # build the reference cmiles by hand, straight from the toolkit molecule
    expected = {
        "canonical_smiles": smiles(False, False),
        "canonical_isomeric_smiles": smiles(True, False),
        "canonical_explicit_hydrogen_smiles": smiles(False, True),
        "canonical_isomeric_explicit_hydrogen_smiles": smiles(True, True),
        "canonical_isomeric_explicit_hydrogen_mapped_smiles": smiles(
            True, True, mapped=True
        ),
        "molecular_formula": mol.hill_formula,
        "standard_inchi": mol.to_inchi(fixed_hydrogens=False),
        "inchi_key": mol.to_inchikey(fixed_hydrogens=False),
        "fixed_hydrogen_inchi": mol.to_inchi(fixed_hydrogens=True),
        "fixed_hydrogen_inchi_key": mol.to_inchikey(fixed_hydrogens=True),
        "unique_fixed_hydrogen_inchi_keys": {"OTMSDBZUPAUEDD-UHFFFAOYNA-N"},
    }

    assert expected == cmiles_factory
def test_scf_properties_assignment():
    """Assigning misspelled scf_properties to the factory must fail validation."""
    factory = BasicDatasetFactory()

    # both entries are deliberately misspelled
    misspelled = ["diapole", "qudrupole"]
    with pytest.raises(DatasetInputError):
        factory.scf_properties = misspelled
def test_adding_removing_scf_properties():
    """
    Exercise assigning, removing and adding scf_properties on the factory, including a rejected value.
    """
    factory = BasicDatasetFactory()

    # unusual capitalisation is assigned and then removed again
    odd_caps = "QuaDruPole"
    factory.scf_properties = [odd_caps]
    factory.remove_scf_property(scf_property=odd_caps)

    # a new property can be added afterwards
    factory.add_scf_property(scf_property="mulliken_charges")

    # an unknown property must be rejected
    with pytest.raises(DatasetInputError):
        factory.add_scf_property(scf_property="FakeProperty")

    # only the successfully added property remains
    assert factory.scf_properties == ["mulliken_charges"]
def test_factory_round_trip(file_type, tmpdir):
    """
    Export a factory with a workflow to file and check it reloads with the same driver and workflow.
    """
    with tmpdir.as_cwd():
        original = BasicDatasetFactory(driver="energy", maxiter=1)
        # register the workflow components in the order they are created
        original.add_workflow_components(
            workflow_components.ElementFilter(),
            workflow_components.MolecularWeightFilter(),
            workflow_components.StandardConformerGenerator(),
        )

        file_name = "test." + file_type
        original.export(file_name)

        reloaded = BasicDatasetFactory.from_file(file_name)

        assert reloaded.driver == original.driver
        assert reloaded.workflow == original.workflow
def test_basic_submissions_single_spec(fractal_compute_server, specification):
    """Submit a basic dataset with a single QC specification to a snowflake server and check the results."""
    client = FractalClient(fractal_compute_server)

    qc_spec, driver = specification

    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver=driver)
    factory.add_qc_spec(
        **qc_spec,
        spec_name="default",
        spec_description="testing the single points",
        overwrite=True,
    )

    dataset = factory.create_dataset(
        dataset_name=f"Test single points info {program}, {driver}",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # a missing long description must be rejected at submission time
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # restore the description so the submission can go through
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # pull the collection back from the server
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # metadata stored on the server must match the local dataset
    server_meta = Metadata(**ds.data.metadata)
    assert server_meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # provenance
    assert dataset.provenance == ds.data.provenance

    # default driver of the collection
    assert ds.data.default_driver == dataset.driver

    # inspect the first history entry; an empty history is an error
    for entry in ds.data.history:
        ran_driver, ran_program, ran_method, ran_basis, spec_name = entry
        spec = dataset.qc_specifications[spec_name]
        assert ran_driver == dataset.driver
        assert ran_program == spec.program
        assert ran_method == spec.method
        assert ran_basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        # every conformer should have produced a record
        assert len(query.index) == len(molecules)

        for index in query.index:
            record = query.loc[index].record
            assert record.status.value.upper() == "COMPLETE"
            assert record.error is None
            assert record.return_result is not None
def test_basic_submissions_wavefunction(fractal_compute_server):
    """
    Test submitting a basic dataset with a wavefunction protocol and make sure it is executed.

    The test is skipped when psi4 is not available. After the compute finishes it checks the
    collection metadata, the compute history, the status of every record and that the requested
    wavefunction data (basis and alpha orbitals) can be retrieved from each record.
    """
    # only a psi4 test
    if not has_program("psi4"):
        pytest.skip("Program psi4 not found.")

    client = FractalClient(fractal_compute_server)
    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    # start from a clean slate so only the wavefunction spec is present
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="hf",
        basis="sto-6g",
        program="psi4",
        spec_name="default",
        spec_description="wavefunction spec",
        store_wavefunction="orbitals_and_eigenvalues",
    )

    dataset = factory.create_dataset(
        dataset_name="Test single points with wavefunction",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets with wavefunction",
    )

    # submit the dataset
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # an empty history would make the loop below pass without checking anything
    if not ds.data.history:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    # every history entry must agree with the matching local specification
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        # make sure all of the conformers were submitted, otherwise the
        # per-record checks below could pass vacuously
        assert len(query.index) == len(molecules)

        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None

            # the wavefunction data requested in the spec must be retrievable
            wavefunction_basis = result.get_wavefunction("basis")
            assert wavefunction_basis.name.lower() == "sto-6g"
            orbitals = result.get_wavefunction("orbitals_a")
            assert orbitals.shape is not None
def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """Submit a basic dataset whose specification uses pcm water to a snowflake server and check the results."""
    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    pcm_water = PCMSettings(units="au", medium_Solvent="water")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="testing the single points with pcm",
        implicit_solvent=pcm_water,
        overwrite=True,
    )

    # pcm calculations are slow, so only the first molecule is used
    dataset = factory.create_dataset(
        dataset_name="Test single points with pcm water",
        molecules=molecules[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    # a missing long description must be rejected at submission time
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # restore the description so the submission can go through
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # pull the collection back from the server
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # metadata stored on the server must match the local dataset
    server_meta = Metadata(**ds.data.metadata)
    assert server_meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # provenance
    assert dataset.provenance == ds.data.provenance

    # default driver of the collection
    assert ds.data.default_driver == dataset.driver

    # inspect the first history entry; an empty history is an error
    for entry in ds.data.history:
        ran_driver, ran_program, ran_method, ran_basis, spec_name = entry
        spec = dataset.qc_specifications[spec_name]
        assert ran_driver == dataset.driver
        assert ran_program == spec.program
        assert ran_method == spec.method
        assert ran_basis == spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for index in query.index:
            record = query.loc[index].record
            assert record.status.value.upper() == "COMPLETE"
            assert record.error is None
            assert record.return_result is not None
            # the PCM contribution must have been captured in the qcvars
            assert record.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
def test_basic_submissions_multiple_spec(fractal_compute_server):
    """
    Test submitting a basic dataset to a snowflake server with multiple qcspecs.

    Two openmm specifications are registered on the factory. After a forced metadata validation
    failure the dataset is submitted, and the collection metadata, the compute history and the
    status of every record for every specification are checked.
    """
    client = FractalClient(fractal_compute_server)

    qc_specs = [
        {
            "method": "openff-1.0.0",
            "basis": "smirnoff",
            "program": "openmm",
            "spec_name": "openff",
        },
        {
            "method": "gaff-2.11",
            "basis": "antechamber",
            "program": "openmm",
            "spec_name": "gaff",
        },
    ]

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    # drop any existing specs so only the two above are present
    factory.clear_qcspecs()
    for spec in qc_specs:
        factory.add_qc_spec(**spec, spec_description="testing the single points")

    dataset = factory.create_dataset(
        dataset_name="Test single points multiple specs",
        molecules=molecules,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # force a metadata validation error
    dataset.metadata.long_description = None

    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # re-add the description so we can submit the data
    dataset.metadata.long_description = "Test basics dataset"

    # now submit again
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # make sure of the results are complete
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec
    assert ds.data.default_driver == dataset.driver

    # an empty history would make the loop below pass without checking anything
    if not ds.data.history:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    # every history entry must agree with the matching local specification
    for specification in ds.data.history:
        driver, program, method, basis, spec_name = specification
        spec = dataset.qc_specifications[spec_name]
        assert driver == dataset.driver
        assert program == spec.program
        assert method == spec.method
        assert basis == spec.basis

    for spec in dataset.qc_specifications.values():
        query = ds.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        # make sure all conformers are submitted
        assert len(query.index) == len(molecules)

        for index in query.index:
            result = query.loc[index].record
            assert result.status.value.upper() == "COMPLETE"
            assert result.error is None
            assert result.return_result is not None
def get_molecule_cmiles(molecule: off.Molecule) -> MoleculeAttributes:
    """
    Build the cmiles data for a molecule.

    A default ``BasicDatasetFactory`` is created and the work is delegated to its
    ``create_cmiles_metadata`` method.

    Parameters:
        molecule: The molecule the cmiles data should be generated for.

    Returns:
        The value returned by ``BasicDatasetFactory.create_cmiles_metadata`` for the molecule.
    """
    return BasicDatasetFactory().create_cmiles_metadata(molecule)