def test_create_torsiondrive_dataset():
    """
    Make sure we can correctly make a dataset using the scan enumerator.

    A single torsion scan with its own range and increment is registered on the enumerator and
    every entry of the created dataset is checked for those settings in its keywords.
    """
    # NOTE(review): `scan_rage` (sic) is the keyword spelling used by the add_torsion_scan calls
    # in this file - presumably it mirrors the library signature, confirm before renaming.
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(
        smarts="[*:1]~[*:2]-[#8:3]-[#1:4]", scan_rage=(-90, 90), scan_increment=10
    )
    one_conformer = workflow_components.StandardConformerGenerator(max_conformers=1)

    # register the enumerator first, then the conformer generator
    factory = TorsiondriveDatasetFactory()
    for component in (torsion_tagger, one_conformer):
        factory.add_workflow_components(component)

    input_molecules = Molecule.from_file(
        get_data("tautomers_small.smi"), "smi", allow_undefined_stereo=True
    )
    dataset = factory.create_dataset(
        dataset_name="test name",
        molecules=input_molecules,
        description="Force field test",
        tagline="A test dataset",
        processors=1,
    )

    # something must have made it through the workflow
    assert dataset.n_molecules > 0
    assert dataset.n_records > 0

    # each entry should hold the range and increment given to the enumerator
    for entry in dataset.dataset.values():
        scan_settings = entry.keywords
        assert scan_settings.dihedral_ranges == [(-90, 90)]
        assert scan_settings.grid_spacing == [10]
def test_expanding_compute(fractal_compute_server, factory_type):
    """
    Make sure that if we expand the compute of a dataset tasks are generated.

    The same dataset name is submitted twice, each time with a single (different) MM spec. After
    every submission the collection history is expected to contain all spec names submitted so far.
    """
    client = FractalClient(fractal_compute_server)

    molecule = Molecule.from_smiles("CC")
    molecule.generate_conformers(n_conformers=1)

    factory = factory_type()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")
    factory.add_workflow_components(torsion_tagger)

    # (method, spec name, molecules) for each submission; the second round passes no molecules
    # and only expands the compute of the existing dataset
    submission_rounds = (
        ("openff-1.0.0", "default", molecule),
        ("openff-1.2.0", "parsley2", []),
    )

    expected_history = set()
    for method, spec_name, round_molecules in submission_rounds:
        # add only mm specs
        factory.clear_qcspecs()
        factory.add_qc_spec(
            method=method,
            basis="smirnoff",
            program="openmm",
            spec_name=spec_name,
            spec_description="standard parsley spec",
        )
        dataset = factory.create_dataset(
            dataset_name=f"Test compute expand {factory.type}",
            molecules=round_molecules,
            description="Test compute expansion",
            tagline="Testing compute expansion",
        )
        dataset.submit(client=client)

        # pull the collection back from the server and check the history
        collection = client.get_collection(dataset.type, dataset.dataset_name)
        expected_history.add(spec_name)
        assert collection.data.history == expected_history

    # make sure a record has been made for the spec added in the second round
    first_entry = collection.get_entry(collection.df.index[0])
    assert "parsley2" in first_entry.object_map
def test_scan_enumerator_unique():
    """
    If the enumerator would hit multiple torsions in a molecule make sure they are unique by
    symmetry.
    """
    butane = Molecule.from_smiles("CCCC")

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")

    outcome = enumerator.apply(
        molecules=[butane], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )
    assert outcome.n_molecules == 1

    # the tagged torsions are read back from the input molecule's properties
    tagged = butane.properties["dihedrals"]
    assert tagged.n_torsions == 2
def test_scan_enumerator_no_scans():
    """
    Make sure molecules are filtered if they have no scans assigned.

    The registered torsion pattern requires an oxygen as its second atom, which the input
    molecule ("CC") does not contain, so nothing should be tagged and the molecule should be
    reported as filtered.
    """
    mol = Molecule.from_smiles("CC")

    scan_tagger = workflow_components.ScanEnumerator()
    # The four torsion atoms carry the distinct map indices 1-4, like every other torsion
    # pattern in this file; a repeated index would leave the torsion under-specified and the
    # test could pass for the wrong reason.
    scan_tagger.add_torsion_scan(
        smarts="[*:1]~[#8:2]-[#6:3]~[*:4]", scan_rage=(-40, 40), scan_increment=15
    )

    result = scan_tagger.apply([mol], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY)

    # no torsion matched, so the only input molecule ends up filtered
    assert result.n_molecules == 0
    assert result.n_filtered == 1
def test_scan_enumerator_1d():
    """
    Make sure only one match is tagged per torsion.
    """
    propane = Molecule.from_smiles("CCC")

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_torsion_scan(
        smarts="[*:1]~[#6:2]-[#6:3]~[*:4]", scan_rage=(-60, 60), scan_increment=20
    )

    outcome = enumerator.apply(
        [propane], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )
    assert outcome.n_molecules == 1

    # the tagged torsions are read back from the input molecule's properties
    tagged = propane.properties["dihedrals"]
    assert tagged.n_torsions == 1

    # the torsion is looked up by the (1, 2) key and must carry the requested range
    central_bond = (1, 2)
    assert tagged.torsions[central_bond].scan_range1 == (-60, 60)
def test_adding_dataset_entry_fail(fractal_compute_server, factory_type, capsys):
    """
    Make sure that the new entries is not incremented if we can not add a molecule to the server
    due to a name clash.

    The dataset is submitted twice under the same name; the verbose output of the second
    submission is expected to report zero new entries.

    TODO add basic dataset into the testing if the api changes to return an error when adding the
    same index twice
    """
    client = FractalClient(fractal_compute_server)

    # factory with one torsion scan and only an MM spec
    factory = factory_type()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(torsion_tagger)
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley",
        spec_description="standard parsley spec",
    )

    methanol = Molecule.from_smiles("CO")
    methanol.generate_conformers(n_conformers=1)

    dataset = factory.create_dataset(
        dataset_name=f"Test index clash for {factory.type}",
        molecules=methanol,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # first submission: every record should be reported as a new entry
    dataset.submit(client=client, verbose=True)
    captured = capsys.readouterr()
    assert captured.out == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"

    # swap the spec on the dataset itself and submit again: no entry should be new this time
    dataset.clear_qcspecs()
    dataset.add_qc_spec(
        method="mmff94",
        basis=None,
        program="rdkit",
        spec_name="mff94",
        spec_description="mff94 force field in rdkit",
    )
    dataset.submit(client=client, verbose=True)
    captured = capsys.readouterr()
    assert captured.out == f"Number of new entries: 0/{dataset.n_records}\n"
def test_torsiondrive_scan_keywords(fractal_compute_server):
    """
    Test running torsiondrives with unique keyword settings which overwrite the global grid
    spacing and scan range.
    """
    client = FractalClient(fractal_compute_server)

    factory = TorsiondriveDatasetFactory()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(torsion_tagger)
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff_unconstrained-1.1.0",
        basis="smirnoff",
        program="openmm",
        spec_description="scan range test",
        spec_name="openff-1.1.0",
    )

    methanol = Molecule.from_smiles("CO")
    dataset = factory.create_dataset(
        dataset_name="Torsiondrive scan keywords",
        molecules=methanol,
        description="Testing scan keywords which overwrite the global settings",
        tagline="Testing scan keywords which overwrite the global settings",
    )

    # give the first entry its own grid spacing and scan range
    first_index = list(dataset.dataset.keys())[0]
    dataset.dataset[first_index].keywords = {
        "grid_spacing": [5],
        "dihedral_ranges": [(-10, 10)],
    }

    # submit and wait for the services to finish
    dataset.submit(client=client)
    fractal_compute_server.await_services(max_iter=50)

    # fetch the record for our spec back from the server
    collection = client.get_collection("TorsionDriveDataset", dataset.dataset_name)
    record = collection.get_record(collection.df.index[0], "openff-1.1.0")
    stored = record.keywords

    # the per-entry values must be stored and must differ from the dataset-wide ones
    assert stored.grid_spacing == [5]
    assert stored.grid_spacing != dataset.grid_spacing
    assert stored.dihedral_ranges == [(-10, 10)]
    assert stored.dihedral_ranges != dataset.dihedral_ranges
def test_improper_enumerator():
    """
    Make sure improper torsions are correctly tagged.
    """
    benzene = Molecule.from_file(get_data("benzene.sdf"))

    enumerator = workflow_components.ScanEnumerator()
    # even though there is more than one improper make sure we only get one scan back
    enumerator.add_improper_torsion(
        smarts="[#6:1](-[#1:2])(:[#6:3]):[#6:4]",
        central_smarts="[#6:1]",
        scan_range=(-40, 40),
        scan_increment=4,
    )

    outcome = enumerator.apply(
        [benzene], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )
    assert outcome.n_molecules == 1

    # the tagged impropers are read back from the input molecule's properties
    tagged = benzene.properties["dihedrals"]
    assert tagged.n_impropers == 1

    # the increment passed as 4 is compared against the list form [4]
    only_improper = tagged.impropers[0]
    assert only_improper.scan_increment == [4]
def test_ignore_errors_all_datasets(fractal_compute_server, factory_type, capsys):
    """
    For each dataset make sure that when the basis is not fully covered the dataset raises warning
    errors, and verbose information
    """
    client = FractalClient(fractal_compute_server)

    # factory with a boron-centred torsion scan and only an MM spec
    factory = factory_type()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[#6:1]~[#6:2]-[B:3]~[#8:4]")
    factory.add_workflow_components(torsion_tagger)
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley",
        spec_description="standard parsley spec",
    )

    # molecule containing boron
    boron_molecule = Molecule.from_smiles("OB(O)C1=CC=CC=C1")
    dataset = factory.create_dataset(
        dataset_name=f"Test ignore_error for {factory.type}",
        molecules=boron_molecule,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # without ignoring errors the submission has to raise
    with pytest.raises(MissingBasisCoverageError):
        dataset.submit(client=client, ignore_errors=False)

    # with errors ignored we expect a warning instead and the verbose entry count on stdout
    with pytest.warns(UserWarning):
        dataset.submit(client=client, ignore_errors=True, verbose=True)

    captured = capsys.readouterr()
    assert captured.out == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"
def test_scan_enumerator_2d():
    """
    Make sure one combination of the 2D scan is tagged.
    """
    molecule = Molecule.from_smiles("COc1ccc(cc1)N")

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_double_torsion(
        smarts1="[*:1]-[#7X3+0:2]-[#6:3]@[#6,#7:4]",
        smarts2="[#7X3+0:1](-[*:3])(-[*:4])-[#6:2]@[#6,#7]",
        scan_range1=(-165, 180),
        scan_range2=(-60, 60),
        scan_increments=[15, 4],
    )

    outcome = enumerator.apply(
        [molecule], processors=1, toolkit_registry=GLOBAL_TOOLKIT_REGISTRY
    )
    assert outcome.n_molecules == 1

    # the tagged double torsions are read back from the input molecule's properties
    tagged = molecule.properties["dihedrals"]
    assert tagged.n_double_torsions == 1

    # the pair is looked up by this key and both scan ranges must be preserved
    pair_key = ((5, 8), (5, 17))
    assert tagged.double_torsions[pair_key].scan_range1 == (-165, 180)
    assert tagged.double_torsions[pair_key].scan_range2 == (-60, 60)