def test_create_torsiondrive_dataset():
    """
    Make sure we can correctly make a dataset using the scan enumerator.

    The scan range and increment registered on the enumerator are expected to
    show up in the keywords of every entry of the resulting dataset.
    """
    # tag torsions that terminate in an O-H bond with a custom range and grid
    # NOTE(review): `scan_rage` is the keyword exactly as the original call
    # spelled it - presumably the library's own spelling, confirm before renaming
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(
        smarts="[*:1]~[*:2]-[#8:3]-[#1:4]",
        scan_rage=(-90, 90),
        scan_increment=10,
    )
    single_conformer = workflow_components.StandardConformerGenerator(
        max_conformers=1)

    # the workflow runs the scan tagging first and conformer generation second
    factory = TorsiondriveDatasetFactory()
    factory.add_workflow_components(torsion_tagger)
    factory.add_workflow_components(single_conformer)

    input_molecules = Molecule.from_file(
        get_data("tautomers_small.smi"),
        "smi",
        allow_undefined_stereo=True,
    )
    dataset = factory.create_dataset(
        dataset_name="test name",
        molecules=input_molecules,
        description="Force field test",
        tagline="A test dataset",
        processors=1,
    )

    # the dataset must not be empty, otherwise the loop below checks nothing
    assert dataset.n_molecules > 0
    assert dataset.n_records > 0
    for record in dataset.dataset.values():
        scan_keywords = record.keywords
        assert scan_keywords.dihedral_ranges == [(-90, 90)]
        assert scan_keywords.grid_spacing == [10]
def test_expanding_compute(fractal_compute_server, factory_type):
    """
    Make sure that if we expand the compute of a dataset tasks are generated.

    A dataset is submitted with a single "default" spec, then a second dataset
    with the same name, no molecules and a new "parsley2" spec is submitted.
    The collection history on the server should then list both specs and the
    first entry should hold a record for the new one.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_smiles("CC")
    ethane.generate_conformers(n_conformers=1)

    factory = factory_type()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")
    factory.add_workflow_components(torsion_tagger)

    # first pass: a single MM spec called "default"
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="default",
        spec_description="standard parsley spec",
    )
    first_dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=ethane,
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )
    first_dataset.submit(client=client)

    # the server side collection should only know about the first spec
    collection = client.get_collection(first_dataset.type,
                                       first_dataset.dataset_name)
    assert collection.data.history == {"default"}

    # second pass: same dataset name, no molecules, a different MM spec
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.2.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley2",
        spec_description="standard parsley spec",
    )
    expansion_dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=[],
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )
    expansion_dataset.submit(client=client)

    # pull the collection again; both specs should now be in the history
    collection = client.get_collection(expansion_dataset.type,
                                       expansion_dataset.dataset_name)
    assert collection.data.history == {"default", "parsley2"}

    # and the existing entry should have a record for the new spec
    first_index = collection.df.index[0]
    server_entry = collection.get_entry(first_index)
    assert "parsley2" in server_entry.object_map
def test_scan_enumerator_unique():
    """
    If the enumerator would hit multiple torsions in a molecule make sure they are unique by symmetry.

    Butane is run through a generic C-C torsion pattern and exactly two
    torsions are expected to be tagged on the molecule.
    """
    butane = Molecule.from_smiles("CCCC")

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")

    outcome = enumerator.apply(
        molecules=[butane],
        processors=1,
        toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
    )

    # the molecule passes the component ...
    assert outcome.n_molecules == 1
    # ... and the tagged torsions are read back from the input molecule itself
    torsion_indexer = butane.properties["dihedrals"]
    assert torsion_indexer.n_torsions == 2
def test_scan_enumerator_no_scans():
    """
    Make sure molecules are filtered if they have no scans assigned.

    Ethane contains no oxygen, so the O-C torsion pattern registered below
    cannot match it. The component should therefore return no molecules and
    report one filtered molecule.
    """
    mol = Molecule.from_smiles("CC")

    scan_tagger = workflow_components.ScanEnumerator()
    # A torsion pattern must label its four atoms with the distinct map
    # indices 1-4. The oxygen previously reused ":1", leaving index 2 unused.
    scan_tagger.add_torsion_scan(smarts="[*:1]~[#8:2]-[#6:3]~[*:4]",
                                 scan_rage=(-40, 40),
                                 scan_increment=15)

    result = scan_tagger.apply([mol],
                               processors=1,
                               toolkit_registry=GLOBAL_TOOLKIT_REGISTRY)

    # nothing matched, so the only input molecule must have been filtered out
    assert result.n_molecules == 0
    assert result.n_filtered == 1
def test_scan_enumerator_1d():
    """
    Make sure only one match is tagged per torsion.

    Propane is tagged with a generic C-C torsion pattern using a custom scan
    range; a single torsion keyed by the bond (1, 2) is expected.
    """
    propane = Molecule.from_smiles("CCC")
    expected_range = (-60, 60)

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_torsion_scan(
        smarts="[*:1]~[#6:2]-[#6:3]~[*:4]",
        scan_rage=expected_range,
        scan_increment=20,
    )

    outcome = enumerator.apply(
        [propane],
        processors=1,
        toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
    )
    assert outcome.n_molecules == 1

    # the tagged torsions are read back from the input molecule itself
    torsion_indexer = propane.properties["dihedrals"]
    assert torsion_indexer.n_torsions == 1
    tagged_torsion = torsion_indexer.torsions[(1, 2)]
    assert tagged_torsion.scan_range1 == expected_range
def test_adding_dataset_entry_fail(fractal_compute_server, factory_type, capsys):
    """
    Make sure that the new entries is not incremented if we can not add a molecule to the server due to a name clash.

    The dataset is submitted twice with verbose output: the first submission
    should report every record as new, the second (with a different spec but
    the same entries) should report zero new entries.

    TODO add basic dataset into the testing if the api changes to return an error when adding the same index twice
    """
    client = FractalClient(fractal_compute_server)
    methanol = Molecule.from_smiles("CO")
    methanol.generate_conformers(n_conformers=1)

    factory = factory_type()
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(torsion_tagger)

    # start from a clean slate and register a single MM spec
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley",
        spec_description="standard parsley spec",
    )
    dataset = factory.create_dataset(
        dataset_name=f"Test index clash for {factory.type}",
        molecules=methanol,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # first submission: every record should be reported as a new entry
    dataset.submit(client=client, verbose=True)
    first_output = capsys.readouterr().out
    assert first_output == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"

    # swap the spec on the dataset itself and submit the same entries again
    dataset.clear_qcspecs()
    dataset.add_qc_spec(
        method="mmff94",
        basis=None,
        program="rdkit",
        spec_name="mff94",
        spec_description="mff94 force field in rdkit",
    )
    dataset.submit(client=client, verbose=True)
    second_output = capsys.readouterr().out
    # the entries already exist on the server so none should count as new
    assert second_output == f"Number of new entries: 0/{dataset.n_records}\n"
def test_torsiondrive_scan_keywords(fractal_compute_server):
    """
    Test running torsiondrives with unique keyword settings which overwrite the global grid spacing and scan range.

    The keywords of the first dataset entry are replaced before submission and
    the record fetched back from the server must carry those entry level
    values rather than the dataset wide ones.
    """
    client = FractalClient(fractal_compute_server)
    methanol = Molecule.from_smiles("CO")

    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")

    factory = TorsiondriveDatasetFactory()
    factory.add_workflow_components(torsion_tagger)
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff_unconstrained-1.1.0",
        basis="smirnoff",
        program="openmm",
        spec_description="scan range test",
        spec_name="openff-1.1.0",
    )
    dataset = factory.create_dataset(
        dataset_name="Torsiondrive scan keywords",
        molecules=methanol,
        description="Testing scan keywords which overwrite the global settings",
        tagline="Testing scan keywords which overwrite the global settings",
    )

    # override the scan settings on the first entry only
    first_index = list(dataset.dataset.keys())[0]
    first_entry = dataset.dataset[first_index]
    first_entry.keywords = {"grid_spacing": [5], "dihedral_ranges": [(-10, 10)]}

    # submit and let the server work through the torsiondrive services
    dataset.submit(client=client)
    fractal_compute_server.await_services(max_iter=50)

    # fetch the record for the submitted spec from the server collection
    collection = client.get_collection("TorsionDriveDataset",
                                       dataset.dataset_name)
    record = collection.get_record(collection.df.index[0], "openff-1.1.0")
    record_keywords = record.keywords

    # entry level values must win over the dataset level defaults
    assert record_keywords.grid_spacing == [5]
    assert record_keywords.grid_spacing != dataset.grid_spacing
    assert record_keywords.dihedral_ranges == [(-10, 10)]
    assert record_keywords.dihedral_ranges != dataset.dihedral_ranges
def test_improper_enumerator():
    """
    Make sure improper torsions are correctly tagged.

    Benzene is tagged with an improper pattern centred on a carbon atom and
    a single improper with the requested scan increment is expected.
    """
    benzene = Molecule.from_file(get_data("benzene.sdf"))

    enumerator = workflow_components.ScanEnumerator()
    # the pattern can match more than one improper in the molecule, but only
    # one scan should come back
    enumerator.add_improper_torsion(
        smarts="[#6:1](-[#1:2])(:[#6:3]):[#6:4]",
        central_smarts="[#6:1]",
        scan_range=(-40, 40),
        scan_increment=4,
    )

    outcome = enumerator.apply(
        [benzene],
        processors=1,
        toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
    )
    assert outcome.n_molecules == 1

    # the tagged impropers are read back from the input molecule itself
    torsion_indexer = benzene.properties["dihedrals"]
    assert torsion_indexer.n_impropers == 1
    tagged_improper = torsion_indexer.impropers[0]
    assert tagged_improper.scan_increment == [4]
def test_ignore_errors_all_datasets(fractal_compute_server, factory_type, capsys):
    """
    For each dataset make sure that when the basis is not fully covered the dataset raises warning errors, and verbose information

    Submitting with ``ignore_errors=False`` must raise
    ``MissingBasisCoverageError``; submitting with ``ignore_errors=True`` must
    emit a ``UserWarning`` and still report all records as new entries.
    """
    client = FractalClient(fractal_compute_server)

    # molecule containing boron
    boron_molecule = Molecule.from_smiles("OB(O)C1=CC=CC=C1")
    torsion_tagger = workflow_components.ScanEnumerator()
    torsion_tagger.add_torsion_scan(smarts="[#6:1]~[#6:2]-[B:3]~[#8:4]")

    factory = factory_type()
    factory.add_workflow_components(torsion_tagger)
    # only a single MM spec is registered
    factory.clear_qcspecs()
    factory.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="parsley",
        spec_description="standard parsley spec",
    )
    dataset = factory.create_dataset(
        dataset_name=f"Test ignore_error for {factory.type}",
        molecules=boron_molecule,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # strict submission: the coverage problem must surface as an exception
    with pytest.raises(MissingBasisCoverageError):
        dataset.submit(client=client, ignore_errors=False)

    # lenient submission: the same problem is downgraded to a warning
    with pytest.warns(UserWarning):
        dataset.submit(client=client, ignore_errors=True, verbose=True)

    # the verbose summary should count every record as a new entry
    printed = capsys.readouterr().out
    assert printed == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"
def test_scan_enumerator_2d():
    """
    Make sure one combination of the 2D scan is tagged.

    A double torsion is registered with separate ranges for each dimension and
    the single tagged pair is expected to carry both ranges unchanged.
    """
    molecule = Molecule.from_smiles("COc1ccc(cc1)N")
    first_range = (-165, 180)
    second_range = (-60, 60)

    enumerator = workflow_components.ScanEnumerator()
    enumerator.add_double_torsion(
        smarts1="[*:1]-[#7X3+0:2]-[#6:3]@[#6,#7:4]",
        smarts2="[#7X3+0:1](-[*:3])(-[*:4])-[#6:2]@[#6,#7]",
        scan_range1=first_range,
        scan_range2=second_range,
        scan_increments=[15, 4],
    )

    outcome = enumerator.apply(
        [molecule],
        processors=1,
        toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
    )
    assert outcome.n_molecules == 1

    # the tagged scans are read back from the input molecule itself
    torsion_indexer = molecule.properties["dihedrals"]
    assert torsion_indexer.n_double_torsions == 1

    # key under which the single double torsion is expected to be stored
    pair_key = ((5, 8), (5, 17))
    assert torsion_indexer.double_torsions[pair_key].scan_range1 == first_range
    assert torsion_indexer.double_torsions[pair_key].scan_range2 == second_range