def get_optimization_from_server(self,
                                     fractal_uri,
                                     dataset_name,
                                     client=None,
                                     compute_specs=None,
                                     molids=None):
        """Collect the full optimization data of every COMPLETE record.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        dataset_name : str
            Name of the "OptimizationDataset" collection to read.
        client : optional
            Existing client to reuse instead of creating a new one.
        compute_specs : optional
            Column selection applied to the collection dataframe.
        molids : iterable, optional
            Row labels to keep; None or an empty iterable keeps every row.

        Returns
        -------
        list
            One `self._get_complete_optimization_result(...)` value per
            record whose status is 'COMPLETE', in row-major dataframe order.
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        collection = client.get_collection("OptimizationDataset", dataset_name)
        # status() is always called before `.df` is read
        # (presumably it populates the dataframe -- confirm against qcportal)
        collection.status()

        records = collection.df

        # optional row filter on molecule ids
        wants_row_subset = (molids is not None) and (len(molids) != 0)
        if wants_row_subset:
            records = records.loc[list(molids)]

        # optional column filter on compute specs
        if compute_specs is not None:
            records = records[compute_specs]

        return [
            self._get_complete_optimization_result(record, client)
            for record in records.values.flatten()
            if record.status == 'COMPLETE'
        ]
    def get_optimization_tracebacks(self,
                                    fractal_uri,
                                    dataset_name,
                                    client=None,
                                    compute_specs=None,
                                    molids=None):
        """Get the error message of every ERRORed optimization in a dataset.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        dataset_name : str
            Name of the "OptimizationDataset" collection to read.
        client : optional
            Existing client to reuse instead of creating a new one.
        compute_specs : optional
            Column selection applied to the collection dataframe. A list
            keeps a dataframe; a single column label yields a series.
        molids : iterable, optional
            Row labels to keep; None or an empty iterable keeps every row.

        Returns
        -------
        pandas.DataFrame or pandas.Series
            Same layout as the (filtered, index-sorted) collection dataframe,
            holding the error message for records with status 'ERROR' and
            None otherwise. Rows without any error are dropped.
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        df = optds.df.sort_index(ascending=True)

        if (molids is not None) and (len(molids) != 0):
            df = df.loc[list(molids)]

        if compute_specs is not None:
            df = df[compute_specs]

        def _error_message(opt):
            if opt.status == 'ERROR':
                return opt.get_error().error_message
            return None

        # `DataFrame.applymap` is deprecated since pandas 2.1 in favour of
        # `DataFrame.map`. Resolve the method on the class (not the instance)
        # so a column that happens to be named "map" cannot shadow it, and
        # fall back to `applymap` on older pandas. `Series.map` covers the
        # case where a single compute spec selected a series.
        container = type(df)
        elementwise = getattr(container, "map", None) or container.applymap
        errors = elementwise(df, _error_message)

        # filter down to only those rows with errors
        errors = errors.dropna(how='all')

        return errors
    def list_optimization_datasets(self, fractal_uri, client=None):
        """Return the names of the 'OptimizationDataset' collections as a list.

        A client is created from `fractal_uri` only when `client` is None.
        The names are read from the 'name' column obtained after resetting
        the index of the listing returned by the client.
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        listing = client.list_collections('OptimizationDataset').reset_index()
        names = listing['name']
        return names.to_list()
    def delete_optimization_datasets(self,
                                     fractal_uri,
                                     dataset_names,
                                     client=None):
        """Delete each named 'OptimizationDataset' collection via the client.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        dataset_names : iterable of str
            Names of the collections to delete, processed in order.
        client : optional
            Existing client to reuse instead of creating a new one.
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        collection_type = 'OptimizationDataset'
        for name in dataset_names:
            client.delete_collection(collection_type, name)
Beispiel #5
0
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.

    An ethane optimization is submitted with the dihedral (2, 0, 1, 5) set to
    60 and the distance (0, 1) frozen; the final geometry of the completed
    record is then measured to confirm both constraints were honoured.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets")
    # add just mm spec
    dataset.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="mm default spec",
                        overwrite=True)
    # build some constraints
    constraints = Constraints()
    constraints.add_set_constraint(constraint_type="dihedral",
                                   indices=[2, 0, 1, 5],
                                   value=60,
                                   bonded=True)
    constraints.add_freeze_constraint(constraint_type="distance",
                                      indices=[0, 1],
                                      bonded=True)
    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(index=index,
                         molecule=ethane,
                         attributes=attributes,
                         constraints=constraints)
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit without blocking, then let the compute server finish the work
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    # NOTE: the measured value has to be *compared* with `pytest.approx`.
    # `assert pytest.approx(a, b)` only checks the truthiness of the approx
    # object (with `b` interpreted as the relative tolerance) and never fails.
    final_molecule = record.get_final_molecule()
    initial_molecule = record.get_initial_molecule()
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60, abs=1e-2)
    assert final_molecule.measure((0, 1)) == pytest.approx(
        initial_molecule.measure((0, 1)))
def test_expanding_compute(fractal_compute_server, factory_type):
    """
    Submitting a second, molecule-free dataset under the same name with a new
    QC spec must extend the compute history of the stored collection.
    """
    client = FractalClient(fractal_compute_server)

    ethane = Molecule.from_smiles("CC")
    ethane.generate_conformers(n_conformers=1)

    factory = factory_type()
    torsion_scanner = workflow_components.ScanEnumerator()
    torsion_scanner.add_torsion_scan(smarts="[*:1]~[#6:2]-[#6:3]~[*:4]")
    factory.add_workflow_components(torsion_scanner)

    # first round: a single mm spec named "default"
    factory.clear_qcspecs()
    first_spec = dict(method="openff-1.0.0",
                      basis="smirnoff",
                      program="openmm",
                      spec_name="default",
                      spec_description="standard parsley spec")
    factory.add_qc_spec(**first_spec)
    initial_dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=ethane,
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )

    # submit, then read the collection back and inspect its history
    initial_dataset.submit(client=client)
    stored = client.get_collection(initial_dataset.type,
                                   initial_dataset.dataset_name)
    assert stored.data.history == {"default"}

    # second round: same dataset name, no molecules, a different mm spec
    factory.clear_qcspecs()
    second_spec = dict(method="openff-1.2.0",
                       basis="smirnoff",
                       program="openmm",
                       spec_name="parsley2",
                       spec_description="standard parsley spec")
    factory.add_qc_spec(**second_spec)
    expansion_dataset = factory.create_dataset(
        dataset_name=f"Test compute expand {factory.type}",
        molecules=[],
        description="Test compute expansion",
        tagline="Testing compute expansion",
    )
    expansion_dataset.submit(client=client)

    # the history must now contain both spec names
    stored = client.get_collection(expansion_dataset.type,
                                   expansion_dataset.dataset_name)
    assert stored.data.history == {"default", "parsley2"}

    # and the first entry must have an object mapped to the new spec
    first_entry = stored.get_entry(stored.df.index[0])
    assert "parsley2" in first_entry.object_map
    def set_optimization_tag(self, fractal_uri, tag, dataset_name, client=None):
        """Set the compute tag of every optimization that is not COMPLETE.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        tag : str
            Value passed as `new_tag` to `client.modify_tasks`.
        dataset_name : str
            Name of the "OptimizationDataset" collection to modify.
        client : optional
            Existing client to reuse instead of creating a new one
            (same convention as the sibling methods).
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        # collect the ids first so the task modifications below cannot
        # influence which records are selected
        pending_ids = [
            opt.id for opt in optds.df.values.flatten()
            if opt.status != 'COMPLETE'
        ]
        for opt_id in pending_ids:
            client.modify_tasks(operation='modify',
                                base_result=opt_id,
                                new_tag=tag)
Beispiel #8
0
def cached_fractal_client(address: str) -> FractalClient:
    """Create a fractal client for `address`, retrying with ``verify=False``.

    The first attempt uses the default client settings. If the connection is
    refused, a second attempt is made with ``verify=False``; if that is
    refused as well, the error of the *first* attempt is raised.

    NOTE(review): no caching is visible in this block -- presumably a caching
    decorator is applied where the function is defined; confirm.
    """
    try:
        connection = FractalClient(address)
    except ConnectionRefusedError as first_refusal:
        # Try to handle the case when connecting to a local snowflake.
        try:
            connection = FractalClient(address, verify=False)
        except ConnectionRefusedError:
            # report the original failure rather than the retry's
            raise first_refusal
    return connection
    def submit_molecules(self,
                         fractal_uri,
                         input_paths,
                         season,
                         dataset_name,
                         recursive=False):
        """Submit SDF molecules from given directory to the target QCFractal server.

        The molecules are loaded with `mols_from_paths`, turned into a dataset
        by `self._create_qcsubmit_dataset`, and submitted through a client
        created for `fractal_uri`. Progress is reported on stdout.

        Parameters
        ----------
        fractal_uri : str
            Target QCFractal server URI.
        input_paths : iterable of Path-like
            Paths to SDF files or directories; for directories, all SDF files are loaded.
        season : str
            Benchmark season identifier. Indicates the mix of compute specs to utilize.
        dataset_name : str
            Dataset name to use for submission on the QCFractal server.
        recursive : bool
            If True, recursively load SDFs from any directories given in `input_paths`.

        """
        # extract molecules from SDF inputs
        mols = mols_from_paths(input_paths, recursive=recursive)

        ds = self._create_qcsubmit_dataset(dataset_name, mols, season)

        print("Submitting...")
        client = FractalClient(fractal_uri, verify=False)
        ds.submit(verbose=True, client=client)
        print("Submitted!")
Beispiel #10
0
def test_ignore_errors_all_datasets(fractal_compute_server, factory_type):
    """
    For each dataset make sure that when the basis is not fully covered the dataset raises warning errors.

    With ``ignore_errors=False`` the submission must raise
    ``MissingBasisCoverageError``; with ``ignore_errors=True`` it must emit a
    ``UserWarning`` instead.
    """
    client = FractalClient(fractal_compute_server)
    # molecule containing boron
    molecule = Molecule.from_smiles("OB(O)C1=CC=CC=C1")
    factory = factory_type()
    factory.clear_qcspecs()
    # add only mm specs
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="parsley",
                        spec_description="standard parsley spec")
    dataset = factory.create_dataset(
        dataset_name=f"Test ignore_error for {factory.Config.title}",
        molecules=molecule,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # a description url is set before submitting, as in the other tests
    dataset.metadata.long_description_url = "https://test.org"

    # make sure the dataset raises an error here
    with pytest.raises(MissingBasisCoverageError):
        dataset.submit(client=client, ignore_errors=False)

    # now we want to try again and make sure warnings are raised
    with pytest.warns(UserWarning):
        dataset.submit(client=client, ignore_errors=True)
def test_index_not_changed(fractal_compute_server, factory_type):
    """
    A custom entry index on a single-conformer molecule must still be the
    index found in the collection after submission.
    """
    factory = factory_type()
    factory.clear_qcspecs()
    client = FractalClient(fractal_compute_server)
    # mm spec only
    mm_spec = dict(method="openff-1.0.0",
                   basis="smirnoff",
                   program="openmm",
                   spec_name="parsley",
                   spec_description="standard parsley spec")
    factory.add_qc_spec(**mm_spec)

    # a molecule with exactly one conformer
    methane = Molecule.from_smiles("C")
    methane.generate_conformers(n_conformers=1)
    dataset = factory.create_dataset(
        dataset_name=f"Test index change for {factory.factory_type}",
        molecules=methane,
        description="Test index change dataset",
        tagline="Testing index changes datasets",
    )

    dataset.metadata.long_description_url = "https://test.org"

    # re-key the first entry under a unique index of our own
    original_key = list(dataset.dataset.keys())[0]
    entry = dataset.dataset.pop(original_key)
    entry.index = "my_unique_index"
    dataset.dataset[entry.index] = entry

    dataset.submit(client=client)

    # read the collection back and look for our index
    stored = client.get_collection(dataset.dataset_type, dataset.dataset_name)

    if dataset.dataset_type != "DataSet":
        assert "my_unique_index" in stored.df.index
    else:
        records = stored.get_records(method="openff-1.0.0",
                                     basis="smirnoff",
                                     program="openmm")
        assert "my_unique_index" in records.index
    def set_optimization_priority(self, fractal_uri, priority, dataset_name,
                                  client=None):
        """Set the task priority of every optimization that is not COMPLETE.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        priority : str
            One of "high", "normal" or "low".
        dataset_name : str
            Name of the "OptimizationDataset" collection to modify.
        client : optional
            Existing client to reuse instead of creating a new one
            (same convention as the sibling methods).

        Raises
        ------
        KeyError
            If `priority` is not one of the supported names. Raised before
            the server is contacted.
        """
        from qcportal.models.task_models import PriorityEnum

        priority_map = {
            "high": PriorityEnum.HIGH,
            "normal": PriorityEnum.NORMAL,
            "low": PriorityEnum.LOW
        }

        # resolve the priority once, up front: an invalid name fails fast
        # instead of surfacing only when the first pending task is modified
        try:
            new_priority = priority_map[priority]
        except KeyError:
            raise KeyError(
                f"Unknown priority {priority!r}; "
                f"expected one of {sorted(priority_map)}") from None

        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        pending_ids = [
            opt.id for opt in optds.df.values.flatten()
            if opt.status != 'COMPLETE'
        ]
        for opt_id in pending_ids:
            client.modify_tasks(operation='modify',
                                base_result=opt_id,
                                new_priority=new_priority)
def test_torsiondrive_scan_keywords(fractal_compute_server):
    """
    Per-entry torsiondrive keywords (grid spacing, dihedral range) must be
    used for the record instead of the dataset-wide settings.
    """
    client = FractalClient(fractal_compute_server)
    methanol = Molecule.from_smiles("CO")

    factory = TorsiondriveDatasetFactory()
    torsion_scanner = workflow_components.ScanEnumerator()
    torsion_scanner.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(torsion_scanner)
    factory.clear_qcspecs()
    factory.add_qc_spec(method="openff_unconstrained-1.1.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_description="scan range test",
                        spec_name="openff-1.1.0")
    dataset = factory.create_dataset(
        dataset_name="Torsiondrive scan keywords",
        molecules=methanol,
        description="Testing scan keywords which overwrite the global settings",
        tagline="Testing scan keywords which overwrite the global settings")

    # override the scan settings on the first entry only
    first_key = list(dataset.dataset.keys())[0]
    first_entry = dataset.dataset[first_key]
    first_entry.keywords = {"grid_spacing": [5], "dihedral_ranges": [(-10, 10)]}

    # submit and let the services run
    dataset.submit(client=client)
    fractal_compute_server.await_services(max_iter=50)

    stored = client.get_collection("TorsionDriveDataset", dataset.dataset_name)

    # the record keywords must carry the entry-level values ...
    record = stored.get_record(stored.df.index[0], "openff-1.1.0")
    record_keywords = record.keywords
    assert record_keywords.grid_spacing == [5]
    # ... and those must differ from the dataset-level ones
    assert record_keywords.grid_spacing != dataset.grid_spacing
    assert record_keywords.dihedral_ranges == [(-10, 10)]
    assert record_keywords.dihedral_ranges != dataset.dihedral_ranges
    def get_optimization_status(self,
                                fractal_uri,
                                dataset_name,
                                client=None,
                                compute_specs=None,
                                molids=None):
        """Return the index-sorted collection dataframe, optionally filtered.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        dataset_name : str
            Name of the "OptimizationDataset" collection to read.
        client : optional
            Existing client to reuse instead of creating a new one.
        compute_specs : optional
            Column selection applied to the dataframe.
        molids : iterable, optional
            Row labels to keep; None or an empty iterable keeps every row.
        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        collection = client.get_collection("OptimizationDataset", dataset_name)
        collection.status()

        table = collection.df.sort_index(ascending=True)

        wants_row_subset = (molids is not None) and (len(molids) != 0)
        if wants_row_subset:
            table = table.loc[list(molids)]

        if compute_specs is None:
            return table
        return table[compute_specs]
def test_adding_dataset_entry_fail(fractal_compute_server, factory_type,
                                   capsys):
    """
    Re-submitting entries whose names already exist on the server must not be
    counted as new entries in the verbose submission report.
    TODO add basic dataset into the testing if the api changes to return an error when adding the same index twice
    """
    client = FractalClient(fractal_compute_server)

    methanol = Molecule.from_smiles("CO")
    methanol.generate_conformers(n_conformers=1)

    factory = factory_type()
    torsion_scanner = workflow_components.ScanEnumerator()
    torsion_scanner.add_torsion_scan(smarts="[*:1]~[#6:2]-[#8:3]~[*:4]")
    factory.add_workflow_components(torsion_scanner)
    factory.clear_qcspecs()
    # mm spec only
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="parsley",
                        spec_description="standard parsley spec")
    dataset = factory.create_dataset(
        dataset_name=f"Test index clash for {factory.type}",
        molecules=methanol,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # first submission: every record is reported as new
    dataset.submit(client=client, verbose=True)
    first_report = capsys.readouterr()
    assert first_report.out == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"

    # second submission with a different spec: no entry is reported as new
    dataset.clear_qcspecs()
    rdkit_spec = dict(method="mmff94",
                      basis=None,
                      program="rdkit",
                      spec_name="mff94",
                      spec_description="mff94 force field in rdkit")
    dataset.add_qc_spec(**rdkit_spec)
    dataset.submit(client=client, verbose=True)
    second_report = capsys.readouterr()
    assert second_report.out == f"Number of new entries: 0/{dataset.n_records}\n"
def test_ignore_errors_all_datasets(fractal_compute_server, factory_type,
                                    capsys):
    """
    With incomplete basis coverage a submission must raise unless
    ``ignore_errors=True``, in which case it warns and still prints the
    verbose entry report.
    """
    client = FractalClient(fractal_compute_server)

    # molecule containing boron
    boronic_acid = Molecule.from_smiles("OB(O)C1=CC=CC=C1")
    torsion_scanner = workflow_components.ScanEnumerator()
    torsion_scanner.add_torsion_scan(smarts="[#6:1]~[#6:2]-[B:3]~[#8:4]")

    factory = factory_type()
    factory.add_workflow_components(torsion_scanner)
    factory.clear_qcspecs()
    # mm spec only
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="parsley",
                        spec_description="standard parsley spec")
    dataset = factory.create_dataset(
        dataset_name=f"Test ignore_error for {factory.type}",
        molecules=boronic_acid,
        description="Test ignore errors dataset",
        tagline="Testing ignore errors datasets",
    )

    # strict mode: the coverage problem is an error
    with pytest.raises(MissingBasisCoverageError):
        dataset.submit(client=client, ignore_errors=False)

    # lenient mode: the coverage problem is only a warning
    with pytest.warns(UserWarning):
        dataset.submit(client=client, ignore_errors=True, verbose=True)

    report = capsys.readouterr()
    assert report.out == f"Number of new entries: {dataset.n_records}/{dataset.n_records}\n"
    def errorcycle_optimizations(self,
                                 fractal_uri,
                                 dataset_name,
                                 client=None,
                                 compute_specs=None,
                                 molids=None):
        """Restart optimizations that have failed.

        Records with status 'ERROR' are restarted; records with status
        'INCOMPLETE' that already have a final molecule are regenerated.
        A line is printed for every record acted upon.

        Parameters
        ----------
        fractal_uri : str
            Server URI; only used to build a client when `client` is None.
        dataset_name : str
            Name of the "OptimizationDataset" collection to error cycle.
        client : optional
            Existing client to reuse instead of creating a new one.
        compute_specs : iterable
            Iterable of compute spec names to error cycle only.
        molids : iterable
            Iterable of molecule ids to error cycle only.

        """
        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        df = optds.df

        if (molids is not None) and (len(molids) != 0):
            df = df.loc[list(molids)]

        if compute_specs is not None:
            df = df[compute_specs]

        for opt in df.values.flatten():
            # the two cases are mutually exclusive, hence if/elif
            if opt.status == 'ERROR':
                client.modify_tasks(operation='restart', base_result=opt.id)
                print(f"Restarted ERRORed optimization `{opt.id}`")
            elif opt.status == 'INCOMPLETE' and (opt.final_molecule is not None):
                client.modify_tasks(operation='regenerate', base_result=opt.id)
                print(f"Regenerated INCOMPLETE optimization `{opt.id}`")
Beispiel #18
0
def test_basic_submissions_wavefunction(fractal_compute_server):
    """
    Submit a basic dataset whose spec stores the wavefunction and check that
    the completed records expose the requested wavefunction data.
    """
    # psi4 is the only program exercised here
    if not has_program("psi4"):
        pytest.skip("Program psi4 not found.")

    client = FractalClient(fractal_compute_server)
    butane = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    wavefunction_spec = dict(method="hf",
                             basis="sto-6g",
                             program="psi4",
                             spec_name="default",
                             spec_description="wavefunction spec",
                             store_wavefunction="orbitals_and_eigenvalues")
    factory.add_qc_spec(**wavefunction_spec)

    dataset = factory.create_dataset(
        dataset_name="Test single points with wavefunction",
        molecules=butane,
        description="Test basics dataset",
        tagline="Testing single point datasets with wavefunction",
    )
    # a mock url is set before the data is submitted
    dataset.metadata.long_description_url = "https://test.org"

    # submit without blocking, then wait for the compute server
    dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()

    stored = client.get_collection("Dataset", dataset.dataset_name)
    stored_data = stored.data

    # metadata round trip
    assert Metadata(**stored_data.metadata) == dataset.metadata

    assert stored_data.description == dataset.description
    assert stored_data.tagline == dataset.dataset_tagline
    assert stored_data.tags == dataset.dataset_tags

    # provenance
    assert dataset.provenance == stored_data.provenance

    # driver
    assert stored_data.default_driver == dataset.driver

    # every history entry must match the spec registered under its name
    for history_entry in stored_data.history:
        hist_driver, hist_program, hist_method, hist_basis, spec_name = history_entry
        expected = dataset.qc_specifications[spec_name]
        assert hist_driver == dataset.driver
        assert hist_program == expected.program
        assert hist_method == expected.method
        assert hist_basis == expected.basis

    # every record of every spec must be complete and carry wavefunction data
    for qc_spec in dataset.qc_specifications.values():
        records = stored.get_records(
            method=qc_spec.method,
            basis=qc_spec.basis,
            program=qc_spec.program,
        )
        for row_label in records.index:
            record = records.loc[row_label].record
            assert record.status.value.upper() == "COMPLETE"
            assert record.error is None
            assert record.return_result is not None
            basis_set = record.get_wavefunction("basis")
            assert basis_set.name.lower() == "sto-6g"
            orbitals = record.get_wavefunction("orbitals_a")
            assert orbitals.shape is not None
Beispiel #19
0
def test_basic_submissions_single_spec(fractal_compute_server, specification):
    """Submit a single-spec basic dataset to a snowflake server and verify it."""

    client = FractalClient(fractal_compute_server)

    qc_spec, driver = specification

    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    butane = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = BasicDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="testing the single points",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test single points info {program}, {driver}",
        molecules=butane,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # the first submission, made before the description url is set, must fail
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # set a mock url and submit again
    dataset.metadata.long_description_url = "https://test.org"
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    stored = client.get_collection("Dataset", dataset.dataset_name)
    stored_data = stored.data

    # metadata round trip
    assert Metadata(**stored_data.metadata) == dataset.metadata

    assert stored_data.description == dataset.description
    assert stored_data.tagline == dataset.dataset_tagline
    assert stored_data.tags == dataset.dataset_tags

    # provenance
    assert dataset.provenance == stored_data.provenance

    # driver
    assert stored_data.default_driver == dataset.driver

    # only the first history entry is checked; an empty history is an error
    first_entry = next(iter(stored_data.history), None)
    if first_entry is None:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds_history_repr(stored_data)}"
        ) if False else RuntimeError(
            f"The requested compute was not found in the history {stored_data.history}"
        )
    hist_driver, hist_program, hist_method, hist_basis, spec_name = first_entry
    expected = dataset.qc_specifications[spec_name]
    assert hist_driver == dataset.driver
    assert hist_program == expected.program
    assert hist_method == expected.method
    assert hist_basis == expected.basis

    # every record of every spec must have completed without error
    for spec in dataset.qc_specifications.values():
        records = stored.get_records(
            method=spec.method,
            basis=spec.basis,
            program=spec.program,
        )
        for row_label in records.index:
            record = records.loc[row_label].record
            assert record.status.value.upper() == "COMPLETE"
            assert record.error is None
            assert record.return_result is not None
Beispiel #20
0
def test_adding_compute(fractal_compute_server, dataset_data):
    """
    Test adding new compute to each of the dataset types using non-psi4 programs.

    A dataset with one openmm spec is submitted and run first. A second,
    molecule-free dataset with the same name and metadata but an rdkit spec
    is then submitted, and the stored collection is checked against both specs.

    `dataset_data` is unpacked into a (factory type, dataset type) pair.
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")
    factory_type, dataset_type = dataset_data
    # make and clear out the qc specs
    factory = factory_type()
    factory.clear_qcspecs()
    factory.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="default spec for openff")
    # NOTE(review): `factory_type` is a class here, so the dataset name embeds
    # its repr rather than a short type name -- confirm this is intended.
    dataset = factory.create_dataset(
        dataset_name=f"Test adding compute to {factory_type}",
        molecules=mol,
        description=f"Testing adding compute to a {dataset_type} dataset",
        tagline="tests for adding compute.")

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit without blocking on the results
    dataset.submit(client=client, await_result=False)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # now lets make a dataset with new compute and submit it
    # transfer the metadata to compare the elements
    compute_dataset = dataset_type(dataset_name=dataset.dataset_name,
                                   metadata=dataset.metadata)
    compute_dataset.clear_qcspecs()
    # now add the new compute spec
    compute_dataset.add_qc_spec(method="uff",
                                basis=None,
                                program="rdkit",
                                spec_name="rdkit",
                                spec_description="rdkit basic spec")

    # make sure the dataset has no molecules and submit it
    assert compute_dataset.dataset == {}
    compute_dataset.submit(client=client)
    # make sure that the compute has finished
    fractal_compute_server.await_results()
    fractal_compute_server.await_services(max_iter=50)

    # make sure the results are complete
    ds = client.get_collection(dataset.dataset_type, dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # update all specs into one dataset
    dataset.add_qc_spec(**compute_dataset.qc_specifications["rdkit"].dict())
    # basic datasets record their specs in the history; the other dataset
    # types are checked through `ds.data.specs` in the else branch
    if dataset.dataset_type == "DataSet":
        # every history entry must match the spec registered under its name
        for specification in ds.data.history:
            driver, program, method, basis, spec_name = specification
            spec = dataset.qc_specifications[spec_name]
            assert driver == dataset.driver
            assert program == spec.program
            assert method == spec.method
            assert basis == spec.basis

        # every record of every spec must have completed without error
        for spec in dataset.qc_specifications.values():
            query = ds.get_records(
                method=spec.method,
                basis=spec.basis,
                program=spec.program,
            )
            for index in query.index:
                result = query.loc[index].record
                assert result.status.value.upper() == "COMPLETE"
                assert result.error is None
                assert result.return_result is not None
    else:
        # check the qc spec
        for qc_spec in dataset.qc_specifications.values():
            spec = ds.data.specs[qc_spec.spec_name]

            assert spec.description == qc_spec.spec_description
            assert spec.qc_spec.driver == dataset.driver
            assert spec.qc_spec.method == qc_spec.method
            assert spec.qc_spec.basis == qc_spec.basis
            assert spec.qc_spec.program == qc_spec.program

            # check the keywords
            keywords = client.query_keywords(spec.qc_spec.keywords)[0]

            assert keywords.values["maxiter"] == dataset.maxiter
            assert keywords.values["scf_properties"] == dataset.scf_properties

            # query the dataset
            ds.query(qc_spec.spec_name)

            for index in ds.df.index:
                # NOTE(review): the "default" column is read on every pass,
                # including the pass for the "rdkit" spec, so the rdkit
                # records themselves are never inspected here -- confirm
                # whether `qc_spec.spec_name` was intended.
                record = ds.df.loc[index].default
                # `print(...)` returns None, so the assertion message itself
                # is None; the record dump is printed only when the assert fails
                assert record.status.value == "COMPLETE", print(record.dict())
                assert record.error is None
Beispiel #21
0
def test_adding_specifications(fractal_compute_server):
    """
    Check how specifications are added to an existing dataset collection.

    Scenarios covered, in order:
    1) Re-adding a specification identical to one already in the collection
    2) Adding a spec that reuses an existing name but has different options
    3) Overwriting a spec which was added but never used for compute
    """
    client = FractalClient(fractal_compute_server)
    methanol = Molecule.from_smiles("CO")

    # build the dataset from a default optimization factory
    dataset_factory = OptimizationDatasetFactory()
    opt_dataset = dataset_factory.create_dataset(
        dataset_name="Specification error check",
        molecules=methanol,
        description="test adding new compute specs to datasets",
        tagline="test adding new compute specs",
    )

    def reset_spec(**spec_options):
        # drop every spec held by the dataset, then register a single new one
        opt_dataset.clear_qcspecs()
        opt_dataset.add_qc_spec(**spec_options)

    def push_spec(name):
        # try to add the named dataset spec to the server side collection
        return opt_dataset.add_dataset_specification(
            spec=opt_dataset.qc_specifications[name],
            opt_spec=opt_dataset.optimization_procedure.get_optimzation_spec(),
            collection=ds,
        )

    # start with a single mm spec
    reset_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_description="default openff spec",
        spec_name="openff-1.0.0",
    )

    opt_dataset.metadata.long_description_url = "https://test.org"
    # submit without waiting, then let the server finish results and services
    opt_dataset.submit(client=client, await_result=False)
    fractal_compute_server.await_results()
    fractal_compute_server.await_services()

    # pull the collection back from the server
    ds = client.get_collection(opt_dataset.dataset_type,
                               opt_dataset.dataset_name)

    # scenario 1: the identical spec is accepted
    assert push_spec("openff-1.0.0") is True

    # scenario 2: same name, different method -> must be rejected
    reset_spec(
        method="openff-1.2.1",
        basis="smirnoff",
        spec_name="openff-1.0.0",
        program="openmm",
        spec_description="openff-1.2.1 with wrong name.",
    )
    with pytest.raises(QCSpecificationError):
        push_spec("openff-1.0.0")

    # scenario 3: add a brand new spec without running any compute for it
    reset_spec(
        method="ani1x",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert push_spec("ani") is True

    # ... then alter it slightly under the same name; it should be overwritten
    reset_spec(
        method="ani1ccx",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert push_spec("ani") is True
    def execute_optimization_from_server(self,
                                         fractal_uri,
                                         dataset_name,
                                         output_directory=None,
                                         ncores=1,
                                         memory=2,
                                         client=None,
                                         compute_specs=None,
                                         molids=None,
                                         scf_maxiter=200,
                                         geometric_maxiter=300,
                                         geometric_coordsys='dlc',
                                         geometric_qccnv=False):
        """Execute optimization from the given molecule locally on this host.

        Will not send results back to the server; this is purely for debugging.

        TODO: make this send results back to server using same API as manager does.
              then merge with `execute_...` above.

        Parameters
        ----------
        fractal_uri : str
            Target QCFractal server URI; only used to build a client when
            `client` is None.
        dataset_name : str
            Name of the "OptimizationDataset" collection to pull from the server.
        output_directory : str, optional
            If given, results are written under `<output_directory>/<spec_name>/`
            and failures under `<output_directory>/<spec_name>/error_mols/`.
            If None, nothing is written and results are only returned.
        ncores, memory
            Forwarded to `self._execute_qcengine` as
            `local_options={"ncores": ncores, "memory": memory}`.
        client : optional
            Existing client to use instead of creating one from `fractal_uri`.
        compute_specs : optional
            Column selection applied to the dataset dataframe.
        molids : optional
            Row labels to restrict the dataframe to; ignored when None or empty.
        scf_maxiter, geometric_maxiter, geometric_coordsys, geometric_qccnv
            Forwarded unchanged to `self._execute_qcengine`.

        Returns
        -------
        list
            The value returned by `self._execute_qcengine` for every
            (spec, molecule) entry processed, in processing order.

        """
        from datetime import datetime

        if client is None:
            client = FractalClient(fractal_uri, verify=False)

        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        df = optds.df

        if (molids is not None) and (len(molids) != 0):
            df = df.loc[list(molids)]

        if compute_specs is not None:
            df = df[compute_specs]

        local_options = {"ncores": ncores, "memory": memory}
        write_output = output_directory is not None

        results = []
        for spec_name in df:

            if write_output:
                os.makedirs(os.path.join(output_directory, spec_name,
                                         'error_mols'),
                            exist_ok=True)

            print("Processing spec: '{}'".format(spec_name))
            # `Series.items()` replaces `iteritems()`, which pandas 2.0 removed
            for mol_id, opt in df[spec_name].items():

                # fix to ensure output fidelity of ids; losing 02 padding on conformer
                org, molecule, conformer = mol_id.split('-')
                output_id = "{org}-{molecule:05}-{conformer:02}".format(
                    org=org, molecule=int(molecule), conformer=int(conformer))

                # subfolders for each compute spec, files named according to molecule ids
                if write_output:
                    outfile = os.path.join(output_directory, spec_name,
                                           output_id)
                    error_outfile = os.path.join(output_directory, spec_name,
                                                 'error_mols', output_id)

                print("... '{}'".format(mol_id))
                inputs = self._args_from_optimizationrecord(opt, client)

                # execute optimization, recording naive-UTC start/end timestamps
                start_dt = datetime.utcnow()
                result = self._execute_qcengine(
                    inputs,
                    local_options=local_options,
                    scf_maxiter=scf_maxiter,
                    geometric_maxiter=geometric_maxiter,
                    geometric_coordsys=geometric_coordsys,
                    geometric_qccnv=geometric_qccnv)

                end_dt = datetime.utcnow()
                perfd = {
                    'start': start_dt.isoformat(),
                    'end': end_dt.isoformat()
                }

                if write_output:
                    if result.success:
                        try:
                            final_molecule = self._process_optimization_result(
                                output_id, result)
                            self._execute_output_results(
                                output_id=output_id,
                                resultjson=result.json(),
                                final_molecule=final_molecule,
                                outfile=outfile,
                                success=True,
                                perfd=perfd)
                        except Exception as e:
                            # per-molecule export is best effort: record the
                            # problem and carry on with the remaining molecules
                            print("... '{}' : export error".format(mol_id))

                            try:
                                with open("{}.txt".format(error_outfile),
                                          'w') as f:
                                    f.write(str(e))
                            except (OSError, UnicodeError) as write_error:
                                # the error note itself is optional; report
                                # instead of silently swallowing everything
                                print("... '{}' : could not write error "
                                      "note: {}".format(mol_id, write_error))

                            self._execute_output_results(
                                output_id=output_id,
                                resultjson=result.json(),
                                final_molecule=None,
                                outfile=error_outfile,
                                success=False,
                                perfd=perfd)
                    else:
                        print("... '{}' : compute failed".format(mol_id))

                        # NOTE(review): the failed result object is passed as-is
                        # here, not `result.json()` as in the other branches --
                        # confirm this is intended.
                        self._execute_output_results(
                            output_id=output_id,
                            resultjson=result,
                            final_molecule=None,
                            outfile=error_outfile,
                            success=False,
                            perfd=perfd)

                results.append(result)

        return results
Beispiel #23
0
def test_torsiondrive_submissions(fractal_compute_server, specification):
    """
    Test submitting a torsiondrive dataset and computing it.

    ``specification`` is unpacked as a ``(qc_spec, driver)`` pair, where
    ``qc_spec`` supplies the keyword arguments for ``add_qc_spec`` (including
    ``"program"``). The test is skipped when that program is not available.
    """

    client = FractalClient(fractal_compute_server)

    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_smiles("CO")

    factory = TorsiondriveDatasetFactory(driver=driver)
    factory.add_qc_spec(**qc_spec,
                        spec_name="default",
                        spec_description="test",
                        overwrite=True)

    dataset = factory.create_dataset(
        dataset_name=f"Test torsiondrives info {program}, {driver}",
        molecules=molecules,
        description="Test torsiondrive dataset",
        tagline="Testing torsiondrive datasets",
    )

    # submitting before the long description url is set must be rejected
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # now submit again
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_services(max_iter=50)

    # make sure of the results are complete
    ds = client.get_collection("TorsionDriveDataset", dataset.dataset_name)

    # check the metadata
    meta = Metadata(**ds.data.metadata)
    assert meta == dataset.metadata

    # check the provenance
    assert dataset.provenance == ds.data.provenance

    # check the qc spec; a distinct loop name avoids shadowing the `qc_spec`
    # unpacked from `specification` above
    for dataset_spec in dataset.qc_specifications.values():
        spec = ds.data.specs[dataset_spec.spec_name]

        assert spec.description == dataset_spec.spec_description
        assert spec.qc_spec.driver == dataset.driver
        assert spec.qc_spec.method == dataset_spec.method
        assert spec.qc_spec.basis == dataset_spec.basis
        assert spec.qc_spec.program == dataset_spec.program

        # check the keywords
        keywords = client.query_keywords(spec.qc_spec.keywords)[0]

        assert keywords.values["maxiter"] == dataset.maxiter
        assert keywords.values["scf_properties"] == dataset.scf_properties

        # query the dataset
        ds.query(dataset_spec.spec_name)

        for index in ds.df.index:
            record = ds.df.loc[index].default
            # this will take some time so make sure it is running with no error;
            # the record dump is the assertion message itself, since
            # `print(...)` would evaluate to None and leave the message empty
            assert record.status.value == "COMPLETE", record.dict()
            assert record.error is None
            # 24 energies expected -- presumably one per grid point of the
            # default torsion scan; TODO confirm against the factory defaults
            assert len(record.final_energy_dict) == 24
    def export_molecule_data(self,
                             fractal_uri,
                             output_directory,
                             dataset_name,
                             delete_existing=False,
                             keep_existing=True):
        """Export all molecule data from target QCFractal server to the given directory.

        For each compute specification of the dataset a subfolder
        `<output_directory>/<spec>/` is created (with an `error_mols`
        subfolder for entries whose export failed). Entries without a final
        molecule are skipped.

        Parameters
        ----------
        fractal_uri : str
            Target QCFractal server URI.
        output_directory : str
            Directory path to deposit exported data.
        dataset_name : str
            Dataset name to extract from the QCFractal server.
        delete_existing : bool (False)
            If True, delete existing directory if present.
        keep_existing : bool (True)
            If True, keep existing files in export directory.
            Files corresponding to server data will not be re-exported.
            Relies *only* on filepaths of existing files for determining match.

        Raises
        ------
        Exception
            If `output_directory` already exists and both `delete_existing`
            and `keep_existing` are False.

        """
        import json

        # get dataset
        client = FractalClient(fractal_uri, verify=False)
        optds = client.get_collection("OptimizationDataset", dataset_name)
        optds.status()

        try:
            os.makedirs(output_directory)
        except FileExistsError:
            # only "already exists" is handled here; other OS errors
            # (e.g. permissions) propagate instead of being masked
            if delete_existing:
                shutil.rmtree(output_directory)
                # recreate, so the directory exists even if no spec is exported
                os.makedirs(output_directory)
            elif not keep_existing:
                raise Exception(
                    f'Output directory {output_directory} already exists. '
                    'Specify `delete_existing=True` to remove, or `keep_existing=True` to tolerate'
                )

        # the records mapping does not depend on the spec; build it once
        records = optds.data.dict()['records']

        # for each compute spec, create a folder in the output directory
        # deposit SDF giving final molecule, energy
        specs = optds.list_specifications().index.tolist()
        for spec in specs:
            print("Exporting spec: '{}'".format(spec))
            os.makedirs(os.path.join(output_directory, spec, 'error_mols'),
                        exist_ok=True)
            optentspec = optds.get_specification(spec)

            # `Series.items()` replaces `iteritems()`, which pandas 2.0 removed
            for mol_id, opt in optds.df[spec].items():

                # skip incomplete cases
                if opt.final_molecule is None:
                    print("... '{}' : skipping INCOMPLETE".format(mol_id))
                    continue

                # fix to ensure output fidelity of ids; losing 02 padding on conformer
                org, molecule, conformer = mol_id.split('-')
                output_id = "{org}-{molecule:05}-{conformer:02}".format(
                    org=org, molecule=int(molecule), conformer=int(conformer))

                # subfolders for each compute spec, files named according to molecule ids
                outfile = os.path.join(output_directory, spec, output_id)

                # if we did not delete everything at the start and the path already exists,
                # skip this one; reduces processing and writes to filesystem
                if (not delete_existing) and os.path.exists(
                        "{}.sdf".format(outfile)):
                    print("... '{}' : skipping SDF exists".format(mol_id))
                    continue

                print("... '{}' : exporting COMPLETE".format(mol_id))
                optd = self._get_complete_optimization_result(opt, client)
                optdjson = json.dumps(optd)

                perfd = {
                    'walltime': opt.provenance.wall_time,
                    'completed': opt.modified_on.isoformat()
                }

                try:
                    # records are looked up by the lower-cased entry id
                    offmol = self._mol_from_qcserver(records[mol_id.lower()])

                    # set conformer as final, optimized geometry
                    final_qcmol = opt.get_final_molecule()
                    final_molecule = self._process_final_mol(
                        output_id, offmol, final_qcmol,
                        optentspec.qc_spec.method, optentspec.qc_spec.basis,
                        optentspec.qc_spec.program, opt.energies)

                    self._execute_output_results(output_id=output_id,
                                                 resultjson=optdjson,
                                                 final_molecule=final_molecule,
                                                 outfile=outfile,
                                                 success=True,
                                                 perfd=perfd)

                except Exception as e:
                    # per-molecule export is best effort: record the problem
                    # under `error_mols` and continue with the next entry
                    print("... '{}' : export error".format(mol_id))

                    error_outfile = os.path.join(output_directory, spec,
                                                 'error_mols', output_id)

                    try:
                        with open("{}.txt".format(error_outfile), 'w') as f:
                            f.write(str(e))
                    except (OSError, UnicodeError) as write_error:
                        # the error note itself is optional; report instead of
                        # silently swallowing everything
                        print("... '{}' : could not write error note: {}".
                              format(mol_id, write_error))

                    self._execute_output_results(output_id=output_id,
                                                 resultjson=optdjson,
                                                 final_molecule=None,
                                                 outfile=error_outfile,
                                                 success=False,
                                                 perfd=perfd)
Beispiel #25
0
def test_optimization_submissions(fractal_compute_server, specification):
    """Submit an Optimization dataset to a snowflake server and verify the stored results."""

    client = FractalClient(fractal_compute_server)

    spec_kwargs, driver = specification
    program = spec_kwargs["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    conformers = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(
        **spec_kwargs,
        spec_name="default",
        spec_description="test",
        overwrite=True,
    )

    # only the first two conformers are submitted
    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=conformers[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # a submission without a long description url must fail
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # set a mock url, after which the submission is accepted
    dataset.metadata.long_description_url = "https://test.org"
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # fetch the collection back from the server
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata round trip
    assert Metadata(**ds.data.metadata) == dataset.metadata

    # provenance round trip
    assert dataset.provenance == ds.data.provenance

    # every local qc spec must match what the server stored
    for local_spec in dataset.qc_specifications.values():
        stored = ds.data.specs[local_spec.spec_name]
        stored_qc = stored.qc_spec

        assert stored.description == local_spec.spec_description
        assert stored_qc.driver == dataset.driver
        assert stored_qc.method == local_spec.method
        assert stored_qc.basis == local_spec.basis
        assert stored_qc.program == local_spec.program

        # keywords attached to the stored spec
        keyword_set = client.query_keywords(stored_qc.keywords)[0]

        assert keyword_set.values["maxiter"] == dataset.maxiter
        assert keyword_set.values["scf_properties"] == dataset.scf_properties

        # load the records for this spec
        ds.query(local_spec.spec_name)

        for entry in ds.df.index:
            opt_record = ds.df.loc[entry].default
            assert opt_record.status.value == "COMPLETE"
            assert opt_record.error is None
            assert len(opt_record.trajectory) > 1
            # psi4 runs should also have captured the requested properties
            if program == "psi4":
                first_step = opt_record.get_trajectory()[0]
                assert "CURRENT DIPOLE X" in first_step.extras["qcvars"].keys()
                assert "SCF QUADRUPOLE XX" in first_step.extras["qcvars"].keys()
Beispiel #26
0
def test_optimization_submissions_with_pcm(fractal_compute_server):
    """Submit an Optimization dataset using PCM to a snowflake server and verify the results."""

    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    # PCM is slow, so a single small molecule is used
    methane = Molecule.from_smiles("C")

    solvent = PCMSettings(units="au", medium_Solvent="water")
    factory = OptimizationDatasetFactory(driver="gradient")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="test",
        implicit_solvent=solvent,
        overwrite=True,
    )

    dataset = factory.create_dataset(
        dataset_name="Test optimizations info with pcm water",
        molecules=methane,
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # a submission without a long description url must fail
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # set a mock url, after which the submission is accepted
    dataset.metadata.long_description_url = "https://test.org"
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # fetch the collection back from the server
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata round trip
    assert Metadata(**ds.data.metadata) == dataset.metadata

    # provenance round trip
    assert dataset.provenance == ds.data.provenance

    # every local qc spec must match what the server stored
    for local_spec in dataset.qc_specifications.values():
        stored = ds.data.specs[local_spec.spec_name]
        stored_qc = stored.qc_spec

        assert stored.description == local_spec.spec_description
        assert stored_qc.driver == dataset.driver
        assert stored_qc.method == local_spec.method
        assert stored_qc.basis == local_spec.basis
        assert stored_qc.program == local_spec.program

        # keywords attached to the stored spec
        keyword_set = client.query_keywords(stored_qc.keywords)[0]

        assert keyword_set.values["maxiter"] == dataset.maxiter
        assert keyword_set.values["scf_properties"] == dataset.scf_properties

        # load the records for this spec
        ds.query(local_spec.spec_name)

        for entry in ds.df.index:
            opt_record = ds.df.loc[entry].default
            assert opt_record.status.value == "COMPLETE"
            assert opt_record.error is None
            assert len(opt_record.trajectory) > 1
            first_step = opt_record.get_trajectory()[0]
            assert "CURRENT DIPOLE X" in first_step.extras["qcvars"].keys()
            assert "SCF QUADRUPOLE XX" in first_step.extras["qcvars"].keys()
            # the PCM contribution must have been recorded
            assert first_step.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0
Beispiel #27
0
def test_basic_submissions_multiple_spec(fractal_compute_server):
    """Submit a basic dataset with several qcspecs to a snowflake server and verify the results."""

    client = FractalClient(fractal_compute_server)

    openff_spec = dict(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_name="openff",
    )
    gaff_spec = dict(
        method="gaff-2.11",
        basis="antechamber",
        program="openmm",
        spec_name="gaff",
    )

    conformers = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    # replace the factory's specs with the two defined above
    factory = BasicDatasetFactory(driver="energy")
    factory.clear_qcspecs()
    for spec_kwargs in (openff_spec, gaff_spec):
        factory.add_qc_spec(**spec_kwargs,
                            spec_description="testing the single points")

    dataset = factory.create_dataset(
        dataset_name="Test single points multiple specs",
        molecules=conformers,
        description="Test basics dataset",
        tagline="Testing single point datasets",
    )

    # a submission without a long description url must fail
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # set a mock url, after which the submission is accepted
    dataset.metadata.long_description_url = "https://test.org"
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # fetch the collection back from the server
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # metadata round trip
    assert Metadata(**ds.data.metadata) == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # provenance round trip
    assert dataset.provenance == ds.data.provenance

    # driver stored on the collection
    assert ds.data.default_driver == dataset.driver

    # each history entry must agree with the local spec of the same name
    for history_entry in ds.data.history:
        ran_driver, ran_program, ran_method, ran_basis, ran_name = history_entry
        local_spec = dataset.qc_specifications[ran_name]
        assert ran_driver == dataset.driver
        assert ran_program == local_spec.program
        assert ran_method == local_spec.method
        assert ran_basis == local_spec.basis

    # every record of every spec must have finished cleanly with a result
    for local_spec in dataset.qc_specifications.values():
        records = ds.get_records(
            method=local_spec.method,
            basis=local_spec.basis,
            program=local_spec.program,
        )
        for entry in records.index:
            single_point = records.loc[entry].record
            assert single_point.status.value.upper() == "COMPLETE"
            assert single_point.error is None
            assert single_point.return_result is not None
Beispiel #28
0
def public_client():
    """Return a new ``FractalClient`` built with its default arguments.

    Per the original author's note this is the connection to the public
    qcarchive server.
    """
    connection = FractalClient()
    return connection
Beispiel #29
0
def test_basic_submissions_single_pcm_spec(fractal_compute_server):
    """Submit a basic dataset whose specification uses pcm water to a snowflake server and verify it."""

    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    conformers = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    solvent = PCMSettings(units="au", medium_Solvent="water")
    factory = BasicDatasetFactory(driver="energy")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="testing the single points with pcm",
        implicit_solvent=solvent,
        overwrite=True,
    )

    # pcm is slow, so only the first conformer is submitted
    dataset = factory.create_dataset(
        dataset_name="Test single points with pcm water",
        molecules=conformers[0],
        description="Test basics dataset with pcm water",
        tagline="Testing single point datasets with pcm water",
    )

    # a submission without a long description url must fail
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client, await_result=False)

    # set a mock url, after which the submission is accepted
    dataset.metadata.long_description_url = "https://test.org"
    dataset.submit(client=client, await_result=False)

    fractal_compute_server.await_results()

    # fetch the collection back from the server
    ds = client.get_collection("Dataset", dataset.dataset_name)

    # metadata round trip
    assert Metadata(**ds.data.metadata) == dataset.metadata

    assert ds.data.description == dataset.description
    assert ds.data.tagline == dataset.dataset_tagline
    assert ds.data.tags == dataset.dataset_tags

    # provenance round trip
    assert dataset.provenance == ds.data.provenance

    # driver stored on the collection
    assert ds.data.default_driver == dataset.driver

    # only the first history entry yielded by iteration is checked (the loop
    # breaks right after it); an empty history falls through to the `else`
    for history_entry in ds.data.history:
        ran_driver, ran_program, ran_method, ran_basis, ran_name = history_entry
        local_spec = dataset.qc_specifications[ran_name]
        assert ran_driver == dataset.driver
        assert ran_program == local_spec.program
        assert ran_method == local_spec.method
        assert ran_basis == local_spec.basis
        break
    else:
        raise RuntimeError(
            f"The requested compute was not found in the history {ds.data.history}"
        )

    # every record of every spec must have finished cleanly with a result
    for local_spec in dataset.qc_specifications.values():
        records = ds.get_records(
            method=local_spec.method,
            basis=local_spec.basis,
            program=local_spec.program,
        )
        for entry in records.index:
            single_point = records.loc[entry].record
            assert single_point.status.value.upper() == "COMPLETE"
            assert single_point.error is None
            assert single_point.return_result is not None
            # the PCM contribution must have been recorded
            assert single_point.extras["qcvars"]["PCM POLARIZATION ENERGY"] < 0