コード例 #1
0
def test_create_dataset_atom_map():
    """Check that a dataset can be built from a molecule carrying an ``atom_map`` property."""

    dataset_factory = OptimizationDatasetFactory()

    # attach an atom map to the molecule before handing it to the factory
    mapped_molecule = Molecule.from_smiles("CCCC([O-])=O")
    mapped_molecule.properties['atom_map'] = {1: 1, 2: 2, 3: 3, 4: 4}

    # only successful creation matters here, so the returned dataset is discarded
    dataset_factory.create_dataset(
        dataset_name="test name",
        molecules=mapped_molecule,
        description="Force field test",
        tagline="A test dataset",
    )
コード例 #2
0
def test_optimization_submissions_with_constraints(fractal_compute_server):
    """
    Make sure that the constraints are added to the optimization and enforced.

    A dihedral on ethane is set to 60 and one distance is frozen; once the
    optimization has run, the record keywords and the final geometry are
    inspected to confirm that both constraints were honoured.
    """
    client = FractalClient(fractal_compute_server)
    ethane = Molecule.from_file(get_data("ethane.sdf"), "sdf")
    factory = OptimizationDatasetFactory()
    dataset = OptimizationDataset(
        dataset_name="Test optimizations with constraint",
        description="Test optimization dataset with constraints",
        tagline="Testing optimization datasets")
    # add just mm spec
    dataset.add_qc_spec(method="openff-1.0.0",
                        basis="smirnoff",
                        program="openmm",
                        spec_name="default",
                        spec_description="mm default spec",
                        overwrite=True)
    # build some constraints
    constraints = Constraints()
    constraints.add_set_constraint(constraint_type="dihedral",
                                   indices=[2, 0, 1, 5],
                                   value=60,
                                   bonded=True)
    constraints.add_freeze_constraint(constraint_type="distance",
                                      indices=[0, 1],
                                      bonded=True)
    # add the molecule
    attributes = factory.create_cmiles_metadata(ethane)
    index = ethane.to_smiles()
    dataset.add_molecule(index=index,
                         molecule=ethane,
                         attributes=attributes,
                         constraints=constraints)
    # now add a mock url so we can submit the data
    dataset.metadata.long_description_url = "https://test.org"

    # submit the dataset and wait for the compute to finish
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # make sure the results are complete
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)
    record = ds.get_record(ds.df.index[0], "default")
    assert "constraints" in record.keywords
    assert record.status.value == "COMPLETE"
    assert record.error is None
    assert len(record.trajectory) > 1

    # now make sure the constraints worked
    # The measured value has to be compared *against* pytest.approx: a bare
    # `assert pytest.approx(expected, actual)` only builds an approx object
    # (second positional argument is the relative tolerance), which is always
    # truthy and therefore checks nothing.
    # NOTE(review): the absolute tolerances below are a judgement call for the
    # optimizer's convergence -- loosen/tighten if the backend changes.
    final_molecule = record.get_final_molecule()
    initial_molecule = record.get_initial_molecule()
    assert final_molecule.measure((2, 0, 1, 5)) == pytest.approx(60, abs=1e-2)
    assert final_molecule.measure((0, 1)) == pytest.approx(
        initial_molecule.measure((0, 1)), abs=1e-3)
コード例 #3
0
def test_optimization_driver():
    """
    The optimization factory must refuse any attempt to change its driver.
    """

    opt_factory = OptimizationDatasetFactory()

    # assigning a different driver has to raise ...
    with pytest.raises(DriverError):
        opt_factory.driver = "energy"

    # ... and the factory keeps reporting the gradient driver afterwards
    assert opt_factory.driver == "gradient"
コード例 #4
0
    def _create_qcsubmit_dataset(self, dataset_name, mols, season):
        """Assemble a QCSubmit ``OptimizationDataset`` for the given molecules.

        Each molecule in ``mols`` is added under the index produced by
        ``self._mol_to_id`` together with its CMILES metadata, and the compute
        specifications are taken from ``SEASONS[season]``.

        Returns the populated dataset.
        """
        from openff.qcsubmit.factories import OptimizationDataset, OptimizationDatasetFactory

        dataset = OptimizationDataset(dataset_name=dataset_name)
        # the factory is only used here to generate the CMILES attributes
        metadata_factory = OptimizationDatasetFactory()

        for molecule in mols:
            dataset.add_molecule(
                index=self._mol_to_id(molecule),
                molecule=molecule,
                attributes=metadata_factory.create_cmiles_metadata(molecule),
            )

        dataset.qc_specifications = SEASONS[season]

        # placeholder url for the long description
        dataset.metadata.long_description_url = "https://localhost.local/null"

        # known deviations from the `OptimizationDataset` defaults
        dataset.optimization_procedure.coordsys = 'dlc'
        dataset.optimization_procedure.reset = True

        return dataset
コード例 #5
0
def test_optimization_submissions_with_pcm(fractal_compute_server):
    """Submit a psi4 optimization dataset using PCM water to a snowflake server and check what is stored."""

    client = FractalClient(fractal_compute_server)

    program = "psi4"
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    # PCM adds extra run time, so only a single small molecule is submitted
    molecules = Molecule.from_smiles("C")

    factory = OptimizationDatasetFactory(driver="gradient")
    pcm_water = PCMSettings(units="au", medium_Solvent="water")
    factory.add_qc_spec(
        method="hf",
        basis="sto-3g",
        program=program,
        spec_name="default",
        spec_description="test",
        implicit_solvent=pcm_water,
        overwrite=True,
    )

    dataset = factory.create_dataset(
        dataset_name="Test optimizations info with pcm water",
        molecules=molecules,
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # with the long description removed, submission is expected to be rejected
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # put a description back and submit for real
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # fetch the collection from the server to compare with the local dataset
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata and provenance must match what was submitted
    assert Metadata(**ds.data.metadata) == dataset.metadata
    assert dataset.provenance == ds.data.provenance

    for local_spec in dataset.qc_specifications.values():
        server_spec = ds.data.specs[local_spec.spec_name]

        # the stored specification mirrors the local one
        assert server_spec.description == local_spec.spec_description
        assert server_spec.qc_spec.driver == dataset.driver
        assert server_spec.qc_spec.method == local_spec.method
        assert server_spec.qc_spec.basis == local_spec.basis
        assert server_spec.qc_spec.program == local_spec.program

        # the keyword set referenced by the spec carries the same options
        stored_keywords = client.query_keywords(server_spec.qc_spec.keywords)[0]
        assert stored_keywords.values["maxiter"] == local_spec.maxiter
        assert stored_keywords.values["scf_properties"] == local_spec.scf_properties

        # load the records for this spec into the dataframe
        ds.query(local_spec.spec_name)

        for row in ds.df.index:
            record = ds.df.loc[row].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1

            first_step = record.get_trajectory()[0]
            qcvars = first_step.extras["qcvars"]
            assert "CURRENT DIPOLE X" in qcvars.keys()
            assert "SCF QUADRUPOLE XX" in qcvars.keys()
            # the PCM contribution must have been captured
            assert qcvars["PCM POLARIZATION ENERGY"] < 0
コード例 #6
0
def test_optimization_submissions(fractal_compute_server, specification):
    """Submit an optimization dataset to a snowflake server and check what is stored."""

    client = FractalClient(fractal_compute_server)

    # `specification` supplies the qc spec options and the driver to use
    qc_spec, driver = specification
    program = qc_spec["program"]
    if not has_program(program):
        pytest.skip(f"Program '{program}' not found.")

    molecules = Molecule.from_file(get_data("butane_conformers.pdb"), "pdb")

    factory = OptimizationDatasetFactory(driver=driver)
    factory.add_qc_spec(
        **qc_spec,
        spec_name="default",
        spec_description="test",
        overwrite=True,
    )

    # only the first two loaded molecules are submitted
    dataset = factory.create_dataset(
        dataset_name=f"Test optimizations info {program}, {driver}",
        molecules=molecules[:2],
        description="Test optimization dataset",
        tagline="Testing optimization datasets",
    )

    # with the long description removed, submission is expected to be rejected
    dataset.metadata.long_description = None
    with pytest.raises(DatasetInputError):
        dataset.submit(client=client)

    # put a description back and submit for real
    dataset.metadata.long_description = "Test basics dataset"
    dataset.submit(client=client)

    fractal_compute_server.await_results()

    # fetch the collection from the server to compare with the local dataset
    ds = client.get_collection("OptimizationDataset", dataset.dataset_name)

    # metadata and provenance must match what was submitted
    assert Metadata(**ds.data.metadata) == dataset.metadata
    assert dataset.provenance == ds.data.provenance

    for local_spec in dataset.qc_specifications.values():
        server_spec = ds.data.specs[local_spec.spec_name]

        # the stored specification mirrors the local one
        assert server_spec.description == local_spec.spec_description
        assert server_spec.qc_spec.driver == dataset.driver
        assert server_spec.qc_spec.method == local_spec.method
        assert server_spec.qc_spec.basis == local_spec.basis
        assert server_spec.qc_spec.program == local_spec.program

        # the keyword set referenced by the spec carries the same options
        stored_keywords = client.query_keywords(server_spec.qc_spec.keywords)[0]
        assert stored_keywords.values["maxiter"] == local_spec.maxiter
        assert stored_keywords.values["scf_properties"] == local_spec.scf_properties

        # load the records for this spec into the dataframe
        ds.query(local_spec.spec_name)

        for row in ds.df.index:
            record = ds.df.loc[row].default
            assert record.status.value == "COMPLETE"
            assert record.error is None
            assert len(record.trajectory) > 1

            # the extra properties are only checked for psi4 runs
            if program != "psi4":
                continue
            first_step = record.get_trajectory()[0]
            assert "CURRENT DIPOLE X" in first_step.extras["qcvars"].keys()
            assert "SCF QUADRUPOLE XX" in first_step.extras["qcvars"].keys()
コード例 #7
0
def test_adding_specifications(fractal_compute_server):
    """
    Exercise adding compute specifications to a dataset already on the server.

    Scenarios covered:
    1) re-adding a specification identical to one already present
    2) adding a specification that reuses a name but has different options
    3) overwriting a specification that was added but never computed
    """
    client = FractalClient(fractal_compute_server)
    mol = Molecule.from_smiles("CO")

    # build a dataset and replace its specs with a single mm spec
    factory = OptimizationDatasetFactory()
    opt_dataset = factory.create_dataset(
        dataset_name="Specification error check",
        molecules=mol,
        description="test adding new compute specs to datasets",
        tagline="test adding new compute specs",
    )
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="openff-1.0.0",
        basis="smirnoff",
        program="openmm",
        spec_description="default openff spec",
        spec_name="openff-1.0.0",
    )

    # submit and wait for the compute to finish
    opt_dataset.submit(client=client)
    fractal_compute_server.await_results()
    fractal_compute_server.await_services()

    # the server-side collection every specification below is added to
    ds = client.get_collection(opt_dataset.type, opt_dataset.dataset_name)

    def add_named_spec(spec_name):
        """Add the local spec called ``spec_name`` to the server collection."""
        return opt_dataset._add_dataset_specification(
            spec=opt_dataset.qc_specifications[spec_name],
            procedure_spec=opt_dataset.optimization_procedure.
            get_optimzation_spec(),
            dataset=ds)

    # 1) the identical specification is accepted again
    assert add_named_spec("openff-1.0.0") is True

    # 2) same name, different settings -> must be rejected
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="openff-1.2.1",
        basis="smirnoff",
        spec_name="openff-1.0.0",
        program="openmm",
        spec_description="openff-1.2.1 with wrong name.",
    )
    with pytest.raises(QCSpecificationError):
        add_named_spec("openff-1.0.0")

    # 3) a new specification with no compute behind it ...
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1x",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert add_named_spec("ani") is True

    # ... can be replaced by a slightly different one under the same name
    opt_dataset.clear_qcspecs()
    opt_dataset.add_qc_spec(
        method="ani1ccx",
        basis=None,
        program="torchani",
        spec_name="ani",
        spec_description="a ani spec",
    )
    assert add_named_spec("ani") is True
コード例 #8
0
    def execute_optimization_from_molecules(self,
                                            input_paths,
                                            output_directory,
                                            season,
                                            ncores=1,
                                            memory=2,
                                            delete_existing=False,
                                            keep_existing=True,
                                            recursive=False,
                                            scf_maxiter=200,
                                            geometric_maxiter=300,
                                            geometric_coordsys='dlc',
                                            geometric_qccnv=False):
        """Execute optimizations from the given SDF molecules locally on this host.

        Optimizations are performed in series for the molecules given,
        with `ncores` and `memory` setting the resource constraints of each optimization.

        Parameters
        ----------
        input_paths : iterable of Path-like
            Paths to SDF files or directories; if directories, all SDF files in them are loaded
            (recursively when `recursive` is True).
        output_directory : str
            Directory path to deposit exported data.
        season : str
            Benchmark season identifier; key into `SEASONS`.
            Indicates the mix of compute specs to utilize.
        ncores : int
            Number of concurrent cores to use for each optimization.
        memory : float
            Amount of memory in GiB to allow for each optimization.
        delete_existing : bool (False)
            If True, delete existing directory if present.
        keep_existing : bool (True)
            If True, keep existing files in export directory.
            Inputs whose expected output files all exist already are not re-run.
            Relies *only* on filepaths of existing files for determining match.
        recursive : bool
            If True, recursively load SDFs from any directories given in `input_paths`.
        scf_maxiter : int
            Forwarded unchanged to `_execute_qcengine`.
        geometric_maxiter : int
            Forwarded unchanged to `_execute_qcengine`.
        geometric_coordsys : str
            Forwarded unchanged to `_execute_qcengine`.
        geometric_qccnv : bool
            Forwarded unchanged to `_execute_qcengine`.

        Returns
        -------
        results : list
            The objects returned by `_execute_qcengine`, one per (compute spec, molecule)
            pair, in execution order; includes failed computations.

        Raises
        ------
        Exception
            If `output_directory` exists and neither `delete_existing` nor
            `keep_existing` is set.

        """
        from datetime import datetime

        from openff.qcsubmit.factories import OptimizationDatasetFactory

        # fail early if output_directory already exists and we aren't deleting or keeping it
        if os.path.isdir(output_directory):
            if delete_existing:
                shutil.rmtree(output_directory)
            elif not keep_existing:
                raise Exception(
                    f'Output directory {output_directory} already exists. '
                    'Specify `delete_existing=True` to remove, or `keep_existing=True` to tolerate'
                )

        compute_specs = SEASONS[season]

        # get paths to submit, using output directory contents to inform choice
        # for the given specs, if *any* expected output files are not present, we submit corresponding input file
        if keep_existing:
            in_out_path_map = self._source_specs_output_paths(
                input_paths,
                compute_specs,
                output_directory,
                recursive=recursive)

            input_paths = [
                input_file
                for input_file, output_files in in_out_path_map.items()
                if not all(map(os.path.exists, output_files))
            ]

        # extract molecules from SDF inputs
        mols = mols_from_paths(input_paths, recursive=recursive)
        factory = OptimizationDatasetFactory()

        local_options = {"ncores": ncores, "memory": memory}

        results = []
        for spec_name, compute_spec in compute_specs.items():
            print("Processing spec: '{}'".format(spec_name))

            # subfolders for each compute spec; failures are collected under 'error_mols'
            spec_dir = os.path.join(output_directory, spec_name)
            error_dir = os.path.join(spec_dir, 'error_mols')
            os.makedirs(error_dir, exist_ok=True)

            for mol in mols:
                mol_id = self._mol_to_id(mol)

                # fix to ensure output fidelity of ids; losing 02 padding on conformer
                org, molecule, conformer = mol_id.split('-')
                output_id = "{org}-{molecule:05}-{conformer:02}".format(
                    org=org, molecule=int(molecule), conformer=int(conformer))

                # files named according to molecule ids
                outfile = os.path.join(spec_dir, output_id)
                error_outfile = os.path.join(error_dir, output_id)

                print("... '{}'".format(mol_id))

                input_data = self._generate_optimization_input(
                    mol, compute_spec, factory)

                # execute optimization, recording wall-clock start/end for the export
                # NOTE(review): `datetime.utcnow()` is deprecated in recent Python versions;
                # kept because switching to an aware datetime changes the emitted ISO strings.
                start_dt = datetime.utcnow()
                result = self._execute_qcengine(
                    input_data,
                    local_options=local_options,
                    scf_maxiter=scf_maxiter,
                    geometric_maxiter=geometric_maxiter,
                    geometric_coordsys=geometric_coordsys,
                    geometric_qccnv=geometric_qccnv)
                end_dt = datetime.utcnow()

                perfd = {
                    'start': start_dt.isoformat(),
                    'end': end_dt.isoformat()
                }

                if result.success:
                    try:
                        final_molecule = self._process_optimization_result(
                            output_id, result)
                        self._execute_output_results(
                            output_id=output_id,
                            resultjson=result.json(),
                            final_molecule=final_molecule,
                            outfile=outfile,
                            success=True,
                            perfd=perfd)
                    except Exception as e:
                        print("... '{}' : export error".format(mol_id))

                        # writing the error text is best-effort: report a failure to do so
                        # but carry on, without hiding KeyboardInterrupt/SystemExit
                        try:
                            with open("{}.txt".format(error_outfile),
                                      'w') as f:
                                f.write(str(e))
                        except Exception as write_err:
                            print("... '{}' : could not write error log: {}".format(
                                mol_id, write_err))

                        self._execute_output_results(
                            output_id=output_id,
                            resultjson=result.json(),
                            final_molecule=None,
                            outfile=error_outfile,
                            success=False,
                            perfd=perfd)
                else:
                    print("... '{}' : compute failed".format(mol_id))

                    # NOTE(review): the raw result object (not `result.json()`) is passed
                    # here, unlike the success path -- confirm this is intentional.
                    self._execute_output_results(output_id=output_id,
                                                 resultjson=result,
                                                 final_molecule=None,
                                                 outfile=error_outfile,
                                                 success=False,
                                                 perfd=perfd)

                results.append(result)

        return results