Esempio n. 1
0
def test_make_fitting_schema_from_results():
    """
    Build a fitting schema from torsiondrive results pulled from the archive and
    check that every task in the schema reports it is ready for fitting.
    """
    # assemble a workflow whose optimizer is ForceBalance with an AbInitio target
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    # download only the final molecules for the entries listed in bace_entries
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )
    fitting_schema = factory.fitting_schema_from_results(
        results=torsiondrives, combine=True
    )

    # with combine=True the three torsiondrives are expected to collapse onto two molecules
    assert fitting_schema.n_molecules == 2
    assert fitting_schema.n_tasks == 3
    # every task must already hold its results
    for fitting_task in fitting_schema.tasks:
        assert fitting_task.ready_for_fitting is True
Esempio n. 2
0
def test_optimizer_explicit():
    """
    Call the executor's optimizer step directly (no background process) and check
    that the force constants on the final smirks are no longer 1e-5.
    """
    molecule = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    # build the schema for biphenyl and attach the stored scan results
    fitting_schema = get_fitting_schema(molecules=molecule)
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_schema.update_with_results(results=scan_results)

    # no server is required: queue a single task by hand and tell the
    # executor that it is the only one it should expect
    executor = Executor()
    executor.total_tasks = 1
    executor.opt_queue.put(fitting_schema.tasks[0])

    with temp_directory():
        executor.optimizer()
        # the processed task is collected from the finished queue
        finished_task = executor.finished_tasks.get()
        updated_schema = executor.update_fitting_schema(
            task=finished_task, fitting_schema=fitting_schema
        )
        # each force constant should have moved away from 1e-5
        for smirk in updated_schema.tasks[0].final_smirks:
            for term in smirk.terms.values():
                assert float(term.k.split()[0]) != 1e-5
Esempio n. 3
0
def test_torsiondrivedataset_new_optimization(public_client):
    """
    Test making a new optimization dataset of constrained optimizations from the
    results of a torsiondrive dataset.
    """
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=["[ch2:3]([ch2:2][oh:4])[oh:1]_12"],
    )
    # turn the torsiondrive results into a new optimization dataset
    opt_dataset = torsiondrives.create_optimization_dataset(
        dataset_name="new optimization dataset",
        description="a test optimization dataset",
        tagline="a test optimization dataset.",
    )

    assert opt_dataset.dataset_name == "new optimization dataset"
    assert opt_dataset.n_molecules == 1
    assert opt_dataset.n_records == 24

    # every entry must carry exactly one "set" constraint; collect its value
    constrained_values = set()
    for record in opt_dataset.dataset.values():
        assert record.constraints.has_constraints is True
        assert len(record.constraints.set) == 1
        constrained_values.add(record.constraints.set[0].value)

    # the collected values must cover -165..180 in steps of 15
    expected_grid = list(range(-165, 195, 15))
    assert sorted(constrained_values) == expected_grid
Esempio n. 4
0
def test_task_from_results():
    """
    Build a single optimization task from one torsiondrive result and check its fields.
    """
    # pull just the first entry of bace_entries from the archive
    collection = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries[:1],
    )
    # only one result was requested, so take the first value
    single_result = list(collection.collection.values())[0]

    # workflow using ForceBalance with an AbInitio target
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    task_schema = factory._task_from_results(results=[single_result], index=1)

    # settings inherited from the workflow / optimizer
    assert task_schema.initial_forcefield == factory.initial_forcefield
    assert task_schema.optimizer_name == optimizer.optimizer_name
    # the job id is derived from the index passed above
    assert task_schema.job_id == "bespoke_task_1"
    assert bool(task_schema.target_smirks) is True
    assert task_schema.target_parameters == factory.target_parameters
    assert single_result.molecule == task_schema.target_molecule.molecule
    # one task with one target, already populated with results
    assert task_schema.n_tasks == 1
    assert task_schema.n_targets == 1
    assert task_schema.ready_for_fitting is True
Esempio n. 5
0
def test_torsionprofile_metadata():
    """
    Make sure that when using the torsionprofile target we make the metadata.json file.
    """
    from openff.qcsubmit.serializers import deserialize

    profile_target = TorsionProfile_SMIRNOFF()
    fitting_target = biphenyl_target(target=profile_target)
    # attach the stored biphenyl scan to the target schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_target.update_with_results(results=scan_results)
    assert fitting_target.ready_for_fitting is True

    with temp_directory():
        profile_target.prep_for_fitting(fitting_target=fitting_target)
        # a single torsiondrive should produce a single target folder
        folders = os.listdir(".")
        assert len(folders) == 1
        target_dir = folders[0]
        written_files = os.listdir(target_dir)
        for expected_name in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
            "metadata.json",
        ):
            assert expected_name in written_files

        metadata = deserialize(file_name=os.path.join(target_dir, "metadata.json"))
        # the metadata must agree with the task it was generated from
        task = fitting_target.tasks[0]
        assert task.dihedrals[0] == tuple(metadata["dihedrals"][0])
        for reference in task.reference_data():
            assert reference.extras["dihedral_angle"] in metadata["torsion_grid_ids"]
Esempio n. 6
0
def test_torsiondrivedataset_export(public_client):
    """
    Export a torsiondrive result collection to file and check it can be read back.
    """
    with temp_directory():
        downloaded = TorsionDriveCollectionResult.from_server(
            client=public_client,
            spec_name="default",
            dataset_name="TorsionDrive Paper",
            include_trajectory=False,
            final_molecule_only=True,
        )

        # round trip through a json file
        downloaded.export_results("dataset.json")
        reloaded = TorsionDriveCollectionResult.parse_file("dataset.json")

        # everything except the collection is compared directly
        original_fields = downloaded.dict(exclude={"collection"})
        reloaded_fields = reloaded.dict(exclude={"collection"})
        assert original_fields == reloaded_fields
        # the collection is only checked for matching entry keys
        for entry_name in downloaded.collection:
            assert entry_name in reloaded.collection
Esempio n. 7
0
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare the abinitio target for fitting with gradient fitting switched off and
    check the files that are written.
    """
    abinitio_target = AbInitio_SMIRNOFF()
    abinitio_target.fit_gradient = False
    fitting_target = biphenyl_target(target=abinitio_target)
    reference_molecule = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    # attach the stored biphenyl scan to the target schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_target.update_with_results(results=scan_results)
    assert fitting_target.ready_for_fitting is True

    with temp_directory():
        abinitio_target.prep_for_fitting(fitting_target=fitting_target)
        # a single torsiondrive should produce a single target folder
        folders = os.listdir(".")
        assert len(folders) == 1
        written_files = os.listdir(folders[0])
        for expected_name in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
        ):
            assert expected_name in written_files

        # the atom ordering in the pdb must be the same as in the input molecule
        pdb_molecule = Molecule.from_file(
            os.path.join(folders[0], "molecule.pdb"), file_format="pdb"
        )
        isomorphic, atom_map = Molecule.are_isomorphic(
            reference_molecule, pdb_molecule, return_atom_map=True
        )
        assert isomorphic is True
        identity_map = {index: index for index in range(reference_molecule.n_atoms)}
        assert atom_map == identity_map

        # the mol2 file must carry partial charges
        mol2_molecule = Molecule.from_file(
            os.path.join(folders[0], "molecule.mol2"), "mol2"
        )
        assert mol2_molecule.partial_charges is not None

        # qdata must reproduce the reference geometries and energies, frame by frame
        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(folders[0], "qdata.txt")
        )
        # fit_gradient is False so no gradients may be present
        assert not gradients
        reference_frames = fitting_target.tasks[0].reference_data()
        for frame_index, (frame_coords, frame_energy) in enumerate(
            zip(coords, energies)
        ):
            reference = reference_frames[frame_index]
            assert reference.energy == frame_energy
            assert frame_coords == reference.molecule.geometry.flatten().tolist()
Esempio n. 8
0
def test_torsiondrivedataset_final_result_only(public_client):
    """
    Check that final_molecule_only=True leaves a single entry in each trajectory.
    """
    collection = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=False,
        final_molecule_only=True,
    )

    # walk every optimization of every torsiondrive and check its trajectory length
    for drive in collection.collection.values():
        for opt_record in drive.optimization.values():
            assert len(opt_record.trajectory) == 1
Esempio n. 9
0
def test_torsiondrivedataset_traj_subset(public_client):
    """
    Check that full trajectories are downloaded when requested for a subset of a collection.
    """
    entry_name = "[ch2:3]([ch2:2][oh:4])[oh:1]_12"
    collection = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=[entry_name],
    )

    # only the requested torsiondrive should be present
    assert len(collection.collection) == 1
    # and each of its optimizations should hold more than two trajectory entries
    drive = collection.collection[entry_name]
    for opt_record in drive.optimization.values():
        assert len(opt_record.trajectory) > 2
Esempio n. 10
0
def test_torsiondrivedataset_result_default(public_client):
    """
    Download a torsiondrive dataset with the default-style options and exercise the
    helper methods of a single torsiondrive result.
    """
    import numpy as np
    from simtk import unit

    collection = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=False,
        final_molecule_only=False,
    )

    # without the full trajectory each optimization should hold exactly two entries
    for drive in collection.collection.values():
        for opt_record in drive.optimization.values():
            assert len(opt_record.trajectory) == 2

    # the dataset is expected to contain three torsiondrives
    assert len(collection.collection) == 3

    # now check the utility functions on one torsiondrive
    drive = collection.collection["[ch2:3]([ch2:2][oh:4])[oh:1]_12"]
    assert drive.final_energies is not None
    # one final energy per optimization
    assert len(drive.final_energies) == len(drive.optimization)
    plain_molecule = drive.molecule
    scan_molecule = drive.get_torsiondrive()
    assert plain_molecule == scan_molecule
    # one conformer per optimization
    assert scan_molecule.n_conformers == len(drive.optimization)
    # conformers (converted to bohr) must match the ordered result geometries
    ordered = drive.get_ordered_results()
    for conformer, ordered_entry in zip(scan_molecule.conformers, ordered):
        conformer_bohr = conformer.in_units_of(unit.bohr).tolist()
        reference_geometry = ordered_entry[1].molecule.geometry.tolist()
        assert np.allclose(conformer_bohr, reference_geometry)

    # the lowest energy optimization must carry the minimum of the final energies
    lowest = drive.get_lowest_energy_optimisation()
    assert lowest.final_energy == min(list(drive.final_energies.values()))
Esempio n. 11
0
def test_forcebalance_optimize(optimization_target):
    """
    Run the complete ForceBalance optimization for a biphenyl workflow built with the
    supplied target, using scan data stored alongside the tests.
    """
    from openff.qcsubmit.results import TorsionDriveCollectionResult

    schema = biphenyl_workflow(target=optimization_target)
    with temp_directory():
        # attach the stored scan results to the workflow
        scan_results = TorsionDriveCollectionResult.parse_file(
            get_data("biphenyl.json.xz")
        )
        schema.update_with_results(results=scan_results)

        # run the optimizer and make sure it completes
        outcome = ForceBalanceOptimizer().optimize(schema=schema)
        assert outcome.status == Status.Complete

        # none of the force constants may still equal this 1e-05 value
        for smirk in outcome.target_smirks:
            for term in smirk.terms.values():
                assert term.k != "1e-05 * mole**-1 * kilocalorie"
Esempio n. 12
0
def test_sort_results(combine):
    """
    Sort downloaded results ahead of building a fitting schema, with and without combining.
    """
    # pull the entries listed in bace_entries from the archive
    collection = TorsionDriveCollectionResult.from_server(
        client=FractalClient(),
        spec_name="default",
        dataset_name="OpenFF-benchmark-ligand-fragments-v1.0",
        final_molecule_only=True,
        subset=bace_entries,
    )
    factory = WorkflowFactory()
    sorted_results = factory._sort_results(results=collection, combine=combine)

    # combining groups the three results into two; otherwise all three stay separate
    expected_groups = 2 if combine else 3
    assert len(sorted_results) == expected_groups
Esempio n. 13
0
def test_torsiondrive_new_basicdataset(public_client):
    """
    Create a basic dataset from a torsiondrive result collection and check its contents.
    """
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=["[ch2:3]([ch2:2][oh:4])[oh:1]_12"],
    )
    basic_dataset = torsiondrives.create_basic_dataset(
        dataset_name="new basicdataset",
        description="new basicdataset",
        tagline="new basicdataset",
        driver="gradient",
    )

    assert basic_dataset.dataset_name == "new basicdataset"
    assert basic_dataset.n_molecules == 1
    # the single molecule should be unpacked into 24 records
    assert basic_dataset.n_records == 24
    assert basic_dataset.driver.value == "gradient"
Esempio n. 14
0
def test_torsiondrivedataset_new_torsiondrive(public_client):
    """
    Create a new torsiondrive dataset from the results of an existing one.
    """
    entry_name = "[ch2:3]([ch2:2][oh:4])[oh:1]_12"
    torsiondrives = TorsionDriveCollectionResult.from_server(
        client=public_client,
        spec_name="default",
        dataset_name="TorsionDrive Paper",
        include_trajectory=True,
        final_molecule_only=False,
        subset=[entry_name],
    )
    # build the new torsiondrive dataset from the downloaded results
    td_dataset = torsiondrives.create_torsiondrive_dataset(
        dataset_name="new torsiondrive dataset",
        description="a test torsiondrive dataset",
        tagline="a test torsiondrive dataset.",
    )

    assert td_dataset.dataset_name == "new torsiondrive dataset"
    assert td_dataset.n_molecules == 1
    assert td_dataset.n_records == 1
    # the single record should start from all 24 geometries
    new_entry = td_dataset.dataset[entry_name]
    assert len(new_entry.initial_molecules) == 24