예제 #1
0
def test_forcebalance_collect_results():
    """
    Collect the results of a successful ForceBalance run and check the parameters changed.

    A finished ``optimize.out`` and a fitted ``bespoke.offxml`` are staged in a
    temporary directory before ``collect_results`` is called on the biphenyl workflow.
    """
    starting_k = "1.048715180139 * mole**-1 * kilocalorie"
    workflow = biphenyl_workflow(target=AbInitio_SMIRNOFF)
    # before collection every target term should hold the starting force constant
    for target_smirk in workflow.target_smirks:
        for term in target_smirk.terms.values():
            assert term.k == starting_k

    with temp_directory():
        # stage a dummy ForceBalance output layout in the working directory
        shutil.copy(get_data("complete.out"), "optimize.out")
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        shutil.copy(
            get_data("bespoke.offxml"),
            os.path.join(results_folder, "bespoke.offxml"),
        )
        optimizer = ForceBalanceOptimizer()
        result_workflow = optimizer.collect_results(schema=workflow)
        # after collection none of the final terms may keep the starting value
        for final_smirk in result_workflow.final_smirks:
            for term in final_smirk.terms.values():
                assert term.k != starting_k
예제 #2
0
def test_optimizer_explicit():
    """
    Run the executor's optimizer step directly in the main thread and check the parameters were updated.
    """
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=biphenyl)
    # attach the saved torsiondrive results to the schema
    torsiondrive_data = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    fitting_schema.update_with_results(results=torsiondrive_data)

    # no server is needed here: queue a single task by hand
    executor = Executor()
    executor.total_tasks = 1
    executor.opt_queue.put(fitting_schema.tasks[0])
    with temp_directory():
        executor.optimizer()
        # the processed task should now be waiting in the finished queue
        finished_task = executor.finished_tasks.get()
        result_schema = executor.update_fitting_schema(
            task=finished_task, fitting_schema=fitting_schema
        )
        # none of the final force constants should still be 1e-5
        for smirk in result_schema.tasks[0].final_smirks:
            for term in smirk.terms.values():
                magnitude = term.k.split()[0]
                assert float(magnitude) != 1e-5
def test_parent_fragment_mapping(molecules):
    """
    Check the generated fragment to parent mapping against the expected atom map.

    ``molecules`` unpacks into the parent file name, the fragment file name and
    the expected mapping.
    """
    parent_file, fragment_file, expected_map = molecules
    parent = Molecule.from_file(get_data(parent_file), "sdf")
    fragment = Molecule.from_file(get_data(fragment_file), "sdf")
    generated_map = FragmentEngine._get_fragment_parent_mapping(
        fragment=fragment, parent=parent
    )
    assert generated_map == expected_map
예제 #4
0
def test_abinitio_fitting_prep_no_gradient():
    """
    Prepare the abinitio target inputs with gradient fitting turned off and validate the written files.
    """
    abinitio_target = AbInitio_SMIRNOFF()
    abinitio_target.fit_gradient = False
    target_schema = biphenyl_target(target=abinitio_target)
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    # attach a saved scan result to the schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    target_schema.update_with_results(results=scan_results)
    assert target_schema.ready_for_fitting is True

    with temp_directory():
        abinitio_target.prep_for_fitting(fitting_target=target_schema)
        # only one torsiondrive is expected, so only one folder
        folders = os.listdir(".")
        assert len(folders) == 1
        target_dir = folders[0]
        target_files = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
        ):
            assert expected_file in target_files

        # the pdb atom order must be identical to the input molecule
        pdb_molecule = Molecule.from_file(
            os.path.join(target_dir, "molecule.pdb"), file_format="pdb"
        )
        isomorphic, atom_map = Molecule.are_isomorphic(
            biphenyl, pdb_molecule, return_atom_map=True
        )
        assert isomorphic is True
        assert atom_map == {index: index for index in range(biphenyl.n_atoms)}

        # the mol2 file should carry partial charges
        mol2_molecule = Molecule.from_file(
            os.path.join(target_dir, "molecule.mol2"), "mol2"
        )
        assert mol2_molecule.partial_charges is not None

        coords, energies, gradients = read_qdata(
            qdata_file=os.path.join(target_dir, "qdata.txt")
        )
        # no gradients should have been written
        assert not gradients
        # scan coordinates and energies must match the reference data entry by entry
        reference_data = target_schema.tasks[0].reference_data()
        for index, (coord, energy) in enumerate(zip(coords, energies)):
            reference = reference_data[index]
            assert reference.energy == energy
            assert coord == reference.molecule.geometry.flatten().tolist()
예제 #5
0
def test_collecting_results():
    """
    Check that task results are collected correctly from a QCArchive instance.
    """
    # connect to the public database
    client = FractalClient()
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=biphenyl)

    executor = Executor()
    # point the executor at the dataset that should be searched
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"
    # a hand written collection dict with a single torsiondrive entry
    torsion_entry = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    to_collect = {
        "torsion1d": {"default": [torsion_entry]},
        "optimization": {},
        "hessian": {},
    }
    first_task = fitting_schema.tasks[0]
    executor.collect_task_results(
        task=first_task, collection_dict=to_collect, client=client
    )
    # the task should now have everything it needs for fitting
    assert fitting_schema.tasks[0].ready_for_fitting is True
예제 #6
0
def test_torsionprofile_metadata():
    """
    Check that preparing the torsionprofile target also writes a metadata.json file.
    """
    from openff.qcsubmit.serializers import deserialize
    profile_target = TorsionProfile_SMIRNOFF()
    target_schema = biphenyl_target(target=profile_target)
    # attach a saved scan result to the schema
    scan_results = TorsionDriveCollectionResult.parse_file(
        get_data("biphenyl.json.xz")
    )
    target_schema.update_with_results(results=scan_results)
    assert target_schema.ready_for_fitting is True

    with temp_directory():
        profile_target.prep_for_fitting(fitting_target=target_schema)
        # only one torsiondrive is expected, so only one folder
        folders = os.listdir(".")
        assert len(folders) == 1
        target_dir = folders[0]
        target_files = os.listdir(target_dir)
        for expected_file in (
            "molecule.pdb",
            "scan.xyz",
            "molecule.mol2",
            "qdata.txt",
            "metadata.json",
        ):
            assert expected_file in target_files

        metadata = deserialize(
            file_name=os.path.join(target_dir, "metadata.json")
        )
        # the json content should agree with the task entry
        entry = target_schema.tasks[0]
        assert entry.dihedrals[0] == tuple(metadata["dihedrals"][0])
        grid_ids = metadata["torsion_grid_ids"]
        for reference in entry.reference_data():
            assert reference.extras["dihedral_angle"] in grid_ids
예제 #7
0
def test_forcebalance_collect_result_error():
    """
    Collect the result of a ForceBalance run whose output reports an error and check the status.
    """
    workflow = biphenyl_workflow(target=AbInitio_SMIRNOFF)
    with temp_directory():
        # stage a dummy output layout using the error log
        shutil.copy(get_data("error.out"), "optimize.out")
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        shutil.copy(
            get_data("bespoke.offxml"),
            os.path.join(results_folder, "bespoke.offxml"),
        )
        optimizer = ForceBalanceOptimizer()
        collected = optimizer.collect_results(schema=workflow)
        assert collected.status == Status.ConvergenceError
예제 #8
0
def test_pre_run_check_no_opt():
    """
    Building a fitting schema with no optimizer set should raise an OptimizerError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )

    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=ethane)
예제 #9
0
def test_label_molecule():
    """
    Label ethane with the force field editor and check every expected parameter type is present.
    """
    editor = ForceFieldEditor(forcefield_name="openff-1.0.0.offxml")
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )

    labels = editor.label_molecule(molecule=ethane)
    expected_types = ("Bonds", "Angles", "ProperTorsions", "ImproperTorsions", "vdW")
    for expected in expected_types:
        assert expected in labels
예제 #10
0
def test_normal_fragmentation():
    """
    Fragment bace with the WBO fragmenter and check the fragments and their parent torsions.
    """
    # bace is expected to give 3 fragments, 2 of which are the same
    fragmenter = WBOFragmenter()
    fragmenter.keep_non_rotor_ring_substituents = False
    bace = Molecule.from_file(
        file_path=get_data("bace_parent.sdf"), file_format="sdf"
    )
    fragment_data = fragmenter.fragment(molecule=bace)
    assert len(fragment_data) == 3

    # both reference fragments must be among the generated molecules
    generated = [entry.fragment_molecule for entry in fragment_data]
    for reference_file in ("bace_frag1.sdf", "bace_frag2.sdf"):
        reference = Molecule.from_file(
            file_path=get_data(reference_file), file_format="sdf"
        )
        assert reference in generated

    # every fragment should be built around a different central bond
    unique_torsions = {entry.parent_torsion for entry in fragment_data}
    assert len(unique_torsions) == 3
예제 #11
0
def test_pre_run_check_no_target():
    """
    Building a fitting schema with an optimizer that has no targets should raise an OptimizerError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    # the optimizer is attached without any optimization target
    optimizer = ForceBalanceOptimizer()
    factory.set_optimizer(optimizer=optimizer)
    with pytest.raises(OptimizerError):
        factory.fitting_schema_from_molecules(molecules=ethane)
예제 #12
0
def test_missing_task_type():
    """
    Generating a fitting task for an unknown collection workflow should raise NotImplementedError.
    """
    dummy = DummyTarget()
    # "test" is the collection workflow value this test expects to be rejected
    dummy.collection_workflow = "test"
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    with pytest.raises(NotImplementedError):
        dummy.generate_fitting_task(
            molecule=ethanol,
            fragment=False,
            attributes=get_molecule_cmiles(ethanol),
            dihedrals=[(8, 2, 1, 0)],
        )
예제 #13
0
def test_pre_run_check_no_smirks():
    """
    Building a fitting schema with an empty list of target smirks should raise a TargetNotSetError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # clear the target smirks so the check has nothing to work with
    factory.target_smirks = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=ethane)
예제 #14
0
def test_pre_run_check_no_params():
    """
    Building a fitting schema with no parameters selected for optimisation should raise a TargetNotSetError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # clear the target parameters (for example bond length) before the check runs
    factory.target_parameters = []
    with pytest.raises(TargetNotSetError):
        factory.fitting_schema_from_molecules(molecules=ethane)
예제 #15
0
def test_pre_run_check_no_frag():
    """
    Building a fitting schema with the fragmentation engine unset should raise a FragmenterError.
    """
    factory = WorkflowFactory()
    ethane = Molecule.from_file(
        file_path=get_data("ethane.sdf"), file_format="sdf"
    )
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)
    # remove the fragmentation method before the check runs
    factory.fragmentation_engine = None
    with pytest.raises(FragmenterError):
        factory.fitting_schema_from_molecules(molecules=ethane)
예제 #16
0
def test_generate_fitting_task(collection_workflow):
    """
    Check the generated fitting task has the task type of the requested collection workflow.
    """
    dummy = DummyTarget()
    dummy.collection_workflow = collection_workflow
    ethanol = Molecule.from_file(get_data("ethanol.sdf"))
    generated_task = dummy.generate_fitting_task(
        molecule=ethanol,
        fragment=False,
        attributes=get_molecule_cmiles(ethanol),
        dihedrals=[(8, 2, 1, 0)],
    )
    assert generated_task.task_type == collection_workflow
예제 #17
0
def biphenyl_workflow(target) -> OptimizationSchema:
    """
    Build a fitting schema for biphenyl and return its first task.

    ``target`` is called with no arguments to create the optimization target
    that is registered with the ForceBalance optimizer.
    """
    biphenyl = Molecule.from_file(get_data("biphenyl.sdf"), "sdf")
    factory = WorkflowFactory()
    # bespoke and expanded torsion terms are switched off to keep the fitting fast
    factory.generate_bespoke_terms = False
    factory.expand_torsion_terms = False
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=target())
    factory.set_optimizer(optimizer=optimizer)
    fitting_schema = factory.fitting_schema_from_molecules(molecules=biphenyl)
    return fitting_schema.tasks[0]
def test_bespoke_target_torsion_smirks():
    """
    Generate bespoke torsion smirks for the target central bond only and check the intended atoms are matched.
    """
    generator = SmirksGenerator()
    molecule = Molecule.from_file(get_data("OCCO.sdf"))

    bespoke_smirks = generator._get_bespoke_torsion_smirks(
        molecule=molecule, central_bonds=[(1, 2)]
    )
    # three unique smirks are expected: H-C-C-H, H-C-C-O and O-C-C-O
    assert len(bespoke_smirks) == 3
    for bespoke in bespoke_smirks:
        environment_matches = molecule.chemical_environment_matches(bespoke.smirks)
        matched_atoms = condense_matches(environment_matches)
        assert compare_matches(matched_atoms, bespoke.atoms) is True
예제 #19
0
def test_submit_new_tasks(fractal_compute_server):
    """
    Check that newly generated tasks are submitted to the archive instance.
    """
    client = FractalClient(fractal_compute_server)
    # the calculations will not actually run as psi4 is not installed
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    fitting_schema = get_fitting_schema(molecules=biphenyl)

    executor = Executor()
    first_task = fitting_schema.tasks[0]
    # submitting should report one new entry under the default specification
    submission = executor.submit_new_tasks(task=first_task, client=client)
    assert submission == {'OpenFF Bespoke-fit': {'default': 1}}
예제 #20
0
def test_forcebalance_readoutput(output):
    """
    Read the output of a ForceBalance run and check the reported status and force field.

    ``output`` unpacks into the name of the output file to stage and the expected status.
    """
    output_file, expected_status = output
    with temp_directory():
        shutil.copy(get_data(output_file), "optimize.out")
        # build the dummy result folder with a placeholder force field file
        results_folder = os.path.join("result", "optimize")
        os.makedirs(results_folder, exist_ok=True)
        placeholder_path = os.path.join(results_folder, "bespoke.offxml")
        with open(placeholder_path, "w") as placeholder:
            placeholder.write("test")
        optimizer = ForceBalanceOptimizer()
        parsed = optimizer.read_output()
        assert parsed["status"] == expected_status
        assert "bespoke.offxml" in parsed["forcefield"]
예제 #21
0
def biphenyl_target(
        target: Union[AbInitio_SMIRNOFF,
                      TorsionProfile_SMIRNOFF]) -> TargetSchema:
    """
    Build a target schema from the given target holding a single biphenyl fitting task.
    """
    biphenyl = Molecule.from_file(
        file_path=get_data("biphenyl.sdf"), file_format="sdf"
    )
    schema = target.generate_target_schema()
    # a single task is created for one dihedral
    biphenyl_task = target.generate_fitting_task(
        molecule=biphenyl,
        fragment=False,
        attributes=get_molecule_cmiles(molecule=biphenyl),
        dihedrals=[(5, 9, 10, 6)],
    )
    schema.add_fitting_task(task=biphenyl_task)
    return schema
예제 #22
0
def test_task_from_molecule():
    """
    Check the optimization schema the workflow factory builds from a single molecule.
    """
    bace = Molecule.from_file(
        file_path=get_data("bace.sdf"), file_format="sdf"
    )
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    task_schema = factory._task_from_molecule(molecule=bace, index=1)
    # settings that should be carried over from the factory and the optimizer
    assert task_schema.initial_forcefield == factory.initial_forcefield
    assert task_schema.optimizer_name == optimizer.optimizer_name
    assert task_schema.job_id == "bespoke_task_1"
    assert bool(task_schema.target_smirks) is True
    assert task_schema.target_parameters == factory.target_parameters
    # the molecule and the expected task and target counts
    assert task_schema.target_molecule.molecule == bace
    assert task_schema.n_tasks == 3
    assert task_schema.n_targets == 1
예제 #23
0
def test_forcebalance_optimize(optimization_target):
    """
    Run the full optimization stage for biphenyl with the given target.
    The reference data has been extracted from qcarchive.
    """
    from openff.qcsubmit.results import TorsionDriveCollectionResult
    workflow = biphenyl_workflow(target=optimization_target)
    with temp_directory():
        # attach the computed torsiondrive results to the workflow
        computed = TorsionDriveCollectionResult.parse_file(
            get_data("biphenyl.json.xz")
        )
        workflow.update_with_results(results=computed)
        optimizer = ForceBalanceOptimizer()
        optimized = optimizer.optimize(schema=workflow)
        assert optimized.status == Status.Complete
        # no term may be left at the 1e-05 value
        for smirk in optimized.target_smirks:
            for term in smirk.terms.values():
                assert term.k != "1e-05 * mole**-1 * kilocalorie"
def test_bespoke_torsion_smirks():
    """
    Generate bespoke smirks for every torsion in the molecule, check the intended atoms are matched
    and that every proper torsion is covered by a bespoke smirks.
    """
    generator = SmirksGenerator()
    molecule = Molecule.from_file(get_data("OCCO.sdf"))

    bespoke_smirks = generator._get_bespoke_torsion_smirks(molecule=molecule)
    # five unique torsions are expected
    assert len(bespoke_smirks) == 5

    covered_torsions = []
    for bespoke in bespoke_smirks:
        environment_matches = molecule.chemical_environment_matches(bespoke.smirks)
        matched_atoms = condense_matches(environment_matches)
        covered_torsions.extend(matched_atoms)
        assert compare_matches(matched_atoms, bespoke.atoms) is True

    # each proper torsion must be covered in either atom order
    for proper in molecule.propers:
        forward = tuple(atom.molecule_atom_index for atom in proper)
        backward = forward[::-1]
        assert forward in covered_torsions or backward in covered_torsions
예제 #25
0
    def generate_optimize_in(
        self, priors: Dict[str, float], fitting_targets: Dict[str, List[str]]
    ) -> None:
        """
        Render the jinja ``optimize.txt`` template into an ``optimize.in`` control file for forcebalance.

        The file is written as ``optimize.in`` in the current working directory.

        Parameters
        ----------
        priors: Dict[str, float]
            A dictionary containing the prior names and values.
        fitting_targets: Dict[str, List[str]]
            A dictionary containing the fitting target names sorted by forcebalance target.

        Raises
        ------
        TargetNotSetError
            If a key of ``fitting_targets`` does not match, ignoring case, the name of
            any target in ``self.optimization_targets``.

        Notes
        -----
            This function can be called repeatedly to generate many optimize in files so
            that many force balance jobs can be run simultaneously.
        """
        # every requested target must already be registered with this optimizer
        registered_names = [
            registered.name.lower() for registered in self.optimization_targets
        ]
        for target_name in fitting_targets:
            if target_name.lower() in registered_names:
                continue
            raise TargetNotSetError(
                f"The target {target_name} is not setup for this optimizer and is required, please add it with runtime options using `set_optimization_target`."
            )

        # load the template shipped with the package data
        template_path = get_data(os.path.join("templates", "optimize.txt"))
        with open(template_path) as template_handle:
            template = Template(template_handle.read())

        # the optimizer settings are combined with the priors and the fitting targets
        render_context = self.dict()
        render_context["priors"] = priors
        render_context["fitting_targets"] = fitting_targets
        control_text = template.render(**render_context)

        with open("optimize.in", "w") as control_file:
            control_file.write(control_text)
예제 #26
0
def test_error_cycle_complete():
    """
    Error cycle a task which is already complete in qcarchive; the result should be collected
    and the task placed in the optimization queue.
    """
    client = FractalClient()
    biphenyl = Molecule.from_file(get_data("biphenyl.sdf"))
    fitting_schema = get_fitting_schema(biphenyl)
    executor = Executor()
    # fake the dataset name
    executor._dataset_name = "OpenFF-benchmark-ligand-fragments-v1.0"
    first_task = fitting_schema.tasks[0]
    task_keys = list(first_task.get_task_map().keys())
    # fake the task map so the first task key points at a fixed entry
    archive_entry = "[h]c1c([c:1]([c:2](c(c1[h])[h])[c:3]2[c:4](c(c(c(c2[h])cl)[h])[h])[h])[h])[h]"
    executor.task_map = {task_keys[0]: archive_entry}
    executor._error_cycle_task(task=first_task, client=client)
    # the collected task should now be waiting in the opt queue
    queued_task = executor.opt_queue.get(timeout=5)
    assert queued_task.ready_for_fitting is True
예제 #27
0
def test_make_fitting_schema_from_molecules():
    """
    Build a fitting schema for a single molecule using the default settings.
    Bace is a small molecule that should split into 2 fragments for a total of 3 torsiondrives.
    """
    bace = Molecule.from_file(
        file_path=get_data("bace.sdf"), file_format="sdf"
    )
    factory = WorkflowFactory()
    optimizer = ForceBalanceOptimizer()
    optimizer.set_optimization_target(target=AbInitio_SMIRNOFF())
    factory.set_optimizer(optimizer=optimizer)

    fitting_schema = factory.fitting_schema_from_molecules(molecules=bace)
    # one molecule with three tasks is expected
    assert fitting_schema.n_molecules == 1
    assert fitting_schema.n_tasks == 3
    # bace is a schema molecule but not one of the entry molecules
    assert bace in fitting_schema.molecules
    assert bace not in fitting_schema.entry_molecules
    # a single torsiondrive dataset should hold all three records
    datasets = fitting_schema.generate_qcsubmit_datasets()
    assert len(datasets) == 1
    torsiondrive_dataset = datasets[0]
    assert torsiondrive_dataset.dataset_type == "TorsiondriveDataset"
    assert torsiondrive_dataset.n_records == 3