Example #1
0
def test_generate_optimise_in(tmpdir, biphenyl):
    """
    Check that the optimize.in file written for forcebalance contains the non-default run time settings.
    """
    # every one of these entries must be present in the rendered input file
    expected_entries = (
        "penalty_type L2",
        "maxstep 100",
        "mintrust 10",
        "restrain_k 100",
        "type TorsionProfile_OpenMM",
    )
    with tmpdir.as_cwd():
        # assign initial parameters to the molecule
        OpenFF().run(biphenyl)
        # build the optimiser and the torsion target with non-default options
        fitter = ForceBalanceFitting(
            penalty_type="L2",
            max_iterations=100,
            minimum_trust_radius=10,
        )
        torsion_target = TorsionProfile(restrain_k=100)
        fitter.add_target(target=torsion_target)
        # prepare the target folders and then write the input file
        folders = torsion_target.prep_for_fitting(molecule=biphenyl)
        fitter.generate_optimise_in(
            target_data={torsion_target.target_name: folders})
        # load the generated file so it can be inspected
        with open("optimize.in") as input_file:
            contents = input_file.read()

        for entry in expected_entries:
            assert entry in contents
Example #2
0
def test_full_optimise(tmpdir, biphenyl):
    """
    Run a complete forcebalance fit on a simple molecule using the default settings and check that the
    fitted torsion terms are stored on the returned molecule.
    """
    force_constants = ("k1", "k2", "k3", "k4")
    with tmpdir.as_cwd():
        OpenFF().run(biphenyl)
        # the fit is run with default settings
        optimiser = ForceBalanceFitting()
        # a deep copy is passed to the fit, the input molecule is used for the comparison below
        fitted = optimiser.run(molecule=biphenyl.copy(deep=True))
        reference = None
        for initial in biphenyl.TorsionForce:
            # only dihedrals around the 10-11 bond are inspected
            if initial.atoms[1:3] not in ((10, 11), (11, 10)):
                continue
            refit = fitted.TorsionForce[initial.atoms]
            if reference is None:
                # the first matching dihedral is the reference for the symmetry check
                reference = refit.copy(deep=True)
            # every force constant should differ from its starting value
            for name in force_constants:
                assert getattr(refit, name) != getattr(initial, name)
            # symmetry equivalent dihedrals must share the same fitted terms
            for name in force_constants:
                assert getattr(refit, name) == getattr(reference, name)
Example #3
0
def test_parameter_tags(tmpdir, force_group, ff_group, key, terms):
    """
    Tag a selection of parameters and check that only those terms carry the tag in the built force field.
    """
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(file_name=get_data("biphenyl.sdf"))
        OpenFF().run(molecule=ligand)
        # attach the tag to each of the requested terms
        for term in terms:
            getattr(ligand, force_group)[term].attributes = {"test tag"}
        # build the force field element tree
        forcefield = ligand._build_forcefield()
        # work out the class labels (symbol + index) of every tagged term
        tagged_classes = []
        for term in terms:
            labels = []
            for index in term:
                labels.append(
                    f"{ligand.atoms[index].atomic_symbol}{ligand.atoms[index].atom_index}"
                )
            tagged_classes.append(labels)
        # the attribute names which hold the class labels of a term
        class_keys = [f"class{i}" for i in range(1, 1 + len(terms[0]))]
        # walk the requested force group and check each term for the tag
        for group in forcefield.iter(tag=ff_group):
            for ff_term in group.iter(tag=key):
                term_classes = [ff_term.get(class_key) for class_key in class_keys]
                tag = ff_term.get("parametrize", None)
                if term_classes in tagged_classes:
                    assert tag == "test tag"
                else:
                    assert tag is None
Example #4
0
def test_generate_forcefield(tmpdir, biphenyl):
    """
    Check the fitting forcefield written for forcebalance only tags the torsions around the 10-11 bond.
    """
    allowed_terms = ["k1", "k2", "k3", "k4"]
    with tmpdir.as_cwd():
        os.mkdir("forcefield")
        # assign some openff parameters to the ligand
        OpenFF().parametrise_molecule(molecule=biphenyl)
        ForceBalanceFitting.generate_forcefield(molecule=biphenyl)
        # read the forcefield back in and inspect the cosmetic tags
        tree = ET.parse(os.path.join("forcefield", "bespoke.xml"))
        for proper in tree.getroot().iter(tag="Proper"):
            # the atom index is the numeric part of each class label
            indices = []
            for position in range(1, 5):
                atom_class = proper.get(key=f"class{position}")
                indices.append(int(re.search("[0-9]+", atom_class).group()))
            tag = proper.get(key="parameterize", default=None)
            if indices[1:3] in ([10, 11], [11, 10]):
                # torsions on the target bond must list only known k terms
                for entry in tag.split(","):
                    assert entry.strip() in allowed_terms
            else:
                # all other torsions must be left untagged
                assert tag is None
Example #5
0
def test_lennard_jones612(tmpdir):
    """
    Check the LJ612 class reproduces the stored reference sigma and epsilon values for chloromethane.
    """
    # (atoms key, sigma, epsilon) reference values
    reference = (
        ((0, ), 0.3552211069814666, 0.25918723101839924),
        ((1, ), 0.33888067968663566, 0.9650542683335082),
        ((2, ), 0.22192905304751342, 0.15047278650152818),
    )
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("chloromethane.pdb"))
        # assign some starting Nonbonded values
        OpenFF().run(molecule=ligand)
        # load the aim reference data
        ExtractChargeData.extract_charge_data_chargemol(
            molecule=ligand,
            dir_path=get_data(""),
            ddec_version=6,
        )
        # symmetrise the reference data
        DDECCharges.apply_symmetrisation(molecule=ligand)
        # qubekit 1 legacy parameters
        legacy_parameters = {
            "H": h_base(r_free=1.64),
            "C": c_base(r_free=2.08),
            "Cl": cl_base(r_free=1.88),
        }
        # derive the new LJ terms
        lj_stage = LennardJones612(lj_on_polar_h=False,
                                   free_parameters=legacy_parameters)
        lj_stage.run(molecule=ligand)
        # compare against the expected reference values
        for atoms, sigma, epsilon in reference:
            assert ligand.NonbondedForce[atoms].sigma == sigma
            assert ligand.NonbondedForce[atoms].epsilon == epsilon
Example #6
0
def test_chargemol_template(tmpdir, version):
    """
    Make sure we can correctly render a chargemol template job.

    The CHARGEMOL_DIR environment variable is faked only while the template is rendered and is
    restored afterwards, so the fake value can not leak into other tests in the same session.
    """
    from unittest import mock

    with tmpdir.as_cwd():
        mol = Ligand.from_file(get_data("water.pdb"))
        OpenFF().parametrise_molecule(molecule=mol)
        charge_method = DDECCharges(
            apply_symmetry=True,
            basis="sto-3g",
            method="hf",
            cores=1,
            memory=1,
            ddec_version=version,
        )
        # fake the chargemol dir for the duration of the render only
        with mock.patch.dict(os.environ, {"CHARGEMOL_DIR": "test"}):
            # now render the template
            charge_method._build_chargemol_input(
                density_file_name="test.wfx", molecule=mol)
        with open("job_control.txt") as job_file:
            job_data = job_file.readlines()

        assert f"DDEC{version}\n" in job_data
        assert "test.wfx\n" in job_data
        # the faked chargemol dir should be used to locate the atomic densities
        assert "test/atomic_densities/\n" in job_data
        assert f"{mol.charge}\n" in job_data
Example #7
0
def test_chargemol_template(tmpdir, version, water):
    """
    Render a chargemol job template for water and check the key lines are written to the control file.
    """
    with tmpdir.as_cwd():
        OpenFF().run(molecule=water)
        ddec = DDECCharges(ddec_version=version)
        # write the job control file from the template
        ddec._build_chargemol_input(
            density_file_name="test.wfx",
            molecule=water,
        )
        with open("job_control.txt") as control_file:
            control_lines = control_file.readlines()

        # each of these lines must appear exactly as written in the control file
        expected_lines = (
            f"DDEC{version}\n",
            "test.wfx\n",
            f"{water.charge}\n",
        )
        for line in expected_lines:
            assert line in control_lines
Example #8
0
def test_generate_forcefield(tmpdir, biphenyl):
    """
    Test generating a fitting forcefield for forcebalance with the correct torsion terms tagged.

    Every Proper torsion in the written forcefield is inspected: torsions carrying a ``parameterize``
    tag are counted (and, when they sit on the 10-11 bond, their tag entries are validated), otherwise
    torsions carrying a ``parameter_eval`` tag are counted.
    """
    with tmpdir.as_cwd():
        fb = ForceBalanceFitting()
        os.mkdir("forcefield")
        # get some openff params for the ligand
        OpenFF().run(molecule=biphenyl)
        fb.generate_forcefield(molecule=biphenyl)
        # load the forcefield and check for cosmetic tags
        root = ET.parse(os.path.join("forcefield", "bespoke.xml")).getroot()
        p_tags = 0
        eval_tags = 0
        for torsion in root.iter(tag="Proper"):
            a1 = torsion.get(key="class1")
            a2 = torsion.get(key="class2")
            a3 = torsion.get(key="class3")
            a4 = torsion.get(key="class4")
            atoms = [a1, a2, a3, a4]
            # the atom index is the numeric part of the class label
            dihedral = [
                int(re.search("[0-9]+", atom).group()) for atom in atoms
            ]
            # convert to a tuple, a list slice never compares equal to the tuple bonds below
            central_bond = tuple(dihedral[1:3])
            # if we have the same central bond make sure we have the parametrise or eval tag
            p_tag = torsion.get(key="parameterize", default=None)
            if p_tag is not None:
                p_tags += 1
                if central_bond in ((10, 11), (11, 10)):
                    # make sure the tag only lists known torsion terms
                    for t in p_tag.split(","):
                        assert t.strip() in ["k1", "k2", "k3", "k4"]
            elif torsion.get(key="parameter_eval", default=None) is not None:
                eval_tags += 1
        # all torsions are the same symmetry type, so we should have one parametrize tag
        # and 3 eval tags
        assert p_tags == 1
        assert eval_tags == 3
Example #9
0
def test_full_optimise(tmpdir, biphenyl):
    """
    Run a complete forcebalance fit on a simple molecule with default settings and check the torsion
    terms of the returned molecule differ from the starting values.
    """
    force_constants = ("k1", "k2", "k3", "k4")
    with tmpdir.as_cwd():
        OpenFF().parametrise_molecule(biphenyl)
        # the fit is run with default settings
        optimiser = ForceBalanceFitting()
        fitted_molecule = optimiser.run(molecule=biphenyl)
        for initial in biphenyl.TorsionForce:
            # NOTE(review): the bond is compared against lists here; if `atoms` is a tuple this
            # never matches and the assertions below are skipped - confirm the type of `atoms`.
            if initial.atoms[1:3] not in ([10, 11], [11, 10]):
                continue
            # every force constant should differ from its starting value
            refit = fitted_molecule.TorsionForce[initial.atoms]
            for name in force_constants:
                assert getattr(refit, name) != getattr(initial, name)
Example #10
0
def mol():
    """
    Initialise the Ligand molecule object with data for Chloromethane

    The molecule is parametrised inside a temporary directory so that any files written during
    parametrisation are removed. The original working directory is restored before the temporary
    directory is deleted, so the process is never left inside a directory which no longer exists.

    Returns:
        The chloromethane molecule with symmetrised aim reference data and the aim charge copied onto
        each nonbonded parameter.
    """
    home = os.getcwd()
    # use temp directory to remove parametrisation files
    with TemporaryDirectory() as temp:
        os.chdir(temp)
        try:
            molecule = Ligand.from_file(
                file_name=get_data("chloromethane.pdb"))
            OpenFF().run(molecule)
            ddec_file_path = get_data(
                "DDEC6_even_tempered_net_atomic_charges.xyz")
            dir_path = os.path.dirname(ddec_file_path)
            ExtractChargeData.extract_charge_data_chargemol(
                molecule, dir_path, 6)
            # apply symmetry to the reference data
            DDECCharges.apply_symmetrisation(molecule=molecule)
            # apply the reference charge to the nonbonded
            for atom in molecule.atoms:
                molecule.NonbondedForce[(
                    atom.atom_index, )].charge = atom.aim.charge

            return molecule
        finally:
            # always move back before the temp directory is cleaned up
            os.chdir(home)
Example #11
0
def test_lennard_jones612(tmpdir):
    """
    Check the LJ612 class with default settings reproduces the stored reference sigma and epsilon values.
    """
    # atom index -> (sigma, epsilon) reference values
    reference = {
        0: (0.3552211069814666, 0.25918723101839924),
        1: (0.33888067968663566, 0.9650542683335082),
        2: (0.22192905304751342, 0.15047278650152818),
    }
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("chloromethane.pdb"))
        # assign some starting Nonbonded values
        OpenFF().parametrise_molecule(molecule=ligand)
        # load the aim reference data
        ExtractChargeData.read_files(
            molecule=ligand,
            dir_path=get_data(""),
            charges_engine="chargemol",
        )
        # symmetrise the reference data
        DDECCharges.apply_symmetrisation(molecule=ligand)
        # derive the new LJ terms
        LennardJones612().run(molecule=ligand)
        # compare against the expected reference values
        for index, (sigma, epsilon) in reference.items():
            assert ligand.NonbondedForce[(index, )].sigma == sigma
            assert ligand.NonbondedForce[(index, )].epsilon == epsilon
Example #12
0
def test_param_storage(tmpdir):
    """
    Check parameters can be created, read and updated through the parameter storage, and that creating
    a parameter from incomplete data raises a validation error.
    """
    first_atom = (0, )
    with tmpdir.as_cwd():
        ligand = Ligand.from_file(get_data("chloromethane.pdb"))
        OpenFF().run(ligand)
        # creating a parameter needs every value at once, a single value must be rejected
        with pytest.raises(ValidationError):
            ligand.NonbondedForce.create_parameter(atoms=first_atom,
                                                   charge=0.1)
        # supplying the complete set of values should work
        complete_values = {"charge": 0.1, "epsilon": 0.2, "sigma": 0.3}
        ligand.NonbondedForce.create_parameter(atoms=first_atom,
                                               **complete_values)

        assert float(ligand.NonbondedForce[first_atom].charge) == 0.1
        assert ligand.NonbondedForce[first_atom].epsilon == 0.2
        assert ligand.NonbondedForce[first_atom].sigma == 0.3

        # a stored value can be updated in place
        ligand.NonbondedForce[first_atom].charge = 5
        assert float(ligand.NonbondedForce[first_atom].charge) == 5

        # a bond should be found using either atom ordering
        forward_k = ligand.BondForce[(0, 1)].k
        reverse_k = ligand.BondForce[(1, 0)].k
        assert forward_k == reverse_k
Example #13
0
class WorkFlow(SchemaBase):
    """
    The workflow schema which stores the settings of every stage and runs the stages in order.

    Each stage is stored as a field on the model; the running order is defined in `get_running_order`.
    The progress of a run is tracked in a `WorkFlowResult` which is written to file as the run proceeds.
    """

    type: Literal["WorkFlow"] = "WorkFlow"
    qc_options: QCOptions = Field(
        QCOptions(),
        description=
        "The QC options to be used for all QC calculations apart from implicit solvent.",
    )
    local_resources: LocalResource = Field(
        LocalResource(),
        description=
        "The local resource options for the workflow like total memory and cores available.",
    )
    parametrisation: Union[OpenFF, XML, AnteChamber] = Field(
        OpenFF(),
        description=
        "The parametrisation engine which should be used to assign initial parameters.",
    )
    optimisation: Optimiser = Field(
        Optimiser(),
        description=
        "The main geometry optimiser settings including pre_optimisation settings.",
    )
    charges: Union[DDECCharges, MBISCharges] = Field(
        DDECCharges(),
        description=
        "The method that should be used to calculate the AIM reference data the charge should be extracted from. Note that the non-bonded parameters are also calculated from this data.",
    )
    virtual_sites: Optional[VirtualSites] = Field(
        VirtualSites(),
        description=
        "The method that should be used to fit virtual sites if they are requested.",
    )
    non_bonded: LennardJones612 = Field(
        get_protocol(protocol_name="0"),
        description=
        "The method that should be used to calculate the non-bonded non-charge parameters and their functional form.",
    )
    bonded_parameters: Union[ModSeminario, QForceHessianFitting] = Field(
        ModSeminario(),
        description=
        "The method that should be used to optimise the bonded parameters.",
    )
    torsion_scanner: TorsionScan1D = Field(
        TorsionScan1D(),
        description="The method that should be used to drive the torsions",
    )
    torsion_optimisation: ForceBalanceFitting = Field(
        ForceBalanceFitting(),
        description=
        "The method that should be used to optimise the scanned soft dihedrals.",
    )
    # the hessian stage is shared by the class and is not a configurable model field
    hessian: ClassVar[Hessian] = Hessian()

    # the file the results object is written to while the workflow runs
    _results_fname: str = PrivateAttr("workflow_result.json")

    @classmethod
    def from_results(cls, results: WorkFlowResult):
        """
        Build a workflow from the provenance info in the results object.

        Args:
            results: The results object whose qc spec, local resources and stage settings are reused.

        Returns:
            A workflow configured with the settings stored in the results.
        """
        model_data = {
            "qc_options": results.qc_spec.dict(),
            "local_resources": results.local_resources.dict(),
        }
        # now loop over the stages and update the options
        for stage_name, result in results.results.items():
            # the hessian stage has no settings; the stage names produced by get_running_order are
            # lower case so the comparison must not depend on the capitalisation
            if stage_name.lower() != "hessian":
                model_data[stage_name] = result.stage_settings
        return cls(**model_data)

    def validate_workflow(self,
                          workflow: List[str],
                          molecule: Optional[Ligand] = None) -> None:
        """
        Make sure that the workflow can be run ahead of time by looking for errors in the QCspecification and missing dependencies.

        Args:
            workflow: The list of stages to be run which should be checked.
            molecule: An optional molecule, when given and the non_bonded stage is in the workflow
                the element coverage of the non-bonded stage is checked for it.
        """
        # first check the general qc spec
        self.qc_options.validate_specification()
        # then check each component for missing dependencies
        for field in workflow:
            stage = getattr(self, field)
            # some stages are optional and should be skipped
            if stage is not None:
                stage.is_available()
        # check special stages
        # check that the pre_opt method is available
        if "optimisation" in workflow:
            stage = self.optimisation
            if stage.pre_optimisation_method is not None:
                pre_spec = stage.convert_pre_opt(
                    method=stage.pre_optimisation_method)
                pre_spec.validate_specification()
        # if we are doing nonbonded check the element coverage
        if "non_bonded" in workflow and molecule is not None:
            self.non_bonded.check_element_coverage(molecule=molecule)

    def to_file(self, filename: str) -> None:
        """
        Write the workflow to file supported file types are json or yaml.

        Args:
            filename: The name of the output file. A ``yaml`` or ``yml`` extension selects yaml output,
                any other extension is written as json.
        """
        f_type = filename.split(".")[-1]
        with open(filename, "w") as output:
            # both extensions must be listed, `"yaml" or "yml"` collapses to just "yaml"
            if f_type in ("yaml", "yml"):
                import yaml

                output.write(yaml.dump(self.dict()))
            else:
                output.write(self.json(indent=2))

    @classmethod
    def get_running_order(
        cls,
        start: Optional[str] = None,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> List[str]:
        """Work out the running order based on any skip stages and the end point.

        Args:
            start: The starting stage for the workflow.
            skip_stages: A list of stages to remove from the workflow.
            end: The final stage which should be executed.

        Returns:
            A list of stage names in the order they will be ran in.

        Raises:
            ValueError: If `start` or `end` is not in the workflow once the skipped stages are removed.
        """
        normal_workflow = [
            "parametrisation",
            "optimisation",
            "hessian",
            "charges",
            "virtual_sites",
            "non_bonded",
            "bonded_parameters",
            "torsion_scanner",
            "torsion_optimisation",
        ]
        if skip_stages is not None:
            for stage in skip_stages:
                try:
                    normal_workflow.remove(stage)
                except ValueError:
                    # unknown or already removed stages are ignored
                    continue
        if start is not None:
            start_id = normal_workflow.index(start)
        else:
            start_id = None
        if end is not None:
            # the end stage is inclusive
            end_id = normal_workflow.index(end) + 1
        else:
            end_id = None

        return normal_workflow[start_id:end_id]

    def _build_initial_results(self, molecule: Ligand) -> WorkFlowResult:
        """
        Build the initial results schema using the workflow.

        Args:
            molecule: The input molecule, a deep copy is stored as the input and the molecule itself
                as the current molecule.

        Returns:
            A results object with every stage of the full running order set to waiting.
        """
        workflow = self.get_running_order()
        result = WorkFlowResult(
            version=qubekit.__version__,
            input_molecule=molecule.copy(deep=True),
            qc_spec=self.qc_options,
            current_molecule=molecule,
            local_resources=self.local_resources,
        )
        # for each stage set if they are to be ran
        for stage_name in workflow:
            stage: Optional[StageBase] = getattr(self, stage_name)
            if stage is not None:
                stage_result = StageResult(stage=stage.type,
                                           stage_settings=stage.dict(),
                                           status=Status.Waiting)
            else:
                # optional stages which are not set have no type or settings to record
                stage_result = StageResult(stage=None,
                                           stage_settings=None,
                                           status=Status.Waiting)

            result.results[stage_name] = stage_result
        return result

    def _get_optional_stage_skip(
            self, skip_stages: Optional[List[str]]) -> Optional[List[str]]:
        """
        Add any optional stages which are skipped when not supplied, to the skip stages list.

        Args:
            skip_stages: The stages the user asked to skip, if any.

        Returns:
            The skip stages extended with ``virtual_sites`` when that stage is not set, otherwise
            the input is returned unchanged.
        """
        if self.virtual_sites is None and skip_stages is not None:
            # we get a tuple from click so we can not append
            return [*skip_stages, "virtual_sites"]

        elif self.virtual_sites is None and skip_stages is None:
            return ["virtual_sites"]

        return skip_stages

    def restart_workflow(
        self,
        start: str,
        result: WorkFlowResult,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> WorkFlowResult:
        """
        Restart the workflow from the given stage and continue the run.

        Args:
            start: The name of the stage we want to restart the workflow from.
            result: The past run results object which will be updated and that starting molecule will be extracted from.
            skip_stages: The list of stage names which should be skipped.
            end: The name of the last stage to be computed before finishing.

        Returns:
            The results object updated by the restarted run.
        """
        molecule = result.current_molecule
        skip_stages = self._get_optional_stage_skip(skip_stages=skip_stages)
        run_order = self.get_running_order(start=start,
                                           skip_stages=skip_stages,
                                           end=end)
        # update local and qc options
        result.qc_spec = self.qc_options
        result.local_resources = self.local_resources
        return self._run_workflow(molecule=molecule,
                                  results=result,
                                  workflow=run_order)

    def new_workflow(
        self,
        molecule: Ligand,
        skip_stages: Optional[List[str]] = None,
        end: Optional[str] = None,
    ) -> WorkFlowResult:
        """
        The main worker method to be used for starting new workflows.

        Args:
            molecule: The molecule to be re-parametrised using QUBEKit.
            skip_stages: A list of stages which should be skipped in the workflow.
            end: The last stage to be computed before finishing, useful to finish early.

        Returns:
            The results object produced by the run.
        """
        # get the running order
        skip_stages = self._get_optional_stage_skip(skip_stages=skip_stages)
        run_order = self.get_running_order(skip_stages=skip_stages, end=end)
        results = self._build_initial_results(molecule=molecule)
        # if we have any skips assign them
        for stage_name, stage_result in results.results.items():
            if stage_name not in run_order:
                stage_result.status = Status.Skip

        return self._run_workflow(molecule=molecule,
                                  workflow=run_order,
                                  results=results)

    def _run_workflow(
        self,
        molecule: "Ligand",
        workflow: List[str],
        results: WorkFlowResult,
    ) -> WorkFlowResult:
        """
        The main run method of the workflow which will execute each stage inorder on the given input molecule.

        Args:
            molecule: The molecule to be re-parametrised using QUBEKit.
            workflow: The list of prefiltered stage names which should be ran in order.
            results: The results object that we should update throughout the workflow.

        Returns:
            The results object updated with the outcome of every stage which was run.
        """
        # try and find missing dependencies
        self.validate_workflow(workflow=workflow, molecule=molecule)
        # start message
        # TODO Move to outside workflow so this doesn't get printed for every run in bulk.
        print(
            "If QUBEKit ever breaks or you would like to view timings and loads of other info, "
            "view the log file.\nOur documentation (README.md) "
            "also contains help on handling the various commands for QUBEKit.\n"
        )
        # write out the results object to track the status at the start
        results.to_file(filename=self._results_fname)

        # loop over stages and run
        for field in workflow:
            stage: StageBase = getattr(self, field)
            # some stages print based on what spec they are using
            print(stage.start_message(qc_spec=self.qc_options))
            molecule = self._run_stage(stage_name=field,
                                       stage=stage,
                                       molecule=molecule,
                                       results=results)
            print(stage.finish_message())

        # now the workflow has finished
        # write final results
        results.to_file(filename=self._results_fname)
        # write out final parameters
        with folder_setup("final_parameters"):
            # if we have U-B terms we need to write a non-standard pdb file
            if molecule.has_ub_terms():
                molecule._to_ub_pdb()
            else:
                molecule.to_file(file_name=f"{molecule.name}.pdb")
            molecule.write_parameters(file_name=f"{molecule.name}.xml")

        return results

    def _run_stage(
        self,
        stage_name: str,
        stage: StageBase,
        molecule: Ligand,
        results: WorkFlowResult,
    ) -> Ligand:
        """
        A stage wrapper to run the stage and update the results workflow in place.

        Args:
            stage_name: The name of the stage, used as the results key and the working folder name.
            stage: The stage object which should be run.
            molecule: The molecule the stage should be run on.
            results: The results object which is updated and written to file around the stage.

        Returns:
            The current molecule stored in the results after the stage has finished.

        Raises:
            WorkFlowExecutionError: If the stage raises any error other than MissingReferenceData.
        """
        home = os.getcwd()
        # update settings and set to running and save
        stage_result = StageResult(stage=stage.type,
                                   stage_settings=stage.dict(),
                                   status=Status.Running)
        results.results[stage_name] = stage_result
        results.modified_date = datetime.now().strftime("%Y_%m_%d")
        results.to_file(filename=self._results_fname)
        make_and_change_into(name=stage_name)
        try:
            # run the stage and save the result
            result_mol = stage.run(
                molecule=molecule,
                qc_spec=self.qc_options,
                local_options=self.local_resources,
            )
            stage_result.status = Status.Done
            results.current_molecule = result_mol
        except MissingReferenceData:
            # this means there are no torsions to scan so simulate it working
            stage_result.status = Status.Done
            results.current_molecule = molecule
        except Exception as e:
            import traceback

            # save the error do not update the current molecule
            stage_result.status = Status.Error
            stage_result.error = traceback.extract_tb(e.__traceback__).format()
            # move back first so the results and error file are written in the starting folder
            os.chdir(home)
            results.results[stage_name] = stage_result
            results.to_file(self._results_fname)
            # write the exception to file
            with open("QUBEKit.err", "w") as output:
                traceback.print_exc(file=output)

            raise WorkFlowExecutionError(
                f"The workflow stopped unexpectedly due to the following error at stage: {stage_name}"
            ) from e

        # move back
        os.chdir(home)
        # update the results
        results.results[stage_name] = stage_result
        results.to_file(self._results_fname)
        return results.current_molecule
Example #14
0
def openff():
    """Build an OpenFF parametrisation engine set to use the openff_unconstrained-2.0.0 force field file."""
    engine = OpenFF(force_field="openff_unconstrained-2.0.0.offxml")
    return engine