Ejemplo n.º 1
0
    def test_delta_add_to_dataset(self):
        """
        Check that add_to_dataset() stores values consistent with a direct
        EMT evaluation once the base-calc contribution is added back.

        The newest image of the learner's parent dataset is evaluated three
        ways: through add_to_dataset() (with an EMT calculator attached),
        directly with EMT, and with the learner's base calculator. The
        hand-reconstructed value

            (delta_sub + parent_ref) + (base - base_ref)

        must match the direct EMT energy within ENERGY_THRESHOLD, and
        delta_sub_forces + base_forces must match the direct EMT forces
        within FORCE_THRESHOLD.
        """
        atoms_copy = self.OAL_learner.parent_dataset[-1].copy()

        # Values as returned by the learner's own add_to_dataset().
        atoms_copy.set_calculator(EMT())
        [atoms_delta_sub] = self.OAL_learner.add_to_dataset(atoms_copy)
        delta_sub_energy = atoms_delta_sub.get_potential_energy()
        delta_sub_forces = atoms_delta_sub.get_forces()

        # Direct EMT singlepoint on a fresh copy of the same image.
        atoms_copy = atoms_copy.copy()
        atoms_copy.set_calculator(EMT())
        (atoms_parent, ) = convert_to_singlepoint([atoms_copy])
        parent_energy = atoms_parent.get_potential_energy()
        parent_forces = atoms_parent.get_forces()

        # Base calculator singlepoint on another fresh copy.
        atoms_copy = atoms_copy.copy()
        atoms_copy.set_calculator(self.OAL_learner.base_calc)
        (atoms_base, ) = convert_to_singlepoint([atoms_copy])
        base_energy = atoms_base.get_potential_energy()
        base_forces = atoms_base.get_forces()

        parent_ref_energy = self.OAL_learner.refs[0].get_potential_energy()
        base_ref_energy = self.OAL_learner.refs[1].get_potential_energy()

        # Undo the delta subtraction by hand.
        delta_hand_energy = (delta_sub_energy + parent_ref_energy) + (
            base_energy - base_ref_energy)
        delta_hand_forces = delta_sub_forces + base_forces

        assert self.OAL_learner.parent_dataset[-1] == atoms_parent

        # The message is only built when the assertion fails. A newline now
        # separates the reconstructed value from the "with calculated ..."
        # label (it was missing, which ran the two together).
        assert np.allclose(
            delta_hand_energy,
            parent_energy,
            atol=ENERGY_THRESHOLD,
        ), ("DeltaLearner add_to_dataset() + base calc energy inconsistent:\n"
            + str(delta_hand_energy) +
            "\nwith calculated parent prediction:\n" + str(parent_energy) +
            "\ncomposed of:\n  the add_to_dataset() parent prediction:\n" +
            str(delta_sub_energy) + "\n  the base calc prediction:\n" +
            str(base_energy) + "\n  the parent ref:\n" +
            str(parent_ref_energy) + "\n  the base ref:\n" +
            str(base_ref_energy) + "\nfor Energy Threshold: " +
            str(ENERGY_THRESHOLD))

        assert np.allclose(
            delta_hand_forces,
            parent_forces,
            atol=FORCE_THRESHOLD,
        ), ("DeltaLearner add_to_dataset() + base calc forces inconsistent:\n"
            + str(delta_hand_forces) +
            "\nwith calculated parent prediction:\n" + str(parent_forces) +
            "\ncomposed of:\n  the add_to_dataset() parent prediction:\n" +
            str(delta_sub_forces) + "\n  the base calc prediction:\n" +
            str(base_forces) + "\nfor Force Threshold: " +
            str(FORCE_THRESHOLD))
Ejemplo n.º 2
0
    def test_delta_get_ml_prediction(self):
        """
        Check get_ml_prediction() against a hand-assembled delta prediction.

        The raw ML potential output, the base calculator output and the two
        reference energies are combined by hand and compared with what
        get_ml_prediction() returns for the newest parent_dataset image,
        within ENERGY_THRESHOLD / FORCE_THRESHOLD.
        """
        learner = self.OAL_learner
        image = learner.parent_dataset[-1].copy()

        # Prediction as returned by the learner.
        predicted = learner.get_ml_prediction(image.copy())
        delta_sub_energy = predicted.get_potential_energy()
        delta_sub_forces = predicted.get_forces()

        # Raw output of the ML potential (trained on the difference).
        ml_atoms = image.copy()
        ml_atoms.set_calculator(learner.ml_potential)
        [ml_singlepoint] = convert_to_singlepoint([ml_atoms])
        ml_trained_on_diff_energy = ml_singlepoint.get_potential_energy()
        ml_trained_on_diff_forces = ml_singlepoint.get_forces()

        # Base calculator output for the same geometry.
        base_atoms = ml_atoms.copy()
        base_atoms.set_calculator(learner.base_calc)
        [base_singlepoint] = convert_to_singlepoint([base_atoms])
        base_energy = base_singlepoint.get_potential_energy()
        base_forces = base_singlepoint.get_forces()

        parent_ref_energy = learner.refs[0].get_potential_energy()
        base_ref_energy = learner.refs[1].get_potential_energy()

        # ml = (parent - parent ref) - (base - base ref)
        # parent = (base - base ref) + (ml + parent ref)
        energy_shift = base_energy - base_ref_energy
        delta_hand_energy = (ml_trained_on_diff_energy +
                             parent_ref_energy) + energy_shift
        delta_hand_forces = ml_trained_on_diff_forces + base_forces

        # Failure messages are assembled lazily, only if the check fails.
        assert np.allclose(
            delta_sub_energy,
            delta_hand_energy,
            atol=ENERGY_THRESHOLD,
        ), "".join([
            "DeltaLearner get_ml_prediction() energy inconsistent:\n",
            str(delta_sub_energy),
            "\nwith hand calculated ML prediction delta:\n",
            str(delta_hand_energy),
            "\ncomposed of:\n  the ML trained on difference prediction:\n",
            str(ml_trained_on_diff_energy),
            "\n  the base calc prediction:\n",
            str(base_energy),
            "\n  the parent ref:\n",
            str(parent_ref_energy),
            "\n  the base ref:\n",
            str(base_ref_energy),
            "\nfor Energy Threshold: ",
            str(ENERGY_THRESHOLD),
        ])

        assert np.allclose(
            delta_sub_forces,
            delta_hand_forces,
            atol=FORCE_THRESHOLD,
        ), "".join([
            "DeltaLearner get_ml_prediction() forces inconsistent:\n",
            str(delta_sub_forces),
            "\nwith hand calculated ML prediction delta:\n",
            str(delta_hand_forces),
            "\ncomposed of:\n  the ML trained on difference prediction:\n",
            str(ml_trained_on_diff_forces),
            "\n  the base calc prediction:\n",
            str(base_forces),
            "\nfor Force Threshold: ",
            str(FORCE_THRESHOLD),
        ])
Ejemplo n.º 3
0
    def calculate(self, atoms, properties, system_changes):
        """
        Fill self.results with an energy and forces for `atoms`.

        With fewer than two points in parent_dataset the parent calculation
        is always run via add_data_and_retrain(). Otherwise the ensemble
        calculator is evaluated on a copy of `atoms`; if unsafe_prediction()
        or parent_verify() flags that result, add_data_and_retrain() is used
        instead of the ensemble values.
        """
        Calculator.calculate(self, atoms, properties, system_changes)

        if len(self.parent_dataset) < 2:
            # Uncertainty is not well calibrated with so little data, so go
            # straight to the parent calculation.
            energy, force = self.add_data_and_retrain(atoms)
        else:
            # Evaluate the ensemble on a copy and freeze it as a singlepoint.
            ensemble_input = atoms.copy()
            ensemble_input.set_calculator(self.ensemble_calc)
            [atoms_ML] = convert_to_singlepoint([ensemble_input])

            needs_parent = (self.unsafe_prediction(atoms_ML)
                            or self.parent_verify(atoms_ML))
            if needs_parent:
                # Extrapolating too far: use (and learn from) the parent.
                energy, force = self.add_data_and_retrain(atoms)
            else:
                energy = atoms_ML.get_potential_energy(apply_constraint=False)
                force = atoms_ML.get_forces(apply_constraint=False)

        self.results["energy"] = energy
        self.results["forces"] = force
Ejemplo n.º 4
0
 def get_ml_prediction(self, atoms):
     """
     Return a singlepoint copy of `atoms` evaluated with add_delta_calc.

     A copy of `atoms` is first evaluated with ml_potential so that the
     entries of its `info` dict can be carried over, then re-evaluated with
     add_delta_calc (the delta "add" step, needed because the ML model is
     trained on delta-subtracted data). The input object is not modified.
     """
     working = atoms.copy()

     # First pass: plain ML potential, kept only for its info entries.
     working.set_calculator(self.ml_potential)
     [ml_only] = convert_to_singlepoint([working])

     # Second pass: same copy, now with the delta-add calculator attached.
     working.set_calculator(self.add_delta_calc)
     [atoms_ML] = convert_to_singlepoint([working])

     atoms_ML.info.update(ml_only.info)
     return atoms_ML
Ejemplo n.º 5
0
    def __init__(
        self,
        learner_params,
        trainer,
        parent_dataset,
        parent_calc,
    ):
        """
        Set up the learner state and, when possible, an initial ensemble.

        learner_params must provide "uncertain_tol", and "n_ensembles" when
        more than one data point is supplied; "fmax_verify_threshold" is
        optional. parent_dataset is converted to singlepoints up front.
        """
        Calculator.__init__(self)

        self.parent_calc = parent_calc
        self.trainer = trainer
        self.learner_params = learner_params
        self.parent_dataset = convert_to_singlepoint(parent_dataset)

        # A single data point gives a meaningless uncertainty, so the
        # ensemble is only built when there are at least two.
        # NOTE(review): the ensemble is built from the raw `parent_dataset`
        # argument, not from the converted self.parent_dataset - confirm
        # this is intended.
        has_enough_data = len(self.parent_dataset) > 1
        if has_enough_data:
            ensemble_sets, dataset = non_bootstrap_ensemble(
                parent_dataset,
                n_ensembles=self.learner_params["n_ensembles"],
            )
            self.ensemble_sets = ensemble_sets
            self.parent_dataset = dataset
            self.ensemble_calc = EnsembleCalc.make_ensemble(
                self.ensemble_sets, self.trainer)

        # NaN as the default threshold ("always False" in the original note).
        self.fmax_verify_threshold = (
            self.learner_params["fmax_verify_threshold"]
            if "fmax_verify_threshold" in self.learner_params else np.nan)

        self.uncertain_tol = learner_params["uncertain_tol"]
        self.parent_calls = 0
Ejemplo n.º 6
0
 def get_ml_prediction(self, atoms):
     """
     Return a singlepoint copy of `atoms` evaluated with ml_potential.

     The input object is left untouched; the calculator is attached to a
     copy. Kept as a separate method so that subclasses (DeltaLearner) can
     override how the ML prediction is produced.
     """
     ml_input = atoms.copy()
     ml_input.set_calculator(self.ml_potential)
     [prediction] = convert_to_singlepoint([ml_input])
     return prediction
Ejemplo n.º 7
0
 def init_training_data(self):
     """
     Build the delta references and the ensemble training sets.

     The first image of parent_dataset provides both references: a copy of
     its singlepoint is the parent reference, and the same image evaluated
     with base_calc is the base reference. All images are then evaluated
     with a "sub" DeltaCalc and passed to bootstrap_ensemble(), whose
     results replace ensemble_sets and parent_dataset.
     """
     singlepoints = convert_to_singlepoint(self.parent_dataset)

     parent_ref = singlepoints[0].copy()
     base_ref = compute_with_calc(singlepoints[:1], self.base_calc)[0]
     self.refs = [parent_ref, base_ref]

     self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
     delta_images = compute_with_calc(singlepoints, self.delta_sub_calc)

     ensemble_sets, dataset = bootstrap_ensemble(delta_images)
     self.ensemble_sets = ensemble_sets
     self.parent_dataset = dataset
Ejemplo n.º 8
0
    def init_training_data(self):
        """
        Replace training_data with its delta-subtracted counterpart.

        The first singlepoint image is the parent reference, and that same
        image evaluated with base_calc is the base reference. Both are
        stored in self.refs and used to build the "sub" DeltaCalc that
        re-evaluates every training image.
        """
        singlepoints = convert_to_singlepoint(self.training_data)

        parent_ref = singlepoints[0]
        [base_ref, *_] = compute_with_calc([parent_ref], self.base_calc)
        self.refs = [parent_ref, base_ref]

        self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
        delta_images = compute_with_calc(singlepoints, self.delta_sub_calc)
        self.training_data = delta_images
Ejemplo n.º 9
0
    def init_training_data(self):
        """
        Rebuild training_data image by image with per-image delta calcs.

        References are taken from the first singlepoint image (parent) and
        from that image evaluated with base_calc (base). Each image's own
        calculator then has its implemented_properties set to energy and
        forces, and is paired with base_calc in a "sub" DeltaCalc that
        evaluates just that image.
        """
        singlepoints = convert_to_singlepoint(self.training_data)

        parent_ref = singlepoints[0]
        base_ref = compute_with_calc([parent_ref], self.base_calc)[0]
        self.refs = [parent_ref, base_ref]
        self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

        self.training_data = []
        for image in singlepoints:
            image_calc = image.get_calculator()
            # Set on the image's own calculator before wrapping it below.
            image_calc.implemented_properties = ["energy", "forces"]
            image_delta_calc = DeltaCalc(
                [image_calc, self.base_calc],
                "sub",
                self.refs,
            )
            self.training_data.extend(
                compute_with_calc([image], image_delta_calc))
Ejemplo n.º 10
0
    def add_data_and_retrain(self, atoms):
        """
        Run the parent calculator on `atoms`, add the result to the dataset
        and rebuild the ensemble.

        Returns a tuple (energy, forces) read from the new singlepoint with
        apply_constraint=False. The input `atoms` is not modified; the
        calculation runs on a copy with a shallow copy of parent_calc.
        """
        print("OnlineLearner: Parent calculation required")

        parent_input = atoms.copy()
        parent_input.set_calculator(copy.copy(self.parent_calc))
        [new_point] = convert_to_singlepoint([parent_input])

        energy_actual = new_point.get_potential_energy(apply_constraint=False)
        force_actual = new_point.get_forces(apply_constraint=False)

        n_ensembles = self.learner_params["n_ensembles"]
        ensemble_sets, dataset = non_bootstrap_ensemble(
            self.parent_dataset,
            new_point,
            n_ensembles=n_ensembles,
        )
        self.ensemble_sets = ensemble_sets
        self.parent_dataset = dataset

        # Training is skipped until there are at least two data points.
        if len(self.parent_dataset) > 1:
            self.ensemble_calc = EnsembleCalc.make_ensemble(
                self.ensemble_sets, self.trainer)

        self.parent_calls += 1

        return energy_actual, force_actual
Ejemplo n.º 11
0
    def add_data_and_retrain(self, atoms):
        """
        Obtain parent data for `atoms`, add it to the datasets and retrain
        the ML potential when enough data is available.

        Steps, in order:
          1. Reset steps_since_last_query to 0.
          2. Reuse the calculator result already attached to `atoms` when
             reverify_with_parent is False and the attached calc is a
             SinglePointCalculator or has the same name as parent_calc.
             Otherwise attach parent_calc to `atoms` (the passed object
             itself, not a copy), convert to a singlepoint and store the
             elapsed time in self.info["parent_time"].
          3. Append the new point to complete_dataset, or replace
             complete_dataset with it when store_complete_dataset is falsy.
          4. Optionally convert to top-k forces, then call add_to_dataset(),
             which returns the partial dataset used for partial fitting.
          5. Increment parent_calls.
          6. Train ml_potential according to the dataset size and the
             train_on_recent_points / partial_fit settings; the elapsed time
             is stored in self.info["training_time"] whether or not any
             training ran.

        Returns:
            (energy_actual, force_actual, force_cons): energy and forces of
            the new point read with apply_constraint=self.constraint, and
            the forces read with get_forces() called without arguments.
        """
        self.steps_since_last_query = 0

        # Don't redo singlepoints if not instructed to reverify and the
        # atoms already carry a suitable calculator (a SinglePointCalculator,
        # or one with the same name as the parent calculator).
        if (self.reverify_with_parent is False and hasattr(atoms, "calc")
                and atoms.calc is not None
                and (type(atoms.calc) is SinglePointCalculator
                     or atoms.calc.name == self.parent_calc.name)):
            if not self.suppress_warnings:
                warn(
                    "Assuming Atoms object Singlepoint is precalculated (to turn this behavior off: set 'reverify_with_parent' to True)"
                )
            new_data = atoms

        # If verifying (or reverifying), do the singlepoint and record the
        # time the parent call takes.
        else:
            print("OnlineLearner: Parent calculation required")
            start = time.time()

            # Note: the calculator is attached to the caller's atoms object.
            atoms.set_calculator(self.parent_calc)
            (new_data, ) = convert_to_singlepoint([atoms])
            end = time.time()
            # parent_calls has not been incremented yet at this point, so
            # the printed call number is the count before this call.
            print("Time to call parent (call #" + str(self.parent_calls) +
                  "): " + str(end - start))
            self.info["parent_time"] = end - start

        # Add to the complete dataset (for atomistic methods optimizer
        # replay); without store_complete_dataset only the newest point is
        # kept.
        if self.store_complete_dataset:
            self.complete_dataset.append(new_data)
        else:
            self.complete_dataset = [new_data]

        # Before adding to the parent (training) dataset, convert to top k
        # forces if applicable.
        if self.train_on_top_k_forces is not None:
            [training_data
             ] = convert_to_top_k_forces([new_data],
                                         self.train_on_top_k_forces)
        else:
            training_data = new_data

        # Add to the parent dataset (for training) and keep the returned
        # partial dataset (for partial fit).
        partial_dataset = self.add_to_dataset(training_data)

        self.parent_calls += 1

        start = time.time()
        # Retrain the ML potential only if there is more than enough data
        # that the ML potential may be used.
        if len(self.parent_dataset) > self.num_initial_points:
            # If training only on recent points, and have trained before,
            # then check if the dataset has become long enough to train on
            # a subset (the last train_on_recent_points entries).
            if (self.trained_at_least_once
                    and (self.train_on_recent_points is not None) and
                (len(self.parent_dataset) > self.train_on_recent_points)):
                self.ml_potential.train(
                    self.parent_dataset[-self.train_on_recent_points:])
            # Otherwise, if partial fitting, partial fit if not training
            # for the first time.
            elif (self.trained_at_least_once
                  and (self.train_on_recent_points is None)
                  and (self.partial_fit)):
                self.ml_potential.train(self.parent_dataset, partial_dataset)
            # Otherwise just train as normal on the full parent dataset.
            else:
                self.ml_potential.train(self.parent_dataset)
                self.trained_at_least_once = True

        # If the data requirement has just been met: train for the first
        # time on only the initial points to keep. The parent dataset is
        # reduced to those points and num_initial_points is updated to match
        # its new length.
        elif len(self.parent_dataset) == self.num_initial_points:
            new_parent_dataset = [
                self.parent_dataset[i] for i in self.initial_points_to_keep
            ]
            self.parent_dataset = new_parent_dataset
            self.num_initial_points = len(self.parent_dataset)

            self.ml_potential.train(self.parent_dataset)
            self.trained_at_least_once = True
        end = time.time()
        # Recorded even when neither branch above trained anything.
        self.info["training_time"] = end - start

        # Read the energy and forces from the new data point and return
        # them; force_cons uses get_forces() with its default arguments.
        energy_actual = new_data.get_potential_energy(
            apply_constraint=self.constraint)
        force_actual = new_data.get_forces(apply_constraint=self.constraint)
        force_cons = new_data.get_forces()
        return energy_actual, force_actual, force_cons