def _reward(self) -> float:
        """
        Notes
        -----
        Logged parameters:

        * energy (float): the energy of the current conformer
        """
        config = self.config

        # Register the current conformer with the backup molecule and
        # record its energy.
        self.backup_mol.AddConformer(self.conf, assignId=True)
        energy = get_conformer_energy(self.mol)
        self.step_info['energy'] = energy
        self.backup_energys.append(energy)

        self._prune_conformers()

        # Log of the Z0-normalized Boltzmann partition sum over every
        # conformer retained after pruning.
        energys = np.array(self.backup_energys)
        boltzmann_sum = np.exp(-(energys - config.E0) /
                               (KB * config.tau)).sum()
        total_reward = np.log(boltzmann_sum / config.Z0)
        if not np.isfinite(total_reward):
            # Guard against -inf/nan (e.g. empty or underflowed sum).
            total_reward = np.finfo(np.float64).eps

        # Reward is the increment over the cumulative reward so far.
        return total_reward - self.total_reward
    def _reward(self):
        energy = get_conformer_energy(self.mol)
        self.step_info['energy'] = energy  # log energy

        reward = 1. if energy < self.energy_thresh else 0.
        if energy < self.energy_thres:
            self.confs_below_threshold += 1
            self.episode_info[
                'confs_below_threshold'] = self.confs_below_threshold
        return reward
    def _reward(self) -> float:
        """Returns :math:`e^{-1 * energy}` where :math:`energy` is the
        energy of the current conformer of the molecule.

        Notes
        -----
        Logged parameters:

        * energy (float): the energy of the current conformer
        """
        energy = get_conformer_energy(self.mol)
        reward = np.exp(-1. * energy)

        self.step_info['energy'] = energy
        return reward
    def _reward(self) -> float:
        """
        Notes
        -----
        Logged parameters:

        * energy (float): the energy of the current conformer
        """
        config = self.config

        self.backup_mol.AddConformer(self.conf, assignId=True)

        energy = get_conformer_energy(self.mol)
        self.step_info['energy'] = energy

        reward = np.exp(-1. * (energy - config.E0) /
                        (KB * config.tau)) / config.Z0

        if self._done():
            reward -= self._pruning_penalty()
        return reward
    def _reward(self) -> float:
        """
        Notes
        -----
        Logged parameters:

        * energy (float): the energy of the current conformer
        * repeat (int): total number of repeated actions so far in the episode
        """
        config = self.config

        energy = get_conformer_energy(self.mol)
        self.step_info['energy'] = energy

        if tuple(self.action) in self.seen:
            self.repeats += 1
            self.episode_info['repeat'] = self.repeats
            return 0.
        else:
            self.seen.add(tuple(self.action))
            reward = np.exp(-1. * (energy - config.E0) /
                            (KB * config.tau)) / config.Z0
            return reward