class SafeOptAgent(StaticControlAgent):
    def __init__(self,
                 mutable_params: Union[dict, list],
                 abort_reward: int,
                 kernel: Kern,
                 gp_params: Dict[str, Any],
                 ctrls: Dict[str, Controller],
                 observation_action_mapping: dict,
                 history=EmptyHistory()):
        """
        Agent to execute safeopt algorithm (https://arxiv.org/abs/1509.01066) to control the environment by using
        auxiliary controllers and Gaussian process to adopt the controller parameters (mutable_params) to safely
        increase the performance.

        :param mutable_params: safe inital controller parameters to adopt
        :param abort_reward: factor to multiply with the initial reward to give back an abort_reward-times higher
               negative reward in case of limit exceeded
        :param kernel: kernel for the Gaussian process unsing GPy
        :param gp_params: kernel parameters like bounds and lengthscale
        :param ctrls: Controllers that are feed with the observations and exert actions on the environment
        :param observation_action_mapping: form controller keys to observation keys, whose observation values will be
         passed to the controller
        :param history:Storage of internal data
        """
        self.params = MutableParams(
            list(mutable_params.values()) if isinstance(mutable_params, dict
                                                        ) else mutable_params)
        self.kernel = kernel
        self.bounds = gp_params['bounds']
        self.noise_var = gp_params['noise_var']
        self.prior_mean = gp_params['prior_mean']
        self.safe_threshold = gp_params['safe_threshold']
        self.explore_threshold = gp_params['explore_threshold']

        self.abort_reward = abort_reward
        self.episode_reward = None
        self.optimizer = None
        self.inital_Performance = None
        self._iterations = 0
        super().__init__(ctrls, observation_action_mapping, history)
        self.history.cols = ['J', 'Params']

    def reset(self):
        """
        Resets the kernel, episodic reward and the optimizer
        """
        # reinstantiate kernel

        kernel_params = self.kernel.to_dict()
        cls_name = kernel_params['class']
        mod = importlib.import_module('.'.join(cls_name.split('.')[:-1]))
        cls = getattr(mod, cls_name.split('.')[-1])
        remaining_params = {
            k: v
            for k, v in kernel_params.items() if k not in {'class', 'useGPU'}
        }
        self.kernel = cls(**remaining_params)

        self.params.reset()
        self.optimizer = None
        self.episode_reward = 0
        self.inital_Performance = None
        self._iterations = 0
        return super().reset()

    def observe(self, reward, terminated):
        """
        Makes an observation of the environment.
        If terminated, calculates the performance and the next values for the parameters using SafeOpt.

        :param reward: reward of the simulation step
        :param terminated: True if episode is over or aborted
        :return:
        """
        self._iterations += 1
        self.episode_reward += reward or 0
        if terminated:
            # average the accumulated reward over the episode length (mean error measure)
            self.episode_reward = self.episode_reward / self._iterations
            # safeopt update step
            self.update_params()
            # reset for new episode
            self.prepare_episode()
        # on other steps we don't need to do anything

    def update_params(self):
        """
        Sets up the Gaussian process in the first episode and updates the parameters in the following episodes.
        """
        if self.optimizer is None:
            # First Iteration
            # self.inital_Performance = 1 / self.episode_reward
            self.inital_Performance = self.episode_reward
            # Norm for Safe-point
            # J = 1 / self.episode_reward / self.inital_Performance

            J = self.inital_Performance

            # Define Mean "Offset": Like BK: Assume Mean = Threshold (BK = 0, now = 20% below first (safe) J: means: if
            # new Performance is 20 % lower than the inital we assume as unsafe)
            mf = GPy.core.Mapping(len(self.bounds), 1)
            mf.f = lambda x: self.prior_mean * J
            mf.update_gradients = lambda a, b: 0
            mf.gradients_X = lambda a, b: 0

            gp = GPy.models.GPRegression(np.array([self.params[:]]),
                                         np.array([[J]]),
                                         self.kernel,
                                         noise_var=self.noise_var,
                                         mean_function=mf)
            self.optimizer = SafeOptSwarm(gp,
                                          self.safe_threshold * J,
                                          bounds=self.bounds,
                                          threshold=self.explore_threshold * J)

        else:

            if np.isnan(self.episode_reward):
                # set the reward to abort_reward times the (negative!) initial performance
                self.episode_reward = self.abort_reward * self.inital_Performance
                # toDo: set reward to -inf and stop agent?
                # warn via logger
                logger.warning(
                    'UNSAFE! Limit exceeded, episode aborted, giving a reward of {} times the '
                    'initial reward'.format(self.abort_reward))

            J = self.episode_reward

            self.optimizer.add_new_data_point(self.params[:], J)

        self.history.append([J, self.params[:]])
        self.params[:] = self.optimizer.optimize()

    def render(self):
        """
        Renders the results for the performance
        """
        plt.figure()
        self.optimizer.plot(1000)
        plt.show()

    def prepare_episode(self):
        """
        Prepares the next episode: resets the iteration counter and calls the superclass to reset the controllers
        """
        self._iterations = 0
        super().prepare_episode()
class SafeOptAgent(StaticControlAgent):
    def __init__(self,
                 mutable_params: Union[dict, list],
                 abort_reward: int,
                 kernel: Kern,
                 gp_params: Dict[str, Any],
                 ctrls: List[Controller],
                 obs_template: Mapping[str, List[Union[List[str],
                                                       np.ndarray]]],
                 obs_varnames: List[str] = None,
                 **kwargs):
        """
        Agent that executes the SafeOpt algorithm (https://arxiv.org/abs/1509.01066) to control the environment
        by using auxiliary controllers and a Gaussian process to adapt the controller parameters (mutable_params)
        and safely increase the performance.

        :param mutable_params: safe initial controller parameters to adapt
        :param abort_reward: factor to multiply with the initial reward to give back an abort_reward-times higher
               negative reward in case the limit is exceeded
        :param kernel: kernel for the Gaussian process using GPy
        :param gp_params: kernel parameters like bounds and lengthscale
        :param ctrls: controllers that are fed with the observations and exert actions on the environment
        :param obs_template:
            Template describing how the observation array should be transformed and passed to the internal controllers.
            The key must match the name field of an internal controller.
            The values are a list of:
                - list of strings
                    - matching variable names of the state
                    - must match self.obs_varnames
                    - will be substituted by the values at runtime
                    - will be passed as an np.array of floats to the controller
                - np.ndarray of floats (to be passed statically to the controller)
                - a mixture of static and dynamic values in one parameter is not supported for performance reasons.
            The values will be passed as parameters to the controller's step function.
        :param obs_varnames: list of variable names that match the values of the observations
         passed in the act function. Will be set automatically by the Runner class.
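
        Example (illustrative; mirrors the usage in the script further below, where 'master'
        is the controller name and i_ref is a static reference array)::

            obs_template = dict(master=[[f'lc.inductor{k}.i' for k in '123'], i_ref])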
        """
        self.params = MutableParams(
            list(mutable_params.values()) if isinstance(mutable_params, dict
                                                        ) else mutable_params)
        self.kernel = kernel
        self.bounds = gp_params['bounds']
        self.noise_var = gp_params['noise_var']
        self.prior_mean = gp_params['prior_mean']
        self.safe_threshold = gp_params['safe_threshold']
        self.explore_threshold = gp_params['explore_threshold']

        self.abort_reward = abort_reward
        self.episode_reward = None
        self.optimizer = None
        self.inital_performance = None
        self.last_best_performance = None
        self.performance = None

        self._iterations = 0

        super().__init__(ctrls, obs_template, obs_varnames, **kwargs)
        self.history.cols = ['J', 'Params']

    def reset(self):
        """
        Resets the kernel, episodic reward and the optimizer
        """
        # reinstantiate kernel
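        # (a fresh kernel object is rebuilt from its serialized parameters so the new GP
        #  model does not share state with the kernel used in the previous episode)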

        kernel_params = self.kernel.to_dict()
        cls_name = kernel_params['class']
        mod = importlib.import_module('.'.join(cls_name.split('.')[:-1]))
        cls = getattr(mod, cls_name.split('.')[-1])
        remaining_params = {
            k: v
            for k, v in kernel_params.items() if k not in {'class', 'useGPU'}
        }
        self.kernel = cls(**remaining_params)

        self.params.reset()
        self.optimizer = None
        self.episode_reward = 0
        self.inital_performance = None
        self.last_best_performance = None
        self.performance = None

        self._iterations = 0

        return super().reset()

    def observe(self, reward, terminated):
        """
        Makes an observation of the environment.
        If terminated, calculates the performance and the next values for the parameters using SafeOpt.

        :param reward: reward of the simulation step
        :param terminated: True if episode is over or aborted
        :return:
        """
        self._iterations += 1
        self.episode_reward += reward or 0
        if terminated:
            # average the accumulated reward over the episode length (mean error measure)
            self.performance = self.episode_reward / self._iterations
            # safeopt update step
            self.update_params()
            # reset for new episode
            self.prepare_episode()
        # on other steps we don't need to do anything

    def update_params(self):
        """
        Sets up the Gaussian process in the first episode and updates the parameters in the following episodes.
        """
        if self.optimizer is None:
            # First Iteration
            self.inital_performance = self.performance

            # Norm for Safe-point
            # J = 1 / self.episode_reward / self.inital_Performance

            self.last_best_performance = self.performance

            # Define Mean "Offset": Like BK: Assume Mean = Threshold (BK = 0, now = 20% below first (safe) J: means: if
            # new Performance is 20 % lower than the inital we assume as unsafe)
            mf = GPy.core.Mapping(len(self.bounds), 1)
            mf.f = lambda x: self.prior_mean * self.performance
            mf.update_gradients = lambda a, b: 0
            mf.gradients_X = lambda a, b: 0

            gp = GPy.models.GPRegression(
                np.array([self.params[:]]),  # noqa
                np.array([[self.performance]]),
                self.kernel,
                noise_var=self.noise_var,
                mean_function=mf)
            self.optimizer = SafeOptSwarm(
                gp,
                self.safe_threshold * self.performance,
                bounds=self.bounds,
                threshold=self.explore_threshold * self.performance)

        else:
            if np.isnan(self.episode_reward):
                # set the reward to abort_reward times the (negative!) initial performance
                self.performance = self.abort_reward * self.inital_performance
                # toDo: set reward to -inf and stop agent?
                # warn via logger
                logger.warning(
                    'UNSAFE! Limit exceeded, episode aborted, giving a reward of {} times the '
                    'initial reward'.format(self.abort_reward))

            self.optimizer.add_new_data_point(self.params[:], self.performance)

        self.history.append([self.performance, self.params[:]])
        self.params[:] = self.optimizer.optimize()

        if self.has_improved:
            # if performance has improved store the current last index of the df
            self.best_episode = self.history.df.shape[0] - 1

            self.last_best_performance = self.performance

    def render(self) -> Figure:
        """
        Renders the results for the performance
        """
        figure, ax = plt.subplots()
        if self.optimizer.x.size > 3:
            # plotting of the GP landscape is only possible for up to 3 parameter dimensions
            logger.info(
                'Plotting of GP landscape not possible for more than 3 dimensions')
            return figure
        self.optimizer.plot(1000, figure=figure)

        # mark best performance in green
        y, x = self.history.df.loc[self.best_episode, ['J', 'Params']]

        if len(x) == 1:
            ax.scatter([x], [y],
                       s=20 * 10,
                       marker='x',
                       linewidths=3,
                       color='g')
        elif len(x) == 2:
            ax.plot(x[0], x[1], 'og')
        else:
            logger.warning('Choose appropriate number of control parameters')

        plt.show()
        return figure

    def prepare_episode(self):
        """
        Prepares the next episode: resets the iteration counter and calls the superclass to reset the controllers
        """
        self._iterations = 0

        super().prepare_episode()

    @property
    def has_improved(self) -> bool:
        """
        Indicates whether the performance improved or stayed constant compared to the best performance so far.

        :return: True if the performance increased or stayed equal, else False
        """
        return self.performance >= self.last_best_performance
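

# Minimal driving sketch (illustrative only; in the scripts below this loop is handled by
# the Runner / MonteCarloRunner classes, and env.step is assumed to follow the gym API):
#
#   agent.reset()
#   for _ in range(num_episodes):
#       obs, terminated = env.reset(), False
#       while not terminated:
#           obs, reward, terminated, _ = env.step(agent.act(obs))
#           agent.observe(reward, terminated)  # at episode end this triggers the SafeOpt update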
    # For 1D example, if Ki should be adjusted
    elif adjust == 'Ki':
        mutable_params = dict(currentI=MutableFloat(5))
        current_dqp_iparams = PI_params(kP=0.005, kI=mutable_params['currentI'], limits=(-1, 1))

    # For 2D example, choose Kp and Ki as mutable parameters
    elif adjust == 'Kpi':
        mutable_params = dict(currentP=MutableFloat(0.04), currentI=MutableFloat(11.8))
        current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=mutable_params['currentI'],
                                        limits=(-1, 1))

    # Define a current sourcing inverter as master inverter using the pi and droop parameters from above
    ctrl = MultiPhaseDQCurrentSourcingController(current_dqp_iparams, ts_sim=delta_t,
                                                 ts_ctrl=undersample * delta_t, name='master', f_nom=net.freq_nom)

    i_ref = MutableParams([MutableFloat(f) for f in i_ref1])
    #####################################
    # Definition of the optimization agent
    # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example
    # Arguments described above
    # History is used to store results
    agent = SafeOptAgent(mutable_params,
                         abort_reward,
                         j_min,
                         kernel,
                         dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, safe_threshold=safe_threshold,
                              explore_threshold=explore_threshold), [ctrl],
                         dict(master=[[f'lc.inductor{k}.i' for k in '123'], i_ref]), history=FullHistory()
                         )

    #####################################
def run_experiment(len_kp, len_ki):
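    # if this (len_kp, len_ki) combination was already evaluated, return the cached result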
    if isfile(f'{save_folder}/{len_kp:.4f},{len_ki:.4f}.txt'):
        with open(f'{save_folder}/{len_kp:.4f},{len_ki:.4f}.txt', 'r') as f:
            return strtobool(f.read().strip())

    rew = Reward(i_limit=iLimit,
                 i_nominal=iNominal,
                 mu_c=mu,
                 max_episode_steps=max_episode_steps,
                 obs_dict=[[f'lc.inductor{k}.i' for k in '123'],
                           'master.phase', [f'master.SPI{k}' for k in 'dq0']])

    #####################################
    # Definitions for the GP
    prior_mean = 0  # 2  # factor for the GP prior mean; it is multiplied with the first performance of the
    # initial (safe) parameter set
    noise_var = 0.001  # 0.001 ** 2  # measurement noise sigma_omega
    prior_var = 2  # prior variance of the GP

    bounds = None
    lengthscale = None
    if adjust == 'Kp':
        bounds = [(0.0001, 0.1)]  # bounds on the input variable Kp
        lengthscale = [
            .025
        ]  # length scale for the parameter variation [Kp] for the GP

    # For 1D example, if Ki should be adjusted
    if adjust == 'Ki':
        bounds = [(0, 20)]  # bounds on the input variable Ki
        lengthscale = [
            10
        ]  # length scale for the parameter variation [Ki] for the GP

    # For 2D example, choose Kp and Ki as mutable parameters (below) and define bounds and lengthscale for both of them
    if adjust == 'Kpi':
        bounds = [(0.001, 0.07), (2, 150)]
        lengthscale = [0.012, 30.]
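    # Illustrative sanity check (added here, not part of the original script): with ARD=True
    # the Matern32 kernel below expects exactly one lengthscale per bounded parameter
    assert bounds is not None and len(lengthscale) == len(bounds)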

    df_len = pd.DataFrame({
        'lengthscale': lengthscale,
        'bounds': bounds,
        'balanced_load': balanced_load,
        'barrier_param_mu': mu
    })

    # The performance should not drop below the safe threshold, which is defined as the factor safe_threshold times
    # the initial performance: safe_threshold = 0.8 means that a measured performance is considered unsafe if it
    # drops more than 20 % below the initial performance of the initial safe (!) parameter set
    safe_threshold = 0
    j_min = cal_j_min(phase_shift, amp_dev)  # Used for normalization

    # The algorithm will not try to expand any points whose performance is below this threshold, which makes it
    # stop expanding points eventually.
    # The variable below is multiplied with the first performance of the initial set:
    explore_threshold = 0

    # Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case
    # the limit is exceeded.
    # It has to be negative due to the normalized performance (with respect to J_init = 1)
    abort_reward = 100 * j_min

    # Definition of the kernel
    kernel = GPy.kern.Matern32(input_dim=len(bounds),
                               variance=prior_var,
                               lengthscale=lengthscale,
                               ARD=True)

    #####################################
    # Definition of the controllers
    mutable_params = None
    current_dqp_iparams = None
    if adjust == 'Kp':
        # mutable_params = parameter (Kp gain of the current controller of the inverter) to be optimized using
        # the SafeOpt algorithm
        mutable_params = dict(currentP=MutableFloat(0.04))

        # Define the PI parameters for the current controller of the inverter
        current_dqp_iparams = PI_params(kP=mutable_params['currentP'],
                                        kI=12,
                                        limits=(-1, 1))

    # For 1D example, if Ki should be adjusted
    elif adjust == 'Ki':
        mutable_params = dict(currentI=MutableFloat(5))
        current_dqp_iparams = PI_params(kP=0.005,
                                        kI=mutable_params['currentI'],
                                        limits=(-1, 1))

    # For 2D example, choose Kp and Ki as mutable parameters
    elif adjust == 'Kpi':
        mutable_params = dict(currentP=MutableFloat(0.04),
                              currentI=MutableFloat(11.8))
        current_dqp_iparams = PI_params(kP=mutable_params['currentP'],
                                        kI=mutable_params['currentI'],
                                        limits=(-1, 1))

    # Define a current sourcing inverter as master inverter using the pi and droop parameters from above
    ctrl = MultiPhaseDQCurrentSourcingController(current_dqp_iparams,
                                                 delta_t,
                                                 undersampling=undersample,
                                                 name='master',
                                                 f_nom=net.freq_nom)

    i_ref = MutableParams([MutableFloat(f) for f in i_ref1])
    #####################################
    # Definition of the optimization agent
    # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example
    # Arguments described above
    # History is used to store results
    agent = SafeOptAgent(
        mutable_params,
        abort_reward,
        j_min,
        kernel,
        dict(bounds=bounds,
             noise_var=noise_var,
             prior_mean=prior_mean,
             safe_threshold=safe_threshold,
             explore_threshold=explore_threshold),
        [ctrl],
        dict(master=[[f'lc.inductor{k}.i' for k in '123'], i_ref]),
        history=FullHistory(),
    )

    #####################################
    # Definition of the environment using an FMU created by OpenModelica
    # (https://www.openmodelica.org/)
    # Using an inverter supplying a load
    # - using the reward function described above as callable in the env
    # - viz_cols is used to choose which measurement values should be displayed (here, only the 3 currents through the
    #   inductors of the inverters are plotted). Labels and grid are adjusted using the PlotTmpl (for more information,
    #   see the UserGuide)
    # - inputs to the models are the connection points to the inverters (see the user guide for more details)
    # - model outputs are the 3 currents through the inductors and the 3 voltages across the capacitors

    if include_simulate:

        # Defining unbalanced loads sampled from a Gaussian distribution with std = 0.2 * mean
        # r_load = Load(R, 0.1 * R, balanced=balanced_load, tolerance=0.1)
        # l_load = Load(L, 0.1 * L, balanced=balanced_load, tolerance=0.1)
        # i_noise = Noise([0, 0, 0], [0.0023, 0.0015, 0.0018], 0.0005, 0.32)

        # if no noise should be included:
        r_load = Load(R, 0 * R, balanced=balanced_load)
        l_load = Load(L, 0 * L, balanced=balanced_load)

        def reset_loads():
            r_load.reset()
            l_load.reset()

        plotter = PlotManager(agent,
                              save_results=save_results,
                              save_folder=save_folder,
                              show_plots=show_plots)

        def ugly_foo(t):
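            # switch the current reference from i_ref1 to i_ref2 at t = 0.05 s and return
            # the phase-3 load inductance value that model_params expects for this key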

            if t >= .05:
                i_ref[:] = i_ref2
            else:
                i_ref[:] = i_ref1

            return partial(l_load.give_value, n=2)(t)

        env = gym.make(
            'openmodelica_microgrid_gym:ModelicaEnv_test-v1',
            # reward_fun=Reward().rew_fun,
            reward_fun=rew.rew_fun_c,
            # time_step=delta_t,
            viz_cols=[
                PlotTmpl([[f'lc.inductor{i}.i'
                           for i in '123'], [f'master.SPI{i}' for i in 'abc']],
                         callback=plotter.xylables_i_abc,
                         color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                         style=[[None], ['--']]),
                PlotTmpl([[f'master.m{i}' for i in 'abc']],
                         callback=lambda fig: plotter.update_axes(
                             fig,
                             title='Simulation',
                             ylabel='$m_{\mathrm{abc}}\,/\,\mathrm{}$')),
                PlotTmpl([[f'master.CVI{i}'
                           for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']],
                         callback=plotter.xylables_i_dq0,
                         color=[['b', 'r', 'g'], ['b', 'r', 'g']],
                         style=[[None], ['--']])
            ],
            log_level=logging.INFO,
            viz_mode='episode',
            max_episode_steps=max_episode_steps,
            model_params={
                'lc.resistor1.R': partial(r_load.give_value, n=0),
                'lc.resistor2.R': partial(r_load.give_value, n=1),
                'lc.resistor3.R': partial(r_load.give_value, n=2),
                'lc.inductor1.L': partial(l_load.give_value, n=0),
                'lc.inductor2.L': partial(l_load.give_value, n=1),
                'lc.inductor3.L': ugly_foo
            },
            model_path='../../omg_grid/grid.paper.fmu',
            # model_path='../omg_grid/omg_grid.Grids.Paper_SC.fmu',
            net=net,
            history=FullHistory(),
            action_time_delay=1 * undersample)

        runner = MonteCarloRunner(agent, env)

        runner.run(num_episodes,
                   n_mc=n_MC,
                   visualise=True,
                   prepare_mc_experiment=reset_loads)

        with open(f'{save_folder}/{len_kp:.4f},{len_ki:.4f}.txt', 'w') as f:
            print(f'{agent.unsafe}', file=f)

        return agent.unsafe