Ejemplo n.º 1
0
    def explore(self, x_operator_func, y_operator_func, rational=False):
        """Fit, plot and predict every accepted variable for every country.

        For each ``(df, country, period)`` yielded by ``self.get_country_data()``
        the underlying model is set from ``df``. The complete variable set and
        then each single variable are wrapped in a ``Field`` and processed in
        turn: the equation is fitted, its coefficients are appended to a running
        table that is written to CSV, diagnostic figures are saved, predictions
        are computed and the outcome is appended to
        ``self.info[country][<field name>]``. ``self.plot_results()`` is called
        after every processed field.

        :param x_operator_func: factory called as ``x_operator_func(rational=rational)``; its result is passed to ``DataManager.set_X_operator``.
        :param y_operator_func: factory called without arguments; its result is passed to ``DataManager.set_y_operator``.
        :param rational: forwarded to ``x_operator_func`` and to ``self.optimize_predictions``.
        :return: None
        """
        figure_subfolders = [self.type_of_experiment]
        coef_stats = pd.DataFrame([])
        for raw_df, country, period in self.get_country_data():
            print('\n\n========== ========== ========== ==========')
            print('Exploring {}'.format(country))
            self.info.setdefault(country, {})

            self.set_underlying_model(raw_df)

            # First the whole variable set as one field, then every variable alone.
            candidates = [self.get_variables()]
            candidates.extend(self.get_variables())

            for candidate in candidates:
                field = Field(candidate)
                base_name = str(field)
                if not ('all' in self.accepted_variables or base_name in self.accepted_variables):
                    continue
                print('\nVariable {}'.format(base_name))

                self.info[country].setdefault(base_name, [])

                # ---------- fit eqdiff ----------
                data_manager = DataManager()
                data_manager.add_variables(field)
                # data_manager.add_regressors(self.get_regressors())
                data_manager.set_domain()

                data_manager.set_X_operator(x_operator_func(rational=rational))
                data_manager.set_y_operator(y_operator_func())
                pde_finder = self.fit_eqdifff(data_manager)

                # One identifying row, repeated on the coefficient index, glued to the coefficients.
                identifiers = pd.DataFrame([[country, period.label, period.fecha]],
                                           index=pde_finder.coefs_.index,
                                           columns=['country', 'medidas', 'fecha_final'])
                labelled_coefs = pd.concat([identifiers, pde_finder.coefs_], axis=1)
                coef_stats = pd.concat([coef_stats, labelled_coefs], axis=0, sort=True)

                # ---------- plot ----------
                coefs_figure = '{}_{}_coeficients.png'.format(base_name, country)
                with savefig(coefs_figure, self.experiment_name, subfolders=figure_subfolders, format='png'):
                    self.plot_coefficients(pde_finder)
                    plt.xscale('log')

                fit_figure = '{}_{}_fitvsreal.png'.format(base_name, country)
                with savefig(fit_figure, self.experiment_name, subfolders=figure_subfolders, format='png'):
                    self.plot_fitted_and_real(pde_finder, data_manager, col="blue", subinit=None, sublen=None)

                # --------- predictions ---------
                predictions_temp = self.optimize_predictions(pde_finder, field, x_operator_func, y_operator_func,
                                                             data_manager, period, rational)

                result = {'coefs': pde_finder.coefs_,
                          'period': period,
                          'data_real': data_manager.field,
                          'predictions': predictions_temp,
                          'data_raw': raw_df}
                self.info[country][base_name].append(result)

                # The accumulated table (all countries/fields so far) is rewritten each time.
                csv_path = config.get_filename(filename='{}_coefs.csv'.format(base_name),
                                               experiment=self.experiment_name,
                                               subfolders=[self.type_of_experiment])
                coef_stats.to_csv(csv_path)
                self.plot_results()
Ejemplo n.º 2
0
    def explore_eqdiff_fitting(self, derivative_in_y, derivatives2explore, poly2explore, rational=False, getXfunc=get_x_operator_func):
        """Scan polynomial degrees and derivative depths and record the fit quality.

        For every ``(poly_degree, derivative_depth)`` pair a fresh data manager
        is obtained, the equation is fitted, the mean r-square of the fit is
        stored, and three diagnostic figures are saved. At the end the r-square
        table is written to CSV and drawn as a heatmap.

        :param derivative_in_y: passed to ``self.get_derivative_in_y`` together with the current derivative depth; also used in file and folder names.
        :param derivatives2explore: derivative depths to scan (rows of the r-square table).
        :param poly2explore: polynomial degrees to scan (columns of the r-square table).
        :param rational: forwarded to ``getXfunc`` and used in the output file names.
        :param getXfunc: factory called as ``getXfunc(derivative_depth, poly_degree, rational=rational)``.
        :return: None
        """
        # ---------- save params of experiment ----------
        experiment_params = {
            'date': datetime.now(),
            'derivative_in_y': derivative_in_y,
            'derivatives2explore': derivatives2explore,
            'poly2explore': poly2explore,
        }
        self.experiments.append({'explore_eqdiff_fitting': experiment_params})

        rsquares = pd.DataFrame(np.nan, columns=poly2explore, index=derivatives2explore)
        y_folder = 'derivative_in_y_{}'.format(derivative_in_y)

        for poly_degree in poly2explore:
            print("\n---------------------")
            print("Polynomial degree: {}".format(poly_degree))
            print("Derivative order:", end='')

            for derivative_depth in derivatives2explore:
                print(" {}".format(derivative_depth), end='')

                data_manager = self.get_data_manager()
                x_operator = getXfunc(derivative_depth, poly_degree, rational=rational)
                data_manager.set_X_operator(x_operator)
                y_derivative = self.get_derivative_in_y(derivative_in_y, derivative_depth)
                data_manager.set_y_operator(get_y_operator_func(y_derivative))

                pde_finder = self.fit_eqdifff(data_manager)
                fit_rsquare = self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager)
                rsquares.loc[derivative_depth, poly_degree] = np.mean(fit_rsquare.values)

                subname = '_y{}_der_x{}_pol{}'.format(derivative_in_y, derivative_depth, poly_degree)
                # with savefig('feature_importance_der{}'.format(subname), self.experiment_name,
                #              subfolders=['derivative_in_y_{}'.format(derivative_in_y), 'feature_importances']):
                #     self.plot_feature_importance(pde_finder)

                with savefig('fit_vs_real_{}'.format(subname), self.experiment_name,
                             subfolders=[y_folder, 'fit_vs_real']):
                    self.plot_fitted_vs_real(pde_finder, data_manager)

                with savefig('fit_and_real_{}'.format(subname), self.experiment_name,
                             subfolders=[y_folder, 'fit_and_real']):
                    self.plot_fitted_and_real(pde_finder, data_manager)

                with savefig('zoom_fit_and_real{}'.format(subname), self.experiment_name,
                             subfolders=[y_folder, 'fit_and_real_zoom']):
                    self.plot_fitted_and_real(pde_finder, data_manager,
                                              subinit=self.sub_set_init, sublen=self.sub_set_len)

        if derivative_in_y == -1:
            # because we want to plot the maximum derivative value.
            rsquares.index = rsquares.index + 1

        summary_name = 'rsquares_eqfit_der_y{}_rational{}'.format(derivative_in_y, rational)
        save_csv(rsquares, summary_name, self.experiment_name)

        # ---------- plot heatmap of rsquares ----------
        with savefig('rsquares_eqfit_der_y{}_rational{}'.format(derivative_in_y, rational), self.experiment_name):
            plt.close('all')
            # Multiplying by the boolean mask zeroes out the non-positive r-squares.
            sns.heatmap(rsquares * (rsquares > 0), annot=True)
            plt.xlabel("Polynomial max order")
            plt.ylabel("Derivative max order")
            plt.title("rsquares for derivative in y {}".format(derivative_in_y))
            plt.title("rsquares for derivative in y {}".format(derivative_in_y))
Ejemplo n.º 3
0
    def explore_phase_diagram_delayed(self, prediction_methods, max_delay_inl_x, poly_degree, delay_in_y=0,
                                      rational=False, getXfunc=get_x_operator_func_delay):
        """Fit a delay-based model and save phase diagrams of real and predicted series.

        The experiment parameters are recorded in ``self.experiments``, the
        equation is fitted on the data manager returned by
        ``self.get_data_manager()``, predictions are produced with
        ``self.do_predictions`` (seeded with any previously stored predictions),
        real and predicted data are written to CSV, and one phase diagram per
        variable is saved for the real series and for every prediction method.

        :param prediction_methods: iterable of prediction method names; it is sorted before use.
        :param max_delay_inl_x: maximum delay passed as first argument to ``getXfunc``. The name keeps its historical spelling so existing keyword callers keep working.
        :param poly_degree: polynomial degree passed as second argument to ``getXfunc``.
        :param delay_in_y: delay applied along axis ``'t'`` to build the y operator.
        :param rational: passed as third argument to ``getXfunc``.
        :param getXfunc: factory called as ``getXfunc(max_delay, poly_degree, rational)``.
        :return: None
        """
        # The signature spells the parameter ``max_delay_inl_x`` while the body
        # used ``max_delay_in_x``; binding the intended name here avoids the
        # NameError that every call used to hit, without changing the interface.
        max_delay_in_x = max_delay_inl_x

        # ---------- save params of experiment ----------
        self.experiments.append({'explore_phase_diagram': {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'delay_in_y': delay_in_y,
            'max_delay_in_x': max_delay_in_x,
            'poly_degree': poly_degree}
        })

        # ----------------------------------------
        prediction_methods = sorted(prediction_methods)

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(max_delay_in_x, poly_degree, rational))
        data_manager.set_y_operator(lambda field: Delay(axis_name='t', delay=delay_in_y) * field)

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        # ---------- predictions ----------
        subfolders = ['phase_diagram']
        predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
        real, predictions = self.do_predictions(prediction_methods, pde_finder, data_manager,
                                                self.phase_diagram_horizon, num_evaluations=1,
                                                predictions=predictions)
        print(predictions)
        save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

        # if we want to append new methods.
        prediction_methods = predictions.method.unique()
        method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

        # ---------- evaluate statistics ----------
        time_step = data_manager.domain.step_width['t']
        for var in data_manager.field.data:
            var_name = var.get_full_name()

            # ---------- plot phase diagram ----------
            with savefig('phase_diagram_{}_real'.format(var_name), self.experiment_name, subfolders=subfolders):
                self.plot_phase_diagram(real[var_name].values.ravel(), dx=time_step,
                                        method='real', var_name=var_name, color='black')

            for method, df in predictions.groupby('method'):
                with savefig('phase_diagram_{}_{}'.format(var_name, method), self.experiment_name,
                             subfolders=subfolders):
                    self.plot_phase_diagram(df[var_name].values.ravel(), dx=time_step,
                                            method=method, var_name=var_name,
                                            color=method_colors[method])

            plt.close("all")
Ejemplo n.º 4
0
    def plot_results(self):
        """Plot real data, training data and predictions per country and dump predictions to CSV.

        Iterates ``self.info`` (country -> variable name -> list of result
        dicts). For every entry one figure is saved, with one subplot per
        variable found in the first result's ``'data_real'``. Each subplot shows
        the country's series taken from ``self.df_data`` (cumulated when
        ``self.cumulative`` is truthy), the training data and the predictions of
        every stored result, and a vertical line at ``period.fecha`` when that
        marker can be drawn. The predictions of every result are also written to
        a CSV file with an extra ``'real'`` column.

        Note: the ``'real'`` column is added in place to the DataFrame stored
        under ``info['predictions']``.

        :return: None
        """
        for country, vars_info in self.info.items():
            for _, list_info in vars_info.items():
                # ------ original data ------
                country_data = self.df_data.loc[self.df_data['Countries and territories'] == country, :]
                country_data = country_data.sort_values(by='DateRep')
                if self.cumulative:
                    country_data['Deaths'] = country_data['Deaths'].cumsum()
                    country_data['Cases'] = country_data['Cases'].cumsum()

                # Drops the last three characters of each variable's string form
                # (presumably a fixed-length suffix -- TODO confirm) to recover the column name.
                original_var_names = [str(var)[:-3] for var in list_info[0]['data_real'].data]
                var_names = [english2spanish_dict[v] for v in original_var_names]
                lines = {}
                with savefig('{}_predict_{}.png'.format(country, '_'.join(var_names)), self.experiment_name,
                             subfolders=[self.type_of_experiment], format='png'):
                    nvars = len(original_var_names)
                    fig, ax = plt.subplots(ncols=nvars, nrows=1, figsize=(8 * nvars, 8))
                    if nvars == 1:
                        # Wrap the single axes object so the zip below can iterate it.
                        ax = [ax]

                    for i, (temp_ax, original_var_name, var_name) in enumerate(zip(ax, original_var_names, var_names)):
                        temp_ax.set_title(english2spanish_dict[country] + ' ' + var_name.lower())
                        temp_ax.set_xlabel('Time')
                        temp_ax.set_ylabel(var_name)

                        # ------------------ plot real ------------------
                        lab = 'Data real {}'.format(var_name.lower())
                        lines[lab], = temp_ax.plot(country_data['DateRep'], country_data[original_var_name], '.-',
                                               c='tab:green', label=lab)

                        # xmin = np.Inf
                        # ``real`` and ``t_real`` accumulate over all results of this variable.
                        real = []
                        t_real = []
                        ymax = 0
                        for info in list_info:
                            var = info['data_real'].data[i]
                            # Leave headroom: the y-axis reaches twice the largest training value seen so far.
                            ymax = np.max((ymax, var.data.max() * 2))
                            # xmin = np.min((xmin, min([d.data.min() for d in info['data_real'].data])))

                            # plt.xlim(left=xmin)
                            temp_ax.set_ylim((0, ymax))

                            # ------------------ plot real ------------------
                            lab = 'Data real {} for train'.format(var_name.lower())
                            real2use = var.data.tolist()
                            real += real2use
                            dt = var.domain.step_width['t']
                            t_real += np.arange(var.domain.lower_limits['t'], var.domain.upper_limits['t'] + dt, dt).tolist()
                            t_real2use = info['data_raw'].loc[info['data_raw']['total_days'].isin(t_real), 'DateRep']
                            lines[lab], = temp_ax.plot(t_real2use, real2use, '.-', c='k', label=lab)

                            # ------------------ plot prediction ------------------
                            lab = 'Predictcion {}'.format(info['period'].label.lower())
                            tmin = info['data_raw'].loc[
                                info['data_raw']['total_days'].isin(
                                    info['predictions'].index), 'DateRep'].values.min()
                            t = pd.date_range(tmin, periods=info['predictions'].shape[0], freq='D')

                            lines[lab], = temp_ax.plot(t,
                                                   info['predictions'].loc[:, str(var)].values, '-',
                                                   label=lab)

                            tosave = info['predictions']
                            tosave['real'] = np.nan

                            # Fill 'real' only on prediction dates that also have training data.
                            tosave.loc[[True if j in t_real2use.tolist() else False for j in t], 'real'] \
                                = [r for j, r in zip(t_real2use, real) if j in t]
                            tosave.to_csv(
                                config.get_filename(
                                    filename='{}_predictions_{}_{}.csv'.format('_'.join(var_names), info['period'].label.lower(), var_name),
                                    experiment=self.experiment_name,
                                    subfolders=[self.type_of_experiment]))

                            try:
                                lab = 'Fin {}'.format(info['period'].label.lower())
                                lines[lab] = temp_ax.axvline(pd.to_datetime(info['period'].fecha, dayfirst=True), c='r',
                                                         linestyle='-.', ymin=0,
                                                         ymax=ymax / 2,
                                                         label=lab)
                            except Exception as err:
                                # Best effort: the marker is optional, so the plot goes on
                                # without it. Catching Exception (not a bare except) lets
                                # KeyboardInterrupt/SystemExit propagate.
                                print('Skipping end-of-period marker: {}'.format(err))

                        temp_ax.grid(axis='x', color='gray', linestyle='-.', linewidth=1, alpha=0.65)
                        temp_ax.legend(list(lines.values()), list(lines.keys()))

                    plt.tight_layout()
Ejemplo n.º 5
0
    def explore_phase_diagram(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree, rational=False,
                              method_label_dict={}, reload=True, starting_point={"t": 0}, prediction_methods2plot=None,
                              getXfunc=get_x_operator_func):
        """Fit the equation, integrate it with several methods and plot phase diagrams and series.

        The experiment parameters are recorded in ``self.experiments``; the
        equation is fitted and its coefficients plotted; predictions are
        integrated with ``pde_finder.integrate2`` for each requested method;
        real and predicted data are saved to CSV; finally, for every variable,
        a phase-diagram figure and a time-series figure are saved with one
        subplot per plotted method.

        :param prediction_methods: iterable of integration method names; it is sorted before use.
        :param derivative_in_y: passed to ``self.get_derivative_in_y``; when negative, the integration order is ``derivatives_in_x - derivative_in_y``.
        :param derivatives_in_x: derivative depth passed to ``getXfunc`` and ``self.get_derivative_in_y``.
        :param poly_degree: polynomial degree passed to ``getXfunc``.
        :param rational: passed as third argument to ``getXfunc``.
        :param method_label_dict: not used by this method; kept for interface compatibility.
        :param reload: when truthy, methods already present in the stored predictions are not recomputed; when falsy, every requested method is recomputed and its previously stored rows are dropped first.
        :param starting_point: forwarded unchanged to ``integrate2`` and ``evaluator.get_real_values``.
        :param prediction_methods2plot: methods to draw; defaults to ``prediction_methods``.
        :param getXfunc: factory called as ``getXfunc(derivatives_in_x, poly_degree, rational)``.
        :return: None
        """
        # ---------- save params of experiment ----------
        self.experiments.append({'explore_phase_diagram': {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree}
        })

        subfolders = ['phase_diagram']

        # ----------------------------------------
        prediction_methods = sorted(prediction_methods)
        if prediction_methods2plot is None:
            prediction_methods2plot = prediction_methods

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree, rational))
        data_manager.set_y_operator(get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        base_name = 'dery{}_derx{}_poly{}'.format(derivative_in_y, derivatives_in_x, poly_degree)
        # pde_finder = self.load_fitsave_eqdifff(self, data_manager)
        with savefig('coeficients_{}_dery{}_derx{}_poly{}'.format('_'.join(prediction_methods), derivative_in_y,
                                                                  derivatives_in_x, poly_degree), self.experiment_name,
                     subfolders=subfolders):
            self.plot_coefficients(pde_finder)

        # ---------- predictions ----------
        predictions = load_csv('phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)
        if predictions is not None and not reload and predictions.method.isin(prediction_methods).any():
            # Recomputing: discard the stored rows of the methods about to be recomputed.
            predictions = predictions.loc[~predictions.method.isin(prediction_methods), :]

        integration_order = (derivatives_in_x - derivative_in_y) if derivative_in_y < 0 else derivative_in_y
        for prediction_method in prediction_methods:
            # ``predictions`` is None when nothing was stored yet; in that case the
            # method must be computed even with ``reload`` set (the original
            # dereferenced None here and raised AttributeError).
            must_compute = (not reload
                            or predictions is None
                            or prediction_method not in predictions.method.unique())
            if not must_compute:
                continue

            df_predictions = pde_finder.integrate2(dm=data_manager,
                                                   dery=integration_order,
                                                   starting_point=starting_point,
                                                   horizon=self.phase_diagram_horizon,
                                                   method=prediction_method)
            df_predictions['method'] = prediction_method
            previous = (predictions if predictions is not None
                        else pd.DataFrame([], columns=df_predictions.columns))
            predictions = pd.concat([previous, df_predictions])

        real = evaluator.get_real_values([Identity()],
                                         dm=data_manager,
                                         starting_point=starting_point,
                                         domain_variable2predict='t',
                                         horizon=self.phase_diagram_horizon)
        real = pd.concat(real)
        real = real.reset_index()
        real['method'] = 'real'

        print(predictions)
        save_csv(real, 'phase_diagram_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'phase_diagram_predictions_data', self.experiment_name, subfolders=subfolders)

        # if we want to append new methods.
        # Sorted so file names and colour assignment are deterministic across runs
        # (a bare set of strings iterates in a hash-dependent order); the order
        # also matches the sorted group order of ``groupby`` below.
        prediction_methods = sorted(set(predictions.method.unique()).intersection(set(prediction_methods2plot)))
        method_colors = {pred_method: self.colors[i] for i, pred_method in enumerate(prediction_methods)}

        time_step = data_manager.domain.step_width['t']
        n_methods = len(prediction_methods)

        # ---------- evaluate statistics ----------
        for var in data_manager.field.data:
            var_name = var.get_full_name()

            # ---------- plot phase diagram ----------
            with savefig('phase_diagram_{}_{}'.format(var_name, '-'.join(prediction_methods)), self.experiment_name,
                         subfolders=subfolders):
                fig, allax = plt.subplots(nrows=n_methods, figsize=(15, n_methods * 15), sharex=True)
                for i, (method, df) in enumerate(
                        predictions.loc[predictions.method.isin(prediction_methods), :].groupby('method')):
                    # plt.subplots returns a single axes object (not an array) when nrows == 1.
                    ax = allax if n_methods == 1 else allax[i]
                    x, dx = self.plot_phase_diagram(real[var_name].values.ravel(),
                                                    dx=time_step,
                                                    method='real', var_name=var_name, color='black', ax=ax)
                    # Frame the real trajectory with a margin of half its range on every side.
                    ax.set_xlim((np.min(x) - (np.max(x) - np.min(x)) / 2, np.max(x) + (np.max(x) - np.min(x)) / 2))
                    ax.set_ylim(
                        (np.min(dx) - (np.max(dx) - np.min(dx)) / 2, np.max(dx) + (np.max(dx) - np.min(dx)) / 2))
                    self.plot_phase_diagram(df[var_name].values.ravel(), dx=time_step,
                                            method=method, var_name=var_name,
                                            color=method_colors[method], ax=ax)
                    ax.legend()
            plt.close("all")

            # ---------- plot series ----------
            with savefig('predictions_{}_{}'.format(var_name, '-'.join(prediction_methods)), self.experiment_name,
                         subfolders=subfolders):
                fig, allax = plt.subplots(nrows=n_methods, figsize=(15, n_methods * 15), sharex=True)
                for i, (method, df) in enumerate(
                        predictions.loc[predictions.method.isin(prediction_methods), :].groupby('method')):
                    ax = allax if n_methods == 1 else allax[i]
                    real_series = real[var_name].values.ravel()
                    ax.plot(real['index'].values.ravel() * time_step,
                            real[var_name].values.ravel(), label='real', c='black')
                    ax.set_ylim((np.min(real_series) - (np.max(real_series) - np.min(real_series)) / 2,
                                 np.max(real_series) + (np.max(real_series) - np.min(real_series)) / 2))
                    ax.plot(real['index'].values.ravel() * time_step,
                            df[var_name].values.ravel(), label='model',
                            c=method_colors[method])

                    ax.set_xlabel(data_manager.domain.axis_names[0], fontsize=20)
                    ax.set_ylabel(varname2latex(var_name, derivative=0), fontsize=20, rotation=0)
                    ax.legend()
            plt.close("all")
Ejemplo n.º 6
0
    def explore_predictions(self, prediction_methods, derivative_in_y, derivatives_in_x, poly_degree,
                            method_label_dict={}, getXfunc=get_x_operator_func):
        """Fit the equation, predict ahead and evaluate r-square and MAPE per horizon step.

        The experiment parameters are recorded in ``self.experiments``; the
        equation is fitted; predictions are produced with
        ``self.do_predictions`` (seeded with previously stored predictions);
        real and predicted data are saved to CSV. For every variable the
        r-square, MAPE and MAPE deviation are computed per method and index
        group (reusing stored r-square/MAPE columns when available), the
        r-square and MAPE tables are saved, and both are plotted.

        :param prediction_methods: iterable of prediction method names; it is sorted before use.
        :param derivative_in_y: passed to ``self.get_derivative_in_y``.
        :param derivatives_in_x: derivative depth passed to ``getXfunc``; ``derivatives_in_x + 1`` is passed to ``do_predictions`` as ``dery``.
        :param poly_degree: polynomial degree passed to ``getXfunc``.
        :param method_label_dict: mapping method -> legend label; when equal to ``{}`` every method labels itself.
        :param getXfunc: factory called as ``getXfunc(derivatives_in_x, poly_degree)``.
        :return: None
        """
        # ---------- save params of experiment ----------
        experiment_params = {
            'date': datetime.now(),
            'prediction_methods': prediction_methods,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree,
        }
        self.experiments.append({'explore_predictions': experiment_params})

        # ----------------------------------------
        prediction_methods = list(sorted(prediction_methods))

        data_manager = self.get_data_manager()
        data_manager.set_X_operator(getXfunc(derivatives_in_x, poly_degree))
        y_derivative = self.get_derivative_in_y(derivative_in_y, derivatives_in_x)
        data_manager.set_y_operator(get_y_operator_func(y_derivative))

        # ---------- fit eqdiff ----------
        pde_finder = self.fit_eqdifff(data_manager)

        # ---------- predictions ----------
        subfolders = ['predictions']
        stored_predictions = load_csv('future_predictions_data', self.experiment_name, subfolders=subfolders)
        real, predictions = self.do_predictions(prediction_methods=prediction_methods,
                                                pde_finder=pde_finder,
                                                dery=derivatives_in_x + 1,
                                                data_manager=data_manager,
                                                horizon=self.horizon,
                                                num_evaluations=self.num_evaluations,
                                                predictions=stored_predictions)
        save_csv(real, 'future_real_data', self.experiment_name, subfolders=subfolders)
        save_csv(predictions, 'future_predictions_data', self.experiment_name, subfolders=subfolders)

        # if we want to append new methods.
        prediction_methods = predictions.method.unique()
        method_colors = {}
        for color_ix, pred_method in enumerate(prediction_methods):
            method_colors[pred_method] = self.colors[color_ix]

        if method_label_dict == {}:
            method_label_dict = {method: method for method in prediction_methods}

        # ---------- evaluate statistics ----------
        for var in data_manager.field.data:
            var_name = var.get_full_name()

            # ---------- statistics ----------
            # Three independent NaN tables: horizon step x method.
            rsq, mape, mape_std = (
                pd.DataFrame(np.nan, columns=prediction_methods, index=np.arange(self.horizon))
                for _ in range(3)
            )

            # check if there are methods already calculated
            old_rsq = load_csv('r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            old_mape = load_csv('mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            old_methods = []
            if old_rsq is not None:
                old_methods = old_rsq.columns
                rsq[old_methods] = old_rsq
                mape[old_methods] = old_mape

            # calculate statistics
            for method, method_df in predictions.groupby('method'):
                if method in old_methods:
                    continue

                grouped_pairs = zip(method_df.groupby(level='index'), real.groupby(level='index'))
                for (ix_p, dfp), (ix_r, dfr) in grouped_pairs:
                    assert ix_p == ix_r
                    predicted, observed = dfp[var_name], dfr[var_name]
                    rsq.loc[ix_p, method] = evaluator.rsquare(predicted, observed)
                    mape.loc[ix_p, method] = evaluator.mape(predicted, observed)
                    mape_std.loc[ix_p, method] = evaluator.mape_sd(predicted, observed)

            # save
            save_csv(rsq, 'r2_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)
            save_csv(mape, 'mape_predictions_{}'.format(var_name), self.experiment_name, subfolders=subfolders)

            # ---------- plot statistics ----------
            with savefig('R2_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
                fig, ax = plt.subplots()
                for method in rsq.columns:
                    # Only the strictly positive r-squares are drawn.
                    positive = rsq[method] > 0
                    ax.plot(rsq.index[positive], rsq[method][positive], '.-', c=method_colors[method],
                            label=method_label_dict[method])
                plt.legend()

            with savefig('mape_{}'.format(var_name), self.experiment_name, subfolders=subfolders):
                fig, ax = plt.subplots()
                for method in rsq.columns:
                    # Only MAPE values below 1 are drawn, with a band of +/- one deviation.
                    below_one = mape[method] < 1
                    centre = mape[method][below_one]
                    spread = mape_std[method][below_one]
                    ax.plot(mape.index[below_one], centre, c=method_colors[method],
                            label=method_label_dict[method])
                    ax.fill_between(mape.index[below_one],
                                    centre - spread,
                                    centre + spread,
                                    color=method_colors[method], alpha=0.4)
                plt.legend()
            plt.close("all")
Ejemplo n.º 7
0
    def explore_noise_discretization(self, noise_range, discretization_range, derivative_in_y, derivatives_in_x,
                                     poly_degree, std_of_discrete_grad=False):
        """Measure how added noise and coarser sampling affect the quality of the equation fit.

        For every ``(measure_dt, noise)`` pair the series are subsampled every
        ``measure_dt`` steps, Gaussian noise scaled by ``noise`` times a
        reference deviation is added, the equation is fitted on the noisy data
        and its r-square is evaluated against targets built from the noiseless
        subsampled data. Diagnostic figures are saved for each pair, and the
        r-square table is saved to CSV and drawn as a heatmap.

        :param noise_range: noise multipliers to scan (rows of the r-square table).
        :param discretization_range: subsampling strides to scan (columns of the r-square table).
        :param derivative_in_y: passed to ``self.get_derivative_in_y``; also used in file and folder names.
        :param derivatives_in_x: derivative depth passed to ``get_x_operator_func`` and ``self.get_derivative_in_y``.
        :param poly_degree: polynomial degree passed to ``get_x_operator_func``.
        :param std_of_discrete_grad: True if we want to calculate the std of the gradient of the series using the discretized version. Otherwise it will be the original one.
        :return: DataFrame of r-squares indexed by noise (index rounded to two decimals) with one column per discretization.
        """
        # ---------- save params of experiment ----------
        experiment_params = {
            'date': datetime.now(),
            'noise_range': noise_range,
            'discretization_range': discretization_range,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree,
        }
        self.experiments.append({'explore_noise_discretization': experiment_params})

        # ----------------------------------------
        rsquares = pd.DataFrame(np.nan, index=noise_range, columns=discretization_range)
        rsquares.index.name = "Noise"
        rsquares.columns.name = "Discretization"

        # ----------------------------------------
        data_manager = self.get_data_manager()
        noise_folder = 'noise_derivative_in_y_{}'.format(derivative_in_y)

        # Reference deviation per variable: std of the absolute gradient of the full series.
        std_of_vars = []
        for var in data_manager.field.data:
            abs_gradient = np.abs(np.gradient(var.data))
            reference_std = np.std(abs_gradient)
            std_of_vars.append(reference_std)
            with savefig('Distribution_series_differences_{}'.format(var.get_full_name()), self.experiment_name,
                         subfolders=[noise_folder]):
                sns.distplot(abs_gradient, bins=int(np.sqrt(len(var.data))))
                plt.axvline(x=reference_std)

        # ---------- Noise evaluation ----------
        for measure_dt in discretization_range:
            print("\n---------------------")
            print("meassure dt: {}".format(measure_dt))
            print("Noise: ", end='')
            for noise in noise_range:
                print(noise, end='')
                # choose steps with bigger dt; and sum normal noise.
                new_t = data_manager.domain.get_range("t")['t'][::measure_dt]
                domain_temp = Domain(lower_limits_dict={"t": np.min(new_t)},
                                     upper_limits_dict={"t": np.max(new_t)},
                                     step_width_dict={"t": data_manager.domain.step_width['t'] * measure_dt})

                noisy_manager = DataManager()
                clean_manager = DataManager()
                for base_std, var in zip(std_of_vars, data_manager.field.data):
                    data_original = var.data[::measure_dt]

                    if std_of_discrete_grad:
                        noise_std = np.std(np.abs(np.gradient(data_original)))
                    else:
                        noise_std = base_std

                    data = data_original + np.random.normal(loc=0, scale=noise_std * noise, size=len(data_original))
                    noisy_manager.add_variables(
                        Variable(data, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                    clean_manager.add_variables(
                        Variable(data_original, domain_temp, domain2axis={"t": 0}, variable_name=var.name))

                # Same call order as before: the noisy manager is configured first, then the clean one.
                for manager in (noisy_manager, clean_manager):
                    manager.add_regressors([])
                    manager.set_domain()

                for manager in (noisy_manager, clean_manager):
                    manager.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
                    manager.set_y_operator(
                        get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

                pde_finder = self.fit_eqdifff(noisy_manager)

                # Targets come from the noiseless data; the regressors from the noisy data.
                y = clean_manager.get_y_dframe(self.testSplit)
                yhat = pd.DataFrame(pde_finder.transform(noisy_manager.get_X_dframe(self.testSplit)),
                                    columns=y.columns)
                rsquares.loc[noise, measure_dt] = evaluator.rsquare(yhat=yhat, y=y).values
                # rsquares.loc[noise, measure_dt] = self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager_temp).values

                noise_tag = str(noise).replace('.', '')

                with savefig('fit_vs_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, noise_tag, measure_dt),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_vs_real']):
                    self.plot_fitted_vs_real(pde_finder, noisy_manager)

                with savefig('fit_and_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, noise_tag, measure_dt),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_and_real']):
                    self.plot_fitted_and_real(pde_finder, noisy_manager)

                with savefig('zoom_fit_and_real_der_y{}_dt{}_noise{}'.format(derivative_in_y, measure_dt, noise_tag),
                             self.experiment_name,
                             subfolders=[noise_folder, 'fit_and_real_zoom']):
                    self.plot_fitted_and_real(pde_finder, noisy_manager,
                                              subinit=self.sub_set_init, sublen=self.sub_set_len)

        summary_name = 'noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y)
        save_csv(rsquares, summary_name, self.experiment_name)
        # plt.pcolor(rsquares * (rsquares > 0), cmap='autumn')
        # plt.yticks(np.arange(0.5, len(rsquares.index), 1), np.round(rsquares.index, decimals=2))
        # plt.xticks(np.arange(0.5, len(rsquares.columns), 1), rsquares.columns)

        # ---------- plot heatmap of rsquares ----------
        with savefig('noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name):
            rsquares.index = np.round(rsquares.index, decimals=2)
            plt.close('all')
            # Multiplying by the boolean mask zeroes out the non-positive r-squares.
            sns.heatmap(rsquares * (rsquares > 0), annot=True)
            plt.xlabel("Discretization (dt)")
            plt.ylabel("Noise (k*std)")
            plt.title("Noise and discretization for derivative in y {}".format(derivative_in_y))

        return rsquares