    def test_fit_2(self):
        data_manager = DataManager()
        data_manager.add_variables(self.v)
        data_manager.add_variables(self.v**2)
        data_manager.set_X_operator(
            lambda field: Poly(3) * (PolyD({"x": 1}) * field))
        data_manager.set_y_operator(lambda field: D(2, "x") * field)

        pde_finder = PDEFinder(with_mean=True, with_std=True)
        pde_finder.set_fitting_parameters(cv=10, n_alphas=100, alphas=None)
        pde_finder.fit(data_manager.get_X_dframe(),
                       data_manager.get_y_dframe())
        print(pde_finder.coefs_)  # strange coefficient values obtained

        print((pde_finder.transform(data_manager.get_X_dframe()) -
               data_manager.get_y_dframe()).abs().mean().values)
        assert np.max((pde_finder.transform(data_manager.get_X_dframe()) -
                       data_manager.get_y_dframe()).abs().mean().values) < 1e-5

        res = pde_finder.get_equation(*data_manager.get_Xy_eq())
        print(res)

        res = pde_finder.get_equation(data_manager.get_X_sym(),
                                      data_manager.get_y_sym())
        print(res)

    def test_evaluator(self):
        trainSplit = DataSplit({"x": 0.7})
        testSplit = DataSplit({"x": 0.3}, {"x": 0.7})

        data_manager = DataManager()
        data_manager.add_variables(self.v)
        data_manager.add_variables(self.x)
        data_manager.set_X_operator(lambda field: PolyD({"x": 1}) * field)
        data_manager.set_y_operator(lambda field: D(3, "x") * field)

        pde_finder = PDEFinder(with_mean=True, with_std=True)
        pde_finder.set_fitting_parameters(cv=20, n_alphas=100, alphas=None)
        pde_finder.fit(data_manager.get_X_dframe(trainSplit),
                       data_manager.get_y_dframe(trainSplit))
        print(pde_finder.coefs_)  # strange coefficient values obtained

        real, pred = evaluate_predictions(pde_finder,
                                          data_split_operator=testSplit,
                                          dm=data_manager,
                                          starting_point={"x": -1},
                                          domain_variable2predict="x",
                                          horizon=10,
                                          num_evaluations=1)

        assert np.mean(
            real.drop(["random_split", "method"], axis=1).values -
            pred.drop(["method"], axis=1).values[1:, :]) < 0.001

    def test_integrate(self):
        trainSplit = DataSplit({"x": 0.7})
        testSplit = DataSplit({"x": 0.3}, {"x": 0.7})

        data_manager = DataManager()
        data_manager.add_variables(self.v)
        data_manager.set_X_operator(lambda field: PolyD({"x": 1}) * field)
        data_manager.set_y_operator(lambda field: D(2, "x") * field)
        data_manager.set_domain()

        pde_finder = PDEFinder(with_mean=True, with_std=True)
        pde_finder.set_fitting_parameters(cv=20, n_alphas=100, alphas=None)
        pde_finder.fit(data_manager.get_X_dframe(trainSplit),
                       data_manager.get_y_dframe(trainSplit))
        print(pde_finder.coefs_)  # strange coefficient values obtained

        # warning!!!
        predictions_df = pde_finder.integrate(
            [DataSplitOnIndex({"x": 5}) * testSplit,
             DataSplitOnIndex({"x": 20}) * testSplit],
            data_manager,
            starting_point={"x": -1},
            domain_variable2predict="x",
            horizon=10)

        print(predictions_df)

    def test_get_var(self):
        data_manager = DataManager()
        data_manager.add_variables([self.v])
        data_manager.add_regressors(self.x)
        data_manager.set_domain()
        data_manager.set_X_operator(lambda field: PolyD({"x": 1}) * field)
        data_manager.set_y_operator(lambda field: D(1, "x") * field)

        assert all(data_manager.get_X_dframe().columns == ['v(x,y)', 'x(x)'])
        assert all(data_manager.get_y_dframe().columns ==
                   ['1.0*Derivative(v(x,y),x)'])
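
The fixtures self.v and self.x used by these tests are defined elsewhere. A hypothetical setUp sketch, assuming the Domain/Variable API shown in SkODEFind.prepare_data below and a two-axis domain (the 'v(x,y)' column asserted in test_get_var suggests v lives on (x, y)); all grid values are illustrative:

    def setUp(self):
        # hypothetical fixture; names follow the columns asserted above
        domain = Domain(lower_limits_dict={"x": 0.0, "y": 0.0},
                        upper_limits_dict={"x": 2 * np.pi, "y": 2 * np.pi},
                        step_width_dict={"x": 0.1, "y": 0.1})
        x_grid = domain.get_range("x")["x"]
        y_grid = domain.get_range("y")["y"]
        self.x = Variable(x_grid, domain, domain2axis={"x": 0},
                          variable_name="x")
        self.v = Variable(np.sin(x_grid)[:, None] * np.cos(y_grid)[None, :],
                          domain, domain2axis={"x": 0, "y": 1},
                          variable_name="v")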
Example #5
class SkODEFind:
    def __init__(self,
                 target_derivative_order,
                 max_derivative_order,
                 max_polynomial_order,
                 rational=False,
                 with_mean=True,
                 with_std=True,
                 alphas=150,
                 max_iter=10000,
                 cv=20,
                 use_lasso=True,
                 regressor_builders=()):
        self.data_manager = DataManager()
        self.coefs_ = None
        self.rational = rational
        self.use_lasso = use_lasso
        self.cv = cv
        self.max_iter = max_iter
        self.alphas = alphas
        self.with_std = with_std
        self.with_mean = with_mean
        self.max_polynomial_order = max_polynomial_order
        self.max_derivative_order = max_derivative_order
        self.target_derivative_order = target_derivative_order
        self.var_names = None

        self.pde_finder = PDEFinder(with_mean=self.with_mean,
                                    with_std=self.with_std,
                                    use_lasso=self.use_lasso)
        self.pde_finder.set_fitting_parameters(cv=self.cv,
                                               n_alphas=self.alphas,
                                               max_iter=self.max_iter)

        self.regressor_builders = regressor_builders

    # ---------- dictionary of functions and target ----------
    def get_x_operator_func(self):
        def x_operator(field, regressors):
            new_field = copy.deepcopy(field)
            if self.max_derivative_order > 0:
                new_field = PolyD({'t': self.max_derivative_order}) * new_field
            new_field.append(regressors)
            if self.rational:
                new_field.append(1.0 / new_field)
            new_field = Poly(self.max_polynomial_order) * new_field
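            # Note (assumption about Poly/PolyD semantics): new_field now holds
            # polynomial combinations, up to degree max_polynomial_order, of the
            # series, its time derivatives, the regressors and, when
            # rational=True, their reciprocals.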
            return new_field

        return x_operator

    def get_y_operator_func(self):
        def y_operator(field):
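            # regression target: the k-th time derivative of the field,
            # with k = target_derivative_order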
            new_field = D(self.target_derivative_order, "t") * field
            return new_field

        return y_operator

    def get_regressors(self, domain, variables):
        # TODO: only works with time-variable regressors
        regressors = []
        for reg_builder in self.regressor_builders:
            for variable in variables:
                reg_builder.fit(variable.domain, variable)
                series = reg_builder.transform(
                    domain.get_range(axis_names=[reg_builder.domain_axes_name])
                    [reg_builder.domain_axes_name])
                regressors.append(
                    Variable(series,
                             domain,
                             domain2axis={reg_builder.domain_axes_name: 0},
                             variable_name='{}_{}'.format(
                                 variable.get_name(), reg_builder.name)))
        return regressors

    def prepare_data(self, X):
        # ---------- Prepare data ----------
        # build the time domain from the index of X, assuming a uniform step
        domain = Domain(lower_limits_dict={"t": X.index.min()},
                        upper_limits_dict={"t": X.index.max()},
                        step_width_dict={"t": np.diff(X.index)[0]})

        # define variables
        X = pd.DataFrame(X)
        variables = [
            Variable(X[series_name].values.ravel(),
                     domain,
                     domain2axis={"t": 0},
                     variable_name=series_name)
            for series_name in X.columns
        ]

        self.data_manager.add_variables(variables)
        self.data_manager.add_regressors(self.get_regressors(
            domain, variables))
        self.data_manager.set_domain()

        self.data_manager.set_X_operator(self.get_x_operator_func())
        self.data_manager.set_y_operator(self.get_y_operator_func())
        self.var_names = [
            var.get_full_name() for var in self.data_manager.field.data
        ]

    def fit(self, X: (pd.DataFrame, pd.Series), y=None):
        """
        In principle the target is not needed because it uses the X time series to fit the differential equation.
        :param X: rows are series; columns index time.
        :param y:
        :return:
        """
        self.prepare_data(X)

        # ---------- fit data ----------
        self.pde_finder = PDEFinder(with_mean=self.with_mean,
                                    with_std=self.with_std,
                                    use_lasso=self.use_lasso)
        self.pde_finder.set_fitting_parameters(cv=self.cv,
                                               n_alphas=self.alphas,
                                               max_iter=self.max_iter)

        self.pde_finder.fit(self.data_manager.get_X_dframe(),
                            self.data_manager.get_y_dframe())
        self.coefs_ = self.pde_finder.coefs_

    def predict(self, forecast_horizon):
        assert self.coefs_ is not None, 'coefs_ not defined; call fit or set_coefs first'
        dt = self.data_manager.domain.step_width["t"]
        t_end = self.data_manager.domain.upper_limits["t"]
        times = forecast_horizon * dt
        predictions = self.pde_finder.integrate(
            t=times + t_end - dt,
            data_manager=self.data_manager,
            dery=self.target_derivative_order)
        return pd.DataFrame(predictions, index=times + t_end)[0]

    def __str__(self):
        return 'skodefind_target{}_maxd{}_maxpoly{}'.format(
            self.target_derivative_order, self.max_derivative_order,
            self.max_polynomial_order)
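
A minimal usage sketch for SkODEFind (hypothetical data; assumes numpy and pandas as imported above, and that predict accepts an array of step counts, as its arithmetic suggests):

t = np.arange(0, 20, 0.05)
X = pd.DataFrame({"u": np.sin(t)}, index=t)

finder = SkODEFind(target_derivative_order=2,
                   max_derivative_order=1,
                   max_polynomial_order=2)
finder.fit(X)         # for a sine, expect something close to u_tt = -u
print(finder.coefs_)  # sparse coefficient table
forecast = finder.predict(np.arange(1, 21))  # 20 steps past the series end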
Example #6
    def explore_noise_discretization(self, noise_range, discretization_range, derivative_in_y, derivatives_in_x,
                                     poly_degree, std_of_discrete_grad=False):
        """

        :param noise_range:
        :param discretization_range:
        :param derivative_in_y:
        :param derivatives_in_x:
        :param poly_degree:
        :param std_of_discrete_grad: True to compute the std of the series gradient from the discretized version; otherwise the original series is used.
        :return:
        """
        # ---------- save params of experiment ----------
        self.experiments.append({'explore_noise_discretization': {
            'date': datetime.now(),
            'noise_range': noise_range,
            'discretization_range': discretization_range,
            'derivative_in_y': derivative_in_y,
            'derivatives_in_x': derivatives_in_x,
            'poly_degree': poly_degree}
        })

        # ----------------------------------------
        rsquares = pd.DataFrame(np.nan, index=noise_range, columns=discretization_range)
        rsquares.index.name = "Noise"
        rsquares.columns.name = "Discretization"

        # ----------------------------------------
        data_manager = self.get_data_manager()

        std_of_vars = []
        for var in data_manager.field.data:
            series_grad = np.abs(np.gradient(var.data))
            std_of_vars.append(np.std(series_grad))
            with savefig('Distribution_series_differences_{}'.format(var.get_full_name()), self.experiment_name,
                         subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y)]):
                sns.distplot(series_grad, bins=int(np.sqrt(len(var.data))))
                plt.axvline(x=std_of_vars[-1])

        # ---------- Noise evaluation ----------
        for measure_dt in discretization_range:
            print("\n---------------------")
            print("meassure dt: {}".format(measure_dt))
            print("Noise: ", end='')
            for noise in noise_range:
                print(noise, end=' ')
                # subsample at a coarser dt and add Gaussian noise
                new_t = data_manager.domain.get_range("t")['t'][::measure_dt]
                domain_temp = Domain(lower_limits_dict={"t": np.min(new_t)},
                                     upper_limits_dict={"t": np.max(new_t)},
                                     step_width_dict={"t": data_manager.domain.step_width['t'] * measure_dt})

                data_manager_temp = DataManager()
                data_manager_original_temp = DataManager()
                for std, var in zip(std_of_vars, data_manager.field.data):
                    data_original = var.data[::measure_dt]

                    if std_of_discrete_grad:
                        series_grad = np.abs(np.gradient(data_original))
                        std = np.std(series_grad)

                    data = data_original + np.random.normal(loc=0, scale=std * noise, size=len(data_original))
                    data_manager_temp.add_variables(
                        Variable(data, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                    data_manager_original_temp.add_variables(
                        Variable(data_original, domain_temp, domain2axis={"t": 0}, variable_name=var.name))
                data_manager_temp.add_regressors([])
                data_manager_temp.set_domain()
                data_manager_original_temp.add_regressors([])
                data_manager_original_temp.set_domain()

                data_manager_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
                data_manager_temp.set_y_operator(
                    get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))
                data_manager_original_temp.set_X_operator(get_x_operator_func(derivatives_in_x, poly_degree))
                data_manager_original_temp.set_y_operator(
                    get_y_operator_func(self.get_derivative_in_y(derivative_in_y, derivatives_in_x)))

                pde_finder = self.fit_eqdifff(data_manager_temp)

                y = data_manager_original_temp.get_y_dframe(self.testSplit)
                yhat = pd.DataFrame(pde_finder.transform(data_manager_temp.get_X_dframe(self.testSplit)),
                                    columns=y.columns)
                rsquares.loc[noise, measure_dt] = evaluator.rsquare(yhat=yhat, y=y).values
                # rsquares.loc[noise, measure_dt] = self.get_rsquare_of_eqdiff_fit(pde_finder, data_manager_temp).values

                with savefig('fit_vs_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, str(noise).replace('.', ''),
                                                                       measure_dt),
                             self.experiment_name,
                             subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_vs_real']):
                    self.plot_fitted_vs_real(pde_finder, data_manager_temp)

                with savefig('fit_and_real_der_y{}_noise{}_dt{}'.format(derivative_in_y, str(noise).replace('.', ''),
                                                                        measure_dt),
                             self.experiment_name,
                             subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real']):
                    self.plot_fitted_and_real(pde_finder, data_manager_temp)

                with savefig('zoom_fit_and_real_der_y{}_dt{}_noise{}'.format(derivative_in_y, measure_dt,
                                                                             str(noise).replace('.', '')),
                             self.experiment_name,
                             subfolders=['noise_derivative_in_y_{}'.format(derivative_in_y), 'fit_and_real_zoom']):
                    self.plot_fitted_and_real(pde_finder, data_manager_temp, subinit=self.sub_set_init,
                                              sublen=self.sub_set_len)

        save_csv(rsquares, 'noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name)
        # plt.pcolor(rsquares * (rsquares > 0), cmap='autumn')
        # plt.yticks(np.arange(0.5, len(rsquares.index), 1), np.round(rsquares.index, decimals=2))
        # plt.xticks(np.arange(0.5, len(rsquares.columns), 1), rsquares.columns)

        # ---------- plot heatmap of rsquares ----------
        with savefig('noise_discretization_rsquares_eqfit_der_y{}'.format(derivative_in_y), self.experiment_name):
            rsquares.index = np.round(rsquares.index, decimals=2)
            plt.close('all')
            sns.heatmap(rsquares * (rsquares > 0), annot=True)
            plt.xlabel("Discretization (dt)")
            plt.ylabel("Noise (k*std)")
            plt.title("Noise and discretization for derivative in y {}".format(derivative_in_y))

        return rsquares
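
# Hypothetical driver sketch (assumes an experiment object exposing this method
# plus the get_data_manager/fit_eqdifff/testSplit members used above; the
# expected format of derivatives_in_x is a guess):
# rsquares = experiment.explore_noise_discretization(
#     noise_range=np.linspace(0.0, 0.5, 6),
#     discretization_range=[1, 2, 5, 10, 20],
#     derivative_in_y=1,
#     derivatives_in_x=2,
#     poly_degree=2,
#     std_of_discrete_grad=True)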