Esempio n. 1
0
    def __init__(self,
                 target_derivative_order,
                 max_derivative_order,
                 max_polynomial_order,
                 rational=False,
                 with_mean=True,
                 with_std=True,
                 alphas=150,
                 max_iter=10000,
                 cv=20,
                 use_lasso=True,
                 regressor_builders=()):
        """Store the hyper-parameters and build the initial helpers.

        Every argument is kept on the instance under the same name.

        :param target_derivative_order: stored as-is.
        :param max_derivative_order: stored as-is.
        :param max_polynomial_order: stored as-is.
        :param rational: stored as-is.
        :param with_mean: forwarded to ``PDEFinder``.
        :param with_std: forwarded to ``PDEFinder``.
        :param alphas: forwarded to ``set_fitting_parameters`` as
            ``n_alphas``.
        :param max_iter: forwarded to ``set_fitting_parameters``.
        :param cv: forwarded to ``set_fitting_parameters``.
        :param use_lasso: forwarded to ``PDEFinder``.
        :param regressor_builders: stored as-is (empty tuple by default).
        """
        # Empty data container; nothing is loaded at construction time.
        self.data_manager = DataManager()

        # Nothing has been fitted yet.
        self.coefs_ = None
        self.var_names = None

        # Orders that define the candidate terms and the target.
        self.target_derivative_order = target_derivative_order
        self.max_derivative_order = max_derivative_order
        self.max_polynomial_order = max_polynomial_order
        self.rational = rational

        # Scaling and regression settings.
        self.with_mean = with_mean
        self.with_std = with_std
        self.use_lasso = use_lasso
        self.alphas = alphas
        self.max_iter = max_iter
        self.cv = cv

        # Finder configured from the settings above.
        finder = PDEFinder(with_mean=with_mean,
                           with_std=with_std,
                           use_lasso=use_lasso)
        self.pde_finder = finder
        finder.set_fitting_parameters(cv=cv,
                                      n_alphas=alphas,
                                      max_iter=max_iter)

        self.regressor_builders = regressor_builders
Esempio n. 2
0
    def test_fit_2(self):
        """Fit a PDEFinder on ``self.v`` and ``self.v**2`` and check the error.

        The candidate terms are ``Poly(3) * (PolyD({"x": 1}) * field)`` and
        the target is ``D(2, "x") * field``. The largest per-column mean
        absolute error of the fitted model must stay below ``1e-5``.
        """
        dm = DataManager()
        dm.add_variables(self.v)
        dm.add_variables(self.v**2)

        def x_operator(field):
            return Poly(3) * (PolyD({"x": 1}) * field)

        def y_operator(field):
            return D(2, "x") * field

        dm.set_X_operator(x_operator)
        dm.set_y_operator(y_operator)

        finder = PDEFinder(with_mean=True, with_std=True)
        finder.set_fitting_parameters(cv=10, n_alphas=100, alphas=None)
        finder.fit(dm.get_X_dframe(), dm.get_y_dframe())
        # Original author's note: a strange value was obtained here.
        print(finder.coefs_)

        def mean_abs_error():
            # Per-column mean absolute difference between model and target.
            predicted = finder.transform(dm.get_X_dframe())
            return (predicted - dm.get_y_dframe()).abs().mean().values

        print(mean_abs_error())
        assert np.max(mean_abs_error()) < 1e-5

        # Equation built from the (X, y) pair returned by get_Xy_eq().
        print(finder.get_equation(*dm.get_Xy_eq()))

        # Equation built from the symbolic X and y.
        print(finder.get_equation(dm.get_X_sym(), dm.get_y_sym()))
Esempio n. 3
0
    def test_integrate(self):
        """Fit on a training split of ``self.v`` and integrate on test splits.

        The model is fitted with ``PolyD({"x": 1}) * field`` as candidate
        terms and ``D(2, "x") * field`` as target, then ``integrate`` is
        called for two index-based splits combined with the test split.
        The resulting predictions are only printed, not asserted.
        """
        train_split = DataSplit({"x": 0.7})
        test_split = DataSplit({"x": 0.3}, {"x": 0.7})

        def x_operator(field):
            return PolyD({"x": 1}) * field

        def y_operator(field):
            return D(2, "x") * field

        dm = DataManager()
        dm.add_variables(self.v)
        dm.set_X_operator(x_operator)
        dm.set_y_operator(y_operator)
        dm.set_domain()

        finder = PDEFinder(with_mean=True, with_std=True)
        finder.set_fitting_parameters(cv=20, n_alphas=100, alphas=None)
        X_train = dm.get_X_dframe(train_split)
        y_train = dm.get_y_dframe(train_split)
        finder.fit(X_train, y_train)
        # Original author's note: a strange value was obtained here.
        print(finder.coefs_)

        # Original author flagged this call with "warning!!!".
        index_splits = [
            DataSplitOnIndex({"x": position}) * test_split
            for position in (5, 20)
        ]
        predictions_df = finder.integrate(index_splits,
                                          dm,
                                          starting_point={"x": -1},
                                          domain_variable2predict="x",
                                          horizon=10)

        print(predictions_df)
Esempio n. 4
0
    def fit(self, X: (pd.DataFrame, pd.Series), y=None):
        """Fit the differential equation to the time series in ``X``.

        A new ``PDEFinder`` is built from the stored hyper-parameters on
        every call; its coefficients are copied to ``self.coefs_``.

        :param X: time series data, handed unchanged to
            ``self.prepare_data``.
            NOTE(review): the previous docstring said "rows are series;
            columns index time", but the ``prepare_data`` of ``SkODEFind``
            reads time from ``X.index`` and one series per column. Confirm
            that the same holds for this class.
        :param y: ignored; the target is derived from ``X`` itself.
        :return: ``self``, so calls can be chained as with scikit-learn
            estimators.
        """
        self.prepare_data(X)

        # ---------- fit data ----------
        self.pde_finder = PDEFinder(with_mean=self.with_mean,
                                    with_std=self.with_std,
                                    use_lasso=self.use_lasso)
        self.pde_finder.set_fitting_parameters(cv=self.cv,
                                               n_alphas=self.alphas,
                                               max_iter=self.max_iter)

        self.pde_finder.fit(self.data_manager.get_X_dframe(),
                            self.data_manager.get_y_dframe())
        self.coefs_ = self.pde_finder.coefs_
        return self
Esempio n. 5
0
    def test_evaluator(self):
        """Fit on a training split and check ``evaluate_predictions`` output.

        Variables ``self.v`` and ``self.x`` are used with
        ``PolyD({"x": 1}) * field`` as candidate terms and
        ``D(3, "x") * field`` as target. The mean absolute difference
        between real and predicted values must stay below ``0.001``.
        """
        trainSplit = DataSplit({"x": 0.7})
        testSplit = DataSplit({"x": 0.3}, {"x": 0.7})

        data_manager = DataManager()
        data_manager.add_variables(self.v)
        data_manager.add_variables(self.x)
        data_manager.set_X_operator(lambda field: PolyD({"x": 1}) * field)
        data_manager.set_y_operator(lambda field: D(3, "x") * field)

        pde_finder = PDEFinder(with_mean=True, with_std=True)
        pde_finder.set_fitting_parameters(cv=20, n_alphas=100, alphas=None)
        pde_finder.fit(data_manager.get_X_dframe(trainSplit),
                       data_manager.get_y_dframe(trainSplit))
        # Original author's note: a strange value was obtained here.
        print(pde_finder.coefs_)

        real, pred = evaluate_predictions(pde_finder,
                                          data_split_operator=testSplit,
                                          dm=data_manager,
                                          starting_point={"x": -1},
                                          domain_variable2predict="x",
                                          horizon=10,
                                          num_evaluations=1)

        real_values = real.drop(["random_split", "method"], axis=1).values
        # The first row of `pred` is dropped before comparing, as in the
        # original slicing.
        pred_values = pred.drop(["method"], axis=1).values[1:, :]

        # Use the absolute error: with a signed mean, errors of opposite
        # sign cancel and any negative bias passes the check.
        assert np.mean(np.abs(real_values - pred_values)) < 0.001
Esempio n. 6
0
    def fit_eqdifff(self, data_manager):
        """Fit a new ``PDEFinder`` on the training split of ``data_manager``.

        The fit is timed under the label ``'pdefind fitting'``. When the
        training frame has more than ``self.max_train_cases`` rows, a random
        subsample of that size is fitted instead.

        :param data_manager: object providing ``get_X_dframe`` and
            ``get_y_dframe``; both are called with ``self.trainSplit``.
        :return: the fitted ``PDEFinder``.
        """
        with evaluator.timeit('pdefind fitting'):
            pde_finder = PDEFinder(with_mean=self.with_mean, with_std=self.with_std, use_lasso=self.use_lasso)
            # NOTE(review): both `self.alphas` (as n_alphas) and `self.alpha`
            # (as alphas) are read here; confirm the two attribute names
            # are intentional.
            pde_finder.set_fitting_parameters(cv=self.cv, n_alphas=self.alphas, max_iter=self.max_iter,
                                              alphas=self.alpha)
            X = data_manager.get_X_dframe(self.trainSplit)
            Y = data_manager.get_y_dframe(self.trainSplit)

            if X.shape[0] > self.max_train_cases:
                # Sample without replacement: the default (replace=True)
                # would put duplicate rows in the training subset. This is
                # safe because the branch guarantees enough rows.
                sample = np.random.choice(X.shape[0],
                                          size=self.max_train_cases,
                                          replace=False)
                X = X.iloc[sample, :]
                Y = Y.iloc[sample, :]
            # A guard on X.shape[1] against self.max_num_params used to be
            # here and was disabled by the original author.
            pde_finder.fit(X, Y)
        return pde_finder
Esempio n. 7
0
class SkODEFind:
    """Scikit-learn-style wrapper that fits an ODE in ``t`` with PDEFinder.

    The input time series are turned into ``Variable`` objects on a
    one-dimensional ``Domain`` named ``"t"``. Candidate terms and the target
    derivative are built through a ``DataManager``, and the regression is
    delegated to ``PDEFinder``.
    """

    def __init__(self,
                 target_derivative_order,
                 max_derivative_order,
                 max_polynomial_order,
                 rational=False,
                 with_mean=True,
                 with_std=True,
                 alphas=150,
                 max_iter=10000,
                 cv=20,
                 use_lasso=True,
                 regressor_builders=()):
        """Store the hyper-parameters and build the initial helpers.

        :param target_derivative_order: order of the ``t`` derivative used
            as regression target.
        :param max_derivative_order: order passed to ``PolyD`` when building
            candidate terms; no derivative terms are added when it is 0.
        :param max_polynomial_order: order passed to ``Poly`` when building
            candidate terms.
        :param rational: when true, the reciprocal of the field is appended
            to the candidate terms before the polynomial expansion.
        :param with_mean: forwarded to ``PDEFinder``.
        :param with_std: forwarded to ``PDEFinder``.
        :param alphas: forwarded to ``set_fitting_parameters`` as
            ``n_alphas``.
        :param max_iter: forwarded to ``set_fitting_parameters``.
        :param cv: forwarded to ``set_fitting_parameters``.
        :param use_lasso: forwarded to ``PDEFinder``.
        :param regressor_builders: iterable of builders used by
            ``get_regressors``; each must expose ``fit``, ``transform``,
            ``domain_axes_name`` and ``name``.
        """
        self.data_manager = DataManager()
        self.coefs_ = None
        self.rational = rational
        self.use_lasso = use_lasso
        self.cv = cv
        self.max_iter = max_iter
        self.alphas = alphas
        self.with_std = with_std
        self.with_mean = with_mean
        self.max_polynomial_order = max_polynomial_order
        self.max_derivative_order = max_derivative_order
        self.target_derivative_order = target_derivative_order
        self.var_names = None

        self.pde_finder = PDEFinder(with_mean=self.with_mean,
                                    with_std=self.with_std,
                                    use_lasso=self.use_lasso)
        self.pde_finder.set_fitting_parameters(cv=self.cv,
                                               n_alphas=self.alphas,
                                               max_iter=self.max_iter)

        self.regressor_builders = regressor_builders

    # ---------- dictionary of functions and target ----------
    def get_x_operator_func(self):
        """Return the function that builds the candidate terms.

        The returned ``x_operator(field, regressors)`` works on a deep copy
        of ``field``, so the caller's field is not modified.
        """
        def x_operator(field, regressors):
            new_field = copy.deepcopy(field)
            if self.max_derivative_order > 0:
                new_field = PolyD({'t': self.max_derivative_order}) * new_field
            new_field.append(regressors)
            if self.rational:
                # Reciprocal terms; resolves to new_field.__rtruediv__(1.0).
                new_field.append(1.0 / new_field)
            new_field = Poly(self.max_polynomial_order) * new_field
            return new_field

        return x_operator

    def get_y_operator_func(self):
        """Return the function that builds the regression target.

        The returned ``y_operator(field)`` applies the ``t`` derivative of
        order ``target_derivative_order`` to ``field``.
        """
        def y_operator(field):
            return D(self.target_derivative_order, "t") * field

        return y_operator

    def get_regressors(self, domain, variables):
        """Build one regressor ``Variable`` per (builder, variable) pair.

        Each builder is fitted on the variable and then evaluated on the
        range of ``domain`` along the builder's own axis.

        :param domain: domain on which the regressors are evaluated.
        :param variables: variables the builders are fitted on.
        :return: list of ``Variable`` objects named
            ``'<variable name>_<builder name>'``.
        """
        # TODO: only works with time variable regressor
        regressors = []
        for reg_builder in self.regressor_builders:
            axis_name = reg_builder.domain_axes_name
            for variable in variables:
                # The same builder instance is refitted for every variable.
                reg_builder.fit(variable.domain, variable)
                serie = reg_builder.transform(
                    domain.get_range(axis_names=[axis_name])[axis_name])
                regressors.append(
                    Variable(serie,
                             domain,
                             domain2axis={axis_name: 0},
                             variable_name='{}_{}'.format(
                                 variable.get_name(), reg_builder.name)))
        return regressors

    def prepare_data(self, X):
        """Load ``X`` into a fresh ``DataManager`` and set the operators.

        :param X: DataFrame or Series whose index is time and whose columns
            are the individual series. Array-like input is also accepted and
            gets a default integer index (t = 0, 1, 2, ...).
        """
        # ---------- Prepare data ----------
        # Start from an empty manager: add_variables accumulates, so
        # reusing the previous one would stack the variables of earlier
        # fits on top of the new ones.
        self.data_manager = DataManager()

        # Convert first so input without an index gets a default one
        # (time starting at 0 with dt=1); an existing index is kept.
        X = pd.DataFrame(X)

        # The step is taken from the first two index values, so the index
        # is assumed to be evenly spaced.
        domain = Domain(lower_limits_dict={"t": X.index.min()},
                        upper_limits_dict={"t": X.index.max()},
                        step_width_dict={"t": np.diff(X.index)[0]})

        # define variables: one per column
        variables = [
            Variable(X[series_name].values.ravel(),
                     domain,
                     domain2axis={"t": 0},
                     variable_name=series_name)
            for series_name in X.columns
        ]

        self.data_manager.add_variables(variables)
        self.data_manager.add_regressors(self.get_regressors(
            domain, variables))
        self.data_manager.set_domain()

        self.data_manager.set_X_operator(self.get_x_operator_func())
        self.data_manager.set_y_operator(self.get_y_operator_func())
        self.var_names = [
            var.get_full_name() for var in self.data_manager.field.data
        ]

    def fit(self, X: (pd.DataFrame, pd.Series), y=None):
        """Fit the differential equation to the time series in ``X``.

        A new ``PDEFinder`` is built from the stored hyper-parameters on
        every call; its coefficients are copied to ``self.coefs_``.

        :param X: rows index time; each column is one series.
        :param y: ignored; the target is derived from ``X`` itself.
        :return: ``self``, so calls can be chained as with scikit-learn
            estimators.
        """
        self.prepare_data(X)

        # ---------- fit data ----------
        self.pde_finder = PDEFinder(with_mean=self.with_mean,
                                    with_std=self.with_std,
                                    use_lasso=self.use_lasso)
        self.pde_finder.set_fitting_parameters(cv=self.cv,
                                               n_alphas=self.alphas,
                                               max_iter=self.max_iter)

        self.pde_finder.fit(self.data_manager.get_X_dframe(),
                            self.data_manager.get_y_dframe())
        self.coefs_ = self.pde_finder.coefs_
        return self

    def predict(self, forecast_horizon):
        """Integrate the fitted equation beyond the end of the data.

        :param forecast_horizon: number(s) of steps ahead; multiplied by
            the domain step width to obtain time offsets. Presumably an
            array of step counts - confirm against callers.
        :return: column ``0`` of the integrated result, indexed by the
            forecast times (offsets plus the upper time limit).
        """
        assert self.coefs_ is not None, 'coeffs was not defined, use set_coefs'
        domain = self.data_manager.domain
        step = domain.step_width["t"]
        last_t = domain.upper_limits["t"]

        times = forecast_horizon * step
        # Integration times are shifted one step back relative to the
        # index used for the returned frame.
        integrated = self.pde_finder.integrate(
            t=times + last_t - step,
            data_manager=self.data_manager,
            dery=self.target_derivative_order)
        return pd.DataFrame(integrated, index=times + last_t)[0]

    def __str__(self):
        """Return a short identifier built from the three order settings."""
        return 'skodefind_target{}_maxd{}_maxpoly{}'.format(
            self.target_derivative_order, self.max_derivative_order,
            self.max_polynomial_order)