Exemple #1
0
    def test_rf(self):
        ## Fit routine creates usable model
        md_fit = fit.fit_rf(
            self.df_tree,
            md=self.md_tree,
            max_depth=1,  # True tree is a stump
            seed=101,
        )
        df_res = gr.eval_df(md_fit, self.df_tree[self.md_tree.var])

        ## RF can approximately recover a tree; check ends only
        self.assertTrue(
            gr.df_equal(
                df_res[["y_mean", "z_mean"]].iloc[[0, 1, -2, -1]],
                self.df_tree[["y", "z"]].iloc[[0, 1, -2, -1]] >> gr.tf_rename(
                    y_mean="y", z_mean="z"),
                close=True,
                precision=1,
            ))

        ## Fit copies model data
        self.assertTrue(set(md_fit.var) == set(self.md_tree.var))
        self.assertTrue(
            set(md_fit.out) == set(map(lambda s: s + "_mean",
                                       self.md_tree.out)))
Exemple #2
0
 def test_rename(self):
     df = data.df_diamonds.rename(columns={
         "cut": "Cut",
         "table": "Table",
         "carat": "Carat"
     })
     d = data.df_diamonds >> gr.tf_rename(
         Cut=X.cut, Table=X.table, Carat="carat")
     self.assertTrue(df.equals(d))
Exemple #3
0
    def test_lm(self):
        ## Fit routine creates usable model
        md_fit = fit.fit_lm(
            self.df_smooth,
            md=self.md_smooth,
        )
        df_res = gr.eval_df(md_fit, self.df_smooth[self.md_smooth.var])

        ## LM can recover a linear model
        self.assertTrue(
            gr.df_equal(
                df_res,
                self.df_smooth >> gr.tf_rename(y_mean="y", z_mean="z"),
                close=True,
                precision=1,
            ))

        ## Fit copies model data
        self.assertTrue(set(md_fit.var) == set(self.md_smooth.var))
        self.assertTrue(
            set(md_fit.out) == set(
                map(lambda s: s + "_mean", self.md_smooth.out)))
Exemple #4
0
    def test_nls(self):
        ## Ground-truth model
        c_true = 2
        a_true = 1

        md_true = (gr.Model() >> gr.cp_function(
            fun=lambda x: a_true * np.exp(x[0] * c_true) + x[1],
            var=["x", "epsilon"],
            out=["y"],
        ) >> gr.cp_marginals(epsilon={
            "dist": "norm",
            "loc": 0,
            "scale": 0.5
        }) >> gr.cp_copula_independence())
        df_data = md_true >> gr.ev_sample(
            n=5, seed=101, df_det=gr.df_make(x=[0, 1, 2, 3, 4]))

        ## Model to fit
        md_param = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"]) >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0]))

        ## Fit the model
        md_fit = df_data >> gr.ft_nls(
            md=md_param,
            verbose=False,
            uq_method="linpool",
        )

        ## Unidentifiable model throws warning
        # -------------------------
        md_unidet = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] / x[3] * np.exp(x[0] * x[1]),
            var=["x", "c", "a", "z"],
            out=["y"],
        ) >> gr.cp_bounds(c=[0, 4], a=[0.1, 2.0], z=[0, 1]))
        with self.assertWarns(RuntimeWarning):
            gr.fit_nls(
                df_data,
                md=md_unidet,
                uq_method="linpool",
            )

        ## True parameters in wide confidence region
        # -------------------------
        alpha = 1e-3
        self.assertTrue(
            (md_fit.density.marginals["c"].q(alpha / 2) <= c_true)
            and (c_true <= md_fit.density.marginals["c"].q(1 - alpha / 2)))

        self.assertTrue(
            (md_fit.density.marginals["a"].q(alpha / 2) <= a_true)
            and (a_true <= md_fit.density.marginals["a"].q(1 - alpha / 2)))

        ## Model with fixed parameter
        # -------------------------
        md_fixed = (gr.Model() >> gr.cp_function(
            fun=lambda x: x[2] * np.exp(x[0] * x[1]),
            var=["x", "c", "a"],
            out=["y"]) >> gr.cp_bounds(c=[0, 4], a=[1, 1]))
        md_fit_fixed = df_data >> gr.ft_nls(
            md=md_fixed, verbose=False, uq_method="linpool")

        # Test that fixed model can evaluate successfully
        gr.eval_sample(md_fit_fixed, n=1, df_det="nom")

        ## Trajectory model
        # -------------------------
        md_base = models.make_trajectory_linear()
        md_fit = data.df_trajectory_windowed >> gr.ft_nls(
            md=md_base, method="SLSQP", tol=1e-3)
        df_tmp = md_fit >> gr.ev_nominal(df_det="nom")

        ## Select output for fitting
        # -------------------------
        # Split model has inconsistent "true" parameter value
        md_split = (gr.Model("Split") >> gr.cp_vec_function(
            fun=lambda df: gr.df_make(
                f=1 * df.c * df.x,
                g=2 * df.c * df.x,
            ),
            var=["c", "x"],
            out=["f", "g"],
        ) >> gr.cp_bounds(
            x=(-1, +1),
            c=(-1, +1),
        ))

        df_split = (gr.df_make(x=gr.linspace(-1, +1, 100)) >> gr.tf_mutate(
            f=X.x, g=X.x))

        # Fitting both outputs: cannot achieve mse ~= 0
        df_both = (df_split >> gr.ft_nls(md_split, out=["f", "g"]) >>
                   gr.ev_df(df_split >> gr.tf_rename(f_t=X.f, g_t=X.g)) >>
                   gr.tf_summarize(
                       mse_f=gr.mse(X.f, X.f_t),
                       mse_g=gr.mse(X.g, X.g_t),
                   ))
        self.assertTrue(df_both.mse_f[0] > 0)
        self.assertTrue(df_both.mse_g[0] > 0)

        # Fitting "f" only
        df_f = (df_split >> gr.ft_nls(md_split, out=["f"]) >>
                gr.ev_df(df_split >> gr.tf_rename(f_t=X.f, g_t=X.g)) >>
                gr.tf_summarize(
                    mse_f=gr.mse(X.f, X.f_t),
                    mse_g=gr.mse(X.g, X.g_t),
                ))
        self.assertTrue(df_f.mse_f[0] < 1e-16)
        self.assertTrue(df_f.mse_g[0] > 0)

        # Fitting "g" only
        df_g = (df_split >> gr.ft_nls(md_split, out=["g"]) >>
                gr.ev_df(df_split >> gr.tf_rename(f_t=X.f, g_t=X.g)) >>
                gr.tf_summarize(
                    mse_f=gr.mse(X.f, X.f_t),
                    mse_g=gr.mse(X.g, X.g_t),
                ))
        self.assertTrue(df_g.mse_f[0] > 0)
        self.assertTrue(df_g.mse_g[0] < 1e-16)