def test_multivariate_penalty():
    """Check the multivariate penalty against its univariate components.

    For ten random integer parameter vectors, the cost of the
    ``MultivariateGamPenalty`` must equal the sum of the two
    ``UnivariateGamPenalty`` costs, its first derivative must equal their
    concatenation, and its second derivative must equal their
    block-diagonal arrangement.
    """
    alphas = [1, 2]
    weights = [1, 1]
    np.random.seed(1)
    x, _, pol = multivariate_sample_data()

    # One univariate smoother per column of x, then one penalty per smoother.
    column_smoothers = [
        UnivariatePolynomialSmoother(x[:, k], degree=pol.degrees[k])
        for k in range(2)
    ]
    column_penalties = [
        UnivariateGamPenalty(alpha=a, univariate_smoother=s)
        for a, s in zip(alphas, column_smoothers)
    ]
    joint_penalty = MultivariateGamPenalty(multivariate_smoother=pol,
                                           alpha=alphas, weights=weights)

    for _ in range(10):
        # Draw the first block before the second so the random stream is
        # consumed in a fixed order.
        blocks = [
            np.random.randint(-3, 3, pol.smoothers[k].dim_basis)
            for k in range(2)
        ]
        stacked = np.concatenate(blocks)

        # cost: joint == sum of parts
        costs = [pen.func(b) for pen, b in zip(column_penalties, blocks)]
        assert_allclose(joint_penalty.func(stacked), costs[0] + costs[1],
                        atol=1.e-10, rtol=1.e-10)

        # gradient: joint == concatenation of parts
        grads = [pen.deriv(b) for pen, b in zip(column_penalties, blocks)]
        assert_allclose(joint_penalty.deriv(stacked), np.concatenate(grads))

        # hessian: joint == block diagonal of parts
        hessians = [pen.deriv2(b) for pen, b in zip(column_penalties, blocks)]
        assert_allclose(joint_penalty.deriv2(stacked), block_diag(*hessians))
def test_multivariate_gam_cv():
    """Smoke test for ``MultivariateGAMCV``.

    No result is asserted. The test only checks that constructing the
    penalty and running the cross-validated fit on the data stored in
    ``results/prediction_from_mgcv.csv`` does not raise.
    """

    def cost(x1, x2):
        # Euclidean distance between the two vectors, scaled by the length.
        return np.linalg.norm(x1 - x2) / len(x1)

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(cur_dir, "results", "prediction_from_mgcv.csv")
    data_from_r = pd.read_csv(file_path)

    # dataset used to train the R model
    x = data_from_r.x.values
    y = data_from_r.y.values

    bsplines = BSplines(x, degree=[5], df=[10])
    # The reference fit in R was obtained with the following code
    # g = gam(y~s(x, k = 10, bs = "cr"), data = data, scale = 80)

    # Only this value was ever used; an earlier `alphas = [0.0251]`
    # assignment was overwritten before being read and has been dropped.
    alphas = [2]
    cv = KFold(3)

    # Constructed only to check that it does not raise.
    MultivariateGamPenalty(bsplines, alpha=alphas)

    gam_cv = MultivariateGAMCV(smoother=bsplines, alphas=alphas, gam=GLMGam,
                               cost=cost, endog=y, exog=None, cv_iterator=cv)
    gam_cv.fit()
def test_multivariate_gam_1d_data():
    """Compare a one-dimensional ``GLMGam`` fit with fitted values from R.

    The reference values are read from the ``y_est`` column of
    ``results/prediction_from_mgcv.csv`` and must match the ``pirls`` fit
    within an absolute tolerance of 0.01.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data_from_r = pd.read_csv(
        os.path.join(here, "results", "prediction_from_mgcv.csv"))

    # dataset used to train the R model
    x = data_from_r.x.values
    y = data_from_r.y

    # y_mgcv is obtained from R with the following code
    # g = gam(y~s(x, k = 10, bs = "cr"), data = data, scale = 80)
    y_mgcv = data_from_r.y_est

    bsplines = BSplines(x, degree=[3], df=[10])

    # alpha was adjusted manually to reduce the discrepancy in fittedvalues
    alpha = [0.0168 * 0.0251 / 2 * 500]
    gp = MultivariateGamPenalty(bsplines, alpha=alpha)  # noqa: F841

    intercept_only = np.ones((len(y), 1))
    glm_gam = GLMGam(y, exog=intercept_only, smoother=bsplines, alpha=alpha)

    # "nm" converges to a different params, "bfgs" params are close to pirls
    # (alternative call kept for reference:
    #  glm_gam.fit(method='nm', max_start_irls=0,
    #              disp=1, maxiter=10000, maxfun=5000))
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=1, maxiter=10000)
    y_gam = res_glm_gam.fittedvalues

    # For visual inspection, plot x against y_gam, y_mgcv and y.
    assert_allclose(y_gam, y_mgcv, atol=0.01)
def __init__(self, endog, smoother, alpha, *args, **kwargs):
    """Set up the model with a GAM penalty built from ``smoother``.

    Parameters
    ----------
    endog : passed unchanged to the parent constructor.
    smoother : object providing ``smoothers`` and ``basis``; ``basis`` is
        passed to the parent constructor as the second positional argument.
    alpha : penalty weight(s). A value that is not an ``Iterable`` is
        repeated once per entry of ``smoother.smoothers`` and stored as a
        numpy array; an iterable is used as given.
    *args, **kwargs : forwarded to the parent constructor.
    """
    if isinstance(alpha, Iterable):
        penalty_weights = alpha
    else:
        # Broadcast a scalar to one weight per smooth component.
        penalty_weights = np.array([alpha] * len(smoother.smoothers))

    self.smoother = smoother
    self.alpha = penalty_weights
    self.pen_weight = 1  # TODO: pen weight should not be defined here!!

    penal = MultivariateGamPenalty(smoother, alpha=penalty_weights)

    super(LogitGam, self).__init__(endog, smoother.basis, *args,
                                   penal=penal, **kwargs)
def __init__(self, endog, exog=None, smoother=None, alpha=0,
             family=None, offset=None, exposure=None, missing='none',
             **kwargs):
    """Combine the linear ``exog`` with the smoother basis and initialise.

    Parameters
    ----------
    endog : passed to ``_handle_data`` and to the parent constructor.
    exog : optional linear design. When given, it is converted with
        ``np.asarray`` and its columns are placed before the smoother
        basis columns in the combined design.
    smoother : object providing ``k_variables``, ``basis`` and
        ``col_names``. Although the default is ``None``, its attributes
        are read unconditionally.
    alpha : penalty weight(s), normalised by ``self._check_alpha``.
    family, offset, exposure, missing : forwarded to the parent
        constructor (``missing`` is also passed to ``_handle_data``).
    **kwargs : forwarded to the parent constructor; ``hasconst`` is also
        read here, and any ``penal`` entry is discarded.
    """
    # TODO: check usage of hasconst
    hasconst = kwargs.get('hasconst')

    if hasattr(exog, 'design_info'):
        self.design_info_linear = exog.design_info
        xnames_linear = self.design_info_linear.column_names
    else:
        xnames_linear = None

    is_pandas = _is_using_pandas(exog, None)

    # TODO: handle data is experimental, see #5469
    # This is a bit wasteful because data handling runs twice: once here
    # for the linear part and once in the parent constructor.
    self.data_linear = self._handle_data(endog, exog, missing, hasconst)
    if xnames_linear is None:
        xnames_linear = self.data_linear.xnames

    if exog is None:
        exog_linear = None
        k_exog_linear = 0
    else:
        exog_linear = np.asarray(exog)
        k_exog_linear = exog_linear.shape[1]
    self.k_exog_linear = k_exog_linear
    # We need exog_linear for k-fold cross validation
    # TODO: alternative is to take columns from combined exog
    self.exog_linear = exog_linear

    self.smoother = smoother
    self.k_smooths = smoother.k_variables
    self.alpha = self._check_alpha(alpha)
    # The penalty starts after the linear columns.
    penal = MultivariateGamPenalty(smoother, alpha=self.alpha,
                                   start_idx=k_exog_linear)
    # The penalty built above is the one passed on; drop any supplied one.
    kwargs.pop('penal', None)

    if exog_linear is None:
        exog = smoother.basis
    else:
        exog = np.column_stack((exog_linear, smoother.basis))

    # TODO: check: xnames_linear will be None instead of empty list
    #       if no exog_linear
    # can smoother be empty ? I guess not allowed.
    if xnames_linear is None:
        xnames_linear = []
    xnames = xnames_linear + self.smoother.col_names

    if is_pandas and exog_linear is not None:
        # we need a dataframe so we can get a PandasData instance for
        # wrapping
        exog = pd.DataFrame(exog, index=self.data_linear.row_labels,
                            columns=xnames)

    super(GLMGam, self).__init__(endog, exog=exog, family=family,
                                 offset=offset, exposure=exposure,
                                 penal=penal, missing=missing, **kwargs)

    if not is_pandas:
        # set exog names in place if not given by pandas DataFrame
        self.exog_names[:] = xnames

    # TODO: the generic data handling might attach the design_info from the
    #       linear part, but this is incorrect for the full model and
    #       causes problems in wald_test_terms
    if hasattr(self.data, 'design_info'):
        del self.data.design_info

    # formula also might be attached which causes problems in predict
    if hasattr(self, 'formula'):
        self.formula_linear = self.formula
        # NOTE(review): the assignment before `del` presumably guarantees
        # an instance-level attribute exists to delete - confirm.
        self.formula = None
        del self.formula