def init(cls):
    nobs = cls.nobs
    y_true, x, exog = cls.y_true, cls.x, cls.exog
    if not hasattr(cls, 'scale'):
        scale = 1
    else:
        scale = cls.scale

    f = cls.family
    cls.mu_true = mu_true = f.link.inverse(y_true)

    np.random.seed(8765993)
    #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    if issubclass(get_class(cls.rvs), stats.rv_discrete):
        # Discrete distributions don't take `scale`.
        y_obs = cls.rvs(mu_true, size=nobs)
    else:
        y_obs = cls.rvs(mu_true, scale=scale, size=nobs)

    m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
    m.fit(y_obs, maxiter=100)
    res_gam = m.results
    cls.res_gam = res_gam  #attached for debugging
    cls.mod_gam = m        #attached for debugging

    res_glm = GLM(y_obs, exog, family=f).fit()

    #Note: there still are some naming inconsistencies
    cls.res1 = res1 = Dummy()  #for gam model
    #res2 = Dummy() #for benchmark
    cls.res2 = res2 = res_glm  #reuse existing glm results, will add additional

    #eta in GLM terminology
    res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
    res1.y_pred = res_gam.predict(x)
    res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

    #mu
    res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
    res1.mu_pred = res_gam.mu

    #parameters
    slopes = [i for ss in m.smoothers for i in ss.params[1:]]
    const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
    res1.params = np.array([const] + slopes)
def init(self):
    nobs = self.nobs
    y_true, x, exog = self.y_true, self.x, self.exog
    if not hasattr(self, 'scale'):
        scale = 1
    else:
        scale = self.scale

    f = self.family
    self.mu_true = mu_true = f.link.inverse(y_true)

    np.random.seed(8765993)
    #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    if issubclass(self.rvs.__self__.__class__, stats.rv_discrete):
        # Discrete distributions don't take `scale`.
        y_obs = self.rvs(mu_true, size=nobs)
    else:
        y_obs = self.rvs(mu_true, scale=scale, size=nobs)

    m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
    m.fit(y_obs, maxiter=100)
    res_gam = m.results
    self.res_gam = res_gam  #attached for debugging
    self.mod_gam = m        #attached for debugging

    res_glm = GLM(y_obs, exog, family=f).fit()

    #Note: there still are some naming inconsistencies
    self.res1 = res1 = Dummy()  #for gam model
    #res2 = Dummy() #for benchmark
    self.res2 = res2 = res_glm  #reuse existing glm results, will add additional

    #eta in GLM terminology
    res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
    res1.y_pred = res_gam.predict(x)
    res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

    #mu
    res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
    res1.mu_pred = res_gam.mu

    #parameters
    slopes = [i for ss in m.smoothers for i in ss.params[1:]]
    const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
    res1.params = np.array([const] + slopes)
def init(self):
    nobs = self.nobs
    y_true, x, exog = self.y_true, self.x, self.exog
    if not hasattr(self, 'scale'):
        scale = 1
    else:
        scale = self.scale

    f = self.family
    self.mu_true = mu_true = f.link.inverse(y_true)

    np.random.seed(8765993)
    #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    y_obs = self.rvs(mu_true, scale=scale, size=nobs)  #this should work

    m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
    m.fit(y_obs, maxiter=100)
    res_gam = m.results
    self.res_gam = res_gam  #attached for debugging
    self.mod_gam = m        #attached for debugging

    res_glm = GLM(y_obs, exog, family=f).fit()

    #Note: there still are some naming inconsistencies
    self.res1 = res1 = Dummy()  #for gam model
    #res2 = Dummy() #for benchmark
    self.res2 = res2 = res_glm  #reuse existing glm results, will add additional

    #eta in GLM terminology
    res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
    res1.y_pred = res_gam.predict(x)
    res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

    #mu
    res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
    res1.mu_pred = res_gam.mu

    #parameters
    slopes = [i for ss in m.smoothers for i in ss.params[1:]]
    const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
    res1.params = np.array([const] + slopes)
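# For context, the setup helpers above only read attributes off the test object
# (`nobs`, `y_true`, `x`, `exog`, `family`, `rvs`, and optionally `scale`) and
# collect results on a `Dummy` container. The sketch below shows how a concrete
# Gaussian test case might supply those attributes. It is illustrative only:
# the class name, the data construction, and the `Dummy` stand-in are
# assumptions, and `init`, `GAM`, and `GLM` are assumed to be in scope from the
# test module above.

import numpy as np
from scipy import stats
from statsmodels.genmod.families import family


class Dummy:
    """Empty attribute container for collected results (assumed stand-in;
    the real definition is not shown in this excerpt)."""


class TestGAMGaussianExample:  # hypothetical test case, not from the source
    def setup(self):
        self.nobs = nobs = 200
        x = np.linspace(-1, 1, nobs)
        self.x = x[:, None]                                    # smooth terms handed to GAM
        self.exog = np.column_stack((np.ones(nobs), x, x**2))  # GLM benchmark design
        self.y_true = self.exog.sum(1)                         # true linear predictor (link scale)
        self.family = family.Gaussian()
        self.rvs = stats.norm.rvs                              # continuous, so `scale` is used
        self.scale = 0.1
        init(self)  # shared setup helper above (or self.init() if defined as a method)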
x2 = R.standard_normal(nobs)
x2.sort()
y = R.standard_normal((nobs,))

d = np.array([x1, x2]).T

import scipy.stats, time

print("binomial")
f = family.Binomial()
#b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
#b.shape = y.shape
b = np.zeros(len(x1))
b[x1 > 0.5] = 1

m = GAM(b, d, family=f)
toc = time.time()
m.fit(b)
tic = time.time()
print(tic - toc)

plt.figure()
plt.plot(x1, standardize(m.smoothers[0](x1)), 'r')
#plt.plot(x1, standardize(f1(x1)), linewidth=2)
#plt.figure()
plt.plot(x2, standardize(m.smoothers[1](x2)), 'r')  #second smoother corresponds to x2
#plt.plot(x2, standardize(f2(x2)), linewidth=2)
plt.show()
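# The plotting code above relies on a `standardize` helper that is not defined
# in this excerpt. A minimal sketch, assuming it simply centers a series and
# rescales it to unit standard deviation so the fitted smoothers (and the
# commented-out true components f1, f2) can be overlaid on a common scale:

import numpy as np


def standardize(values):
    # Center and scale to unit standard deviation so different curves are
    # directly comparable in the plots (assumed behavior; the original helper
    # is not shown in this excerpt).
    values = np.asarray(values, dtype=float)
    return (values - values.mean()) / values.std()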
exog_reduced = exog[:, idx]  #remove duplicate constant
y_true = exog.sum(1)  #/ 4.
z = y_true  #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print("binomial")
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic - toc)
    #for plotting
    yp = f.link.inverse(y)
    p = b

if example == 3:
    print("Poisson")
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
    #p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)], float)
    p = np.asarray([stats.poisson.rvs(p) for p in f.link.inverse(z)], float)
exog_reduced = exog[:, idx]  #remove duplicate constant
y_true = exog.sum(1)  #/ 4.
z = y_true  #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print("binomial")
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic - toc)
    #for plotting
    yp = f.link.inverse(y)
    p = b

if example == 3:
    print("Poisson")
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
    #p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)], float)
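# The Poisson branch of this later version of the script stops at the
# commented-out sampling line; the earlier version above draws the counts with
# stats.poisson.rvs and, by analogy with the binomial branch, presumably goes
# on to fit the GAM on those counts. A minimal sketch of that continuation,
# assuming GAM, GLM, and the variables built above (d, z, f, exog_reduced) are
# in scope; the GLM benchmark line is an illustrative addition, not taken from
# the source.

import time

from scipy import stats

# Draw Poisson counts at the true means, as in the earlier version of the script.
p = np.asarray([stats.poisson.rvs(mui) for mui in f.link.inverse(z)], float)

# Fit the GAM on the simulated counts, mirroring the binomial branch above.
m = GAM(p, d, family=f)
toc = time.time()
m.fit(p)
tic = time.time()
print(tic - toc)

# Parametric benchmark on the reduced polynomial design (illustrative assumption).
res_glm = GLM(p, exog_reduced, family=f).fit()
print(res_glm.params)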