def init(cls):
        nobs = cls.nobs
        y_true, x, exog = cls.y_true, cls.x, cls.exog
        if not hasattr(cls, 'scale'):
            scale = 1
        else:
            scale = cls.scale

        f = cls.family

        cls.mu_true = mu_true = f.link.inverse(y_true)

        np.random.seed(8765993)
        #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
        if issubclass(get_class(cls.rvs), stats.rv_discrete):
            # Discrete distributions don't take `scale`.
            y_obs = cls.rvs(mu_true, size=nobs)
        else:
            y_obs = cls.rvs(mu_true, scale=scale, size=nobs)
        m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
        m.fit(y_obs, maxiter=100)
        res_gam = m.results
        cls.res_gam = res_gam  #attached for debugging
        cls.mod_gam = m  #attached for debugging

        res_glm = GLM(y_obs, exog, family=f).fit()

        #Note: there still are some naming inconsistencies
        cls.res1 = res1 = Dummy()  #for gam model
        #res2 = Dummy() #for benchmark
        cls.res2 = res2 = res_glm  #reuse existing glm results, will add additional

        #eta in GLM terminology
        res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
        res1.y_pred = res_gam.predict(x)
        res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

        #mu
        res2.mu_pred = res_glm.model.predict(res_glm.params,
                                             exog,
                                             linear=False)
        res1.mu_pred = res_gam.mu

        #parameters
        slopes = [i for ss in m.smoothers for i in ss.params[1:]]
        const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
        res1.params = np.array([const] + slopes)
Ejemplo n.º 2
0
    def init(self):
        nobs = self.nobs
        y_true, x, exog = self.y_true, self.x, self.exog
        if not hasattr(self, 'scale'):
            scale = 1
        else:
            scale = self.scale

        f = self.family

        self.mu_true = mu_true = f.link.inverse(y_true)

        np.random.seed(8765993)
        #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
        if issubclass(self.rvs.__self__.__class__, stats.rv_discrete):
            # Discrete distributions don't take `scale`.
            y_obs = self.rvs(mu_true, size=nobs)
        else:
            y_obs = self.rvs(mu_true, scale=scale, size=nobs)
        m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
        m.fit(y_obs, maxiter=100)
        res_gam = m.results
        self.res_gam = res_gam   #attached for debugging
        self.mod_gam = m   #attached for debugging

        res_glm = GLM(y_obs, exog, family=f).fit()

        #Note: there still are some naming inconsistencies
        self.res1 = res1 = Dummy() #for gam model
        #res2 = Dummy() #for benchmark
        self.res2 = res2 = res_glm  #reuse existing glm results, will add additional

        #eta in GLM terminology
        res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
        res1.y_pred = res_gam.predict(x)
        res1.y_predshort = res_gam.predict(x[:10]) #, linear=True)

        #mu
        res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
        res1.mu_pred = res_gam.mu

        #parameters
        slopes = [i for ss in m.smoothers for i in ss.params[1:]]
        const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
        res1.params = np.array([const] + slopes)
Ejemplo n.º 3
0
    def init(self):
        nobs = self.nobs
        y_true, x, exog = self.y_true, self.x, self.exog
        if not hasattr(self, 'scale'):
            scale = 1
        else:
            scale = self.scale

        f = self.family

        self.mu_true = mu_true = f.link.inverse(y_true)

        np.random.seed(8765993)
        #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
        y_obs = self.rvs(mu_true, scale=scale, size=nobs)  #this should work
        m = GAM(y_obs, x, family=f)  #TODO: y_obs is twice __init__ and fit
        m.fit(y_obs, maxiter=100)
        res_gam = m.results
        self.res_gam = res_gam  #attached for debugging
        self.mod_gam = m  #attached for debugging

        res_glm = GLM(y_obs, exog, family=f).fit()

        #Note: there still are some naming inconsistencies
        self.res1 = res1 = Dummy()  #for gam model
        #res2 = Dummy() #for benchmark
        self.res2 = res2 = res_glm  #reuse existing glm results, will add additional

        #eta in GLM terminology
        res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
        res1.y_pred = res_gam.predict(x)
        res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

        #mu
        res2.mu_pred = res_glm.model.predict(res_glm.params,
                                             exog,
                                             linear=False)
        res1.mu_pred = res_gam.mu

        #parameters
        slopes = [i for ss in m.smoothers for i in ss.params[1:]]
        const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
        res1.params = np.array([const] + slopes)
Ejemplo n.º 4
0
x2 = R.standard_normal(nobs)
x2.sort()
y = R.standard_normal((nobs, ))

d = np.array([x1, x2]).T

import scipy.stats, time

print("binomial")
f = family.Binomial()

# b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
# b.shape = y.shape
b = np.zeros(len(x1))
b[x1 > 0.5] = 1

m = GAM(b, d, family=f)
toc = time.time()
m.fit(b)
tic = time.time()
print(tic - toc)

plt.figure()
plt.plot(x1, standardize(m.smoothers[0](x1)), 'r')
#plt.plot(x1, standardize(f1(x1)), linewidth=2)
#plt.figure()
plt.plot(x2, standardize(m.smoothers[0](x2)), 'r')
#plt.plot(x2, standardize(f2(x2)), linewidth=2)

plt.show()
Ejemplo n.º 5
0
exog_reduced = exog[:, idx]  #remove duplicate constant
y_true = exog.sum(1)  #/ 4.
z = y_true  #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print "binomial"
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print tic - toc
    #for plotting
    yp = f.link.inverse(y)
    p = b

if example == 3:
    print "Poisson"
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
    #p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)], float)
    p = np.asarray([stats.poisson.rvs(p) for p in f.link.inverse(z)], float)
Ejemplo n.º 6
0
exog_reduced = exog[:,idx]  #remove duplicate constant
y_true = exog.sum(1) #/ 4.
z = y_true #alias check
d = x
y = y_true + sigma_noise * np.random.randn(nobs)

example = 3

if example == 2:
    print("binomial")
    f = family.Binomial()
    mu_true = f.link.inverse(z)
    #b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b = np.asarray([stats.bernoulli.rvs(p) for p in f.link.inverse(z)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic-toc)
    #for plotting
    yp = f.link.inverse(y)
    p = b


if example == 3:
    print("Poisson")
    f = family.Poisson()
    #y = y/y.max() * 3
    yp = f.link.inverse(z)
    #p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)], float)