Beispiel #1
0
def regress_ols2(y, X, req=()):
    """Fit an OLS model of ``y`` on ``X.array`` and summarise the fit.

    Args:
        y: Response values handed to ``sm.OLS``; must support ``len()``.
        X: Design object. This function reads ``X.array``, ``X.names`` and
            ``X.parent`` (indexed by column name to obtain a grouping key),
            plus ``X.p_array``/``X.p_names`` and ``X.c_array``/``X.c_names``
            when the matching sub-model is requested.
        req: Collection of option strings. Recognised values are
            ``'resids'``, ``'pwr'``, ``'predictors-only'`` and
            ``'covariates-only'``.

    Returns:
        dict with keys ``'params'`` (grouping key -> sorted list of
        ``(pvalue, coefficient, name)`` tuples), ``'rs'``, ``'ars'``,
        ``'bic'``, ``'pwr-05'`` and ``'pwr-001'``. Depending on ``req`` it
        also holds ``'resids'``, ``'predictors'`` and ``'covariates'``; the
        last two are sub-model summaries that carry their own ``'resids'``.
    """
    model = sm.OLS(y, X.array).fit()
    x_out = _summarize_ols_fit(model, X.names, X.parent)

    # Placeholder values; only replaced when power is requested and R^2 > 0.
    x_out['pwr-05'] = 0.5
    x_out['pwr-001'] = 0.01

    if 'resids' in req:
        x_out['resids'] = model.resid

    if 'pwr' in req and model.rsquared > 0:
        # NOTE(review): the residual df is passed as ``df_num`` and the
        # regressor count minus one as ``df_denom``. statsmodels documents
        # the FTestPower df names as reversed -- confirm against the
        # installed version before "correcting" this.
        df_de = len(X.names) - 1
        df_num = len(y) - len(X.names)
        effect = np.sqrt(model.rsquared / (1 - model.rsquared))
        for key, alpha in (('pwr-05', 0.05), ('pwr-001', 0.001)):
            x_out[key] = smp.FTestPower().solve_power(effect_size=effect,
                                                      df_num=df_num,
                                                      df_denom=df_de,
                                                      alpha=alpha)

    if 'predictors-only' in req and len(X.p_names) > 1:
        p_model = sm.OLS(y, X.p_array).fit()
        p_summary = _summarize_ols_fit(p_model, X.p_names, X.parent)
        # Residuals of the predictors-only fit, matching the statistics
        # reported alongside them.
        p_summary['resids'] = p_model.resid
        x_out['predictors'] = p_summary

    if 'covariates-only' in req and len(X.c_names) > 1:
        c_model = sm.OLS(y, X.c_array).fit()
        c_summary = _summarize_ols_fit(c_model, X.c_names, X.parent)
        # Residuals of the covariates-only fit, matching the statistics
        # reported alongside them.
        c_summary['resids'] = c_model.resid
        x_out['covariates'] = c_summary

    return x_out


def _summarize_ols_fit(model, names, parent):
    """Group a fitted model's coefficients by ``parent[name]`` and collect
    its R^2, adjusted R^2 and BIC into a dict."""
    grouped = dd(list)
    for pv, bw, n in zip(model.pvalues, model.params, names):
        grouped[parent[n]].append((pv, bw, n))
    return {
        'params': {group: sorted(rows) for group, rows in grouped.items()},
        'rs': model.rsquared,
        'ars': model.rsquared_adj,
        'bic': model.bic,
    }
Beispiel #2
0
    def test(self, y):
        """Run the model on ``y`` and record per-term output and power.

        Stores ``y`` and its length on the instance, calls
        ``self.execute()``, then fills ``self.output`` with
        ``(pvalue, coefficient, name, is_predictor)`` tuples and ``self.pwr``
        with one value per entry of ``self.alphas``.

        Reads ``self.valid``, ``self.model``, ``self.rsq``, ``self.dfn`` and
        ``self.dfd``, which are presumably set by ``execute()`` -- verify
        against the class.

        Args:
            y: Response values; must support ``len()``.

        Returns:
            ``self``, to allow call chaining.
        """
        self.y, self.yLen = y, len(y)
        self.execute()

        # NOTE: residual extraction via RegResiduals and self.process() are
        # currently disabled in this method.

        # Neutral placeholder used whenever power cannot be computed.
        fallback_pwr = {a: 0.5 for a in self.alphas}

        if self.valid:
            self.output = [
                (p, b, x, i in self.X.predictor_idx)
                for i, (p, b, x) in enumerate(
                    zip(self.model.pvalues, self.model.params, self.X.names))
            ]

            try:
                # Effect size does not depend on alpha, so compute it once.
                effect = np.sqrt(self.rsq / (1 - self.rsq))
                self.pwr = {
                    a: smp.FTestPower().solve_power(effect_size=effect,
                                                    df_num=self.dfn,
                                                    df_denom=self.dfd,
                                                    alpha=a)
                    for a in self.alphas
                }
            except Exception:
                # Best-effort: any failure in the power computation falls
                # back to the placeholder, but KeyboardInterrupt/SystemExit
                # are no longer swallowed.
                self.pwr = fallback_pwr
        else:
            # Invalid fit: report placeholder p-value 0.99 and weight 0.
            self.output = [(0.99, 0, x, i in self.X.predictor_idx)
                           for i, x in enumerate(self.X.names)]
            self.pwr = fallback_pwr

        return self
Beispiel #3
0
    def regress_zip(self, Y, X, interest=None):
        """Fit ``msc.PoissonZiGMLE`` on ``log(Y + 1)`` against ``X``.

        A null model (a single column of ones) is fitted alongside the full
        model so that log-likelihood-ratio pseudo R^2 values can be derived.

        Args:
            Y: Iterable of response values; each is transformed with
                ``log(y + 1.0)`` before fitting.
            X: Design rows; ``len(X[0])`` is used as the number of columns.
            interest: Unused.

        Returns:
            dict with keys ``'rs'``, ``'ars'``, ``'bic'``, ``'pwr'``,
            ``'resids'`` and ``'params'``. ``'params'`` maps each predictor
            name (text before ``'='``) to a list of
            ``(level, (pvalue, coefficient))`` pairs sorted by p-value.
        """
        alp = 0.05
        Y = np.array([np.array(log(y + 1.0)) for y in Y])

        null = msc.PoissonZiGMLE(Y, np.array([1 for x in X])).fit(disp=0)
        model = msc.PoissonZiGMLE(Y, np.array(X)).fit(disp=0)
        params = model.params
        try:
            pvals = model.pvalues
        except ValueError:
            # p-values unavailable: use a non-significant placeholder.
            pvals = [0.99 for p in params]

        p_out = dd(dict)
        # Inferred predictors get a neutral (pvalue=1, weight=0) entry ...
        for p in self.D.inferred_predictors:
            pieces = p.split('=')
            p_out[pieces[0]][pieces[1]] = (1, 0)
        # ... which fitted predictors overwrite when the keys coincide.
        for pv, bw, c in zip(pvals, params, self.D.predictors):
            pieces = c.split('=')
            p_out[pieces[0]][pieces[-1]] = (pv, bw)
        r_out = {
            name: sorted(levels.items(), key=lambda loc: loc[1][0])
            for name, levels in p_out.items()
        }

        x_out = {
            'rs': 1 - (model.llf / null.llf),
            'ars': 1 - ((model.llf - len(X[0])) / null.llf),
            'bic': model.bic
        }
        f_2 = x_out['rs'] / (1 - x_out['rs'])
        # NOTE(review): residual df goes in as ``df_num``; statsmodels
        # documents FTestPower's df names as reversed -- confirm against the
        # installed version.
        df_de, df_num = len(X[0]) - 1, len(Y) - len(X[0])
        pwr = smp.FTestPower().solve_power(effect_size=np.sqrt(f_2),
                                           df_num=df_num,
                                           df_denom=df_de,
                                           alpha=alp)

        # Store the computed power, as regress_glmnb does, instead of
        # discarding it.
        x_out['pwr'] = pwr
        # NOTE(review): 'resids' holds the log-transformed response, not
        # model residuals -- confirm this is intended.
        x_out['resids'] = Y
        x_out['params'] = r_out
        return x_out
Beispiel #4
0
    def regress_glmnb(self, Y, X, interest=None):
        """Fit a negative-binomial GLM of ``Y`` on ``X`` and summarise it.

        An intercept-only null GLM is fitted as well; the ratio of the two
        log-likelihoods yields the pseudo R^2 values.

        Args:
            Y: Response values passed to ``sm.GLM``.
            X: Design rows; ``len(X[0])`` is used as the number of columns.
            interest: Unused.

        Returns:
            dict with keys ``'rs'``, ``'ars'``, ``'bic'``, ``'pwr'``,
            ``'resids'`` and ``'params'``. ``'params'`` maps each predictor
            name (text before ``'='``) to a list of
            ``(level, (pvalue, coefficient))`` pairs sorted by p-value.
        """
        alpha = 0.05
        nb_null = sm.GLM(Y, [np.array(1) for _ in X],
                         family=sm.families.NegativeBinomial()).fit()
        nb_fit = sm.GLM(Y, X, family=sm.families.NegativeBinomial()).fit()

        by_factor = dd(lambda: {})
        # Seed every inferred predictor with a neutral (pvalue=1, weight=0).
        for term in self.D.inferred_predictors:
            pieces = term.split('=')
            by_factor[pieces[0]][pieces[1]] = (1, 0)
        # Fitted predictors then overwrite entries that share a key.
        fitted = zip(nb_fit.pvalues, nb_fit.params, self.D.predictors)
        for p_value, weight, term in fitted:
            pieces = term.split('=')
            by_factor[pieces[0]][pieces[-1]] = (p_value, weight)

        ranked = {}
        for factor, levels in by_factor.items():
            ranked[factor] = sorted(levels.items(), key=lambda kv: kv[1][0])

        n_cols = len(X[0])
        pseudo_rsq = 1 - (nb_fit.llf / nb_null.llf)
        pseudo_rsq_adj = 1 - ((nb_fit.llf - n_cols) / nb_null.llf)
        bic = nb_fit.bic

        effect_sq = pseudo_rsq / (1 - pseudo_rsq)
        df_de = n_cols - 1
        df_num = len(Y) - n_cols
        power = smp.FTestPower().solve_power(effect_size=np.sqrt(effect_sq),
                                             df_num=df_num,
                                             df_denom=df_de,
                                             alpha=alpha)

        log_resids = [log(r + 1.0) for r in nb_fit.resid_pearson]

        return {
            'rs': pseudo_rsq,
            'ars': pseudo_rsq_adj,
            'bic': bic,
            'pwr': power,
            'resids': log_resids,
            'params': ranked,
        }
Beispiel #5
0
    def test(self, y):
        """Fit OLS of ``y`` on ``self.X.array`` and store the results.

        Sets ``self.rsq`` (rounded to 5 places), ``self.rsa`` and
        ``self.bic`` (rounded to 3), ``self.output`` as
        ``(pvalue, coefficient, name, is_predictor)`` tuples, ``self.pwr``
        keyed by each value in ``self.alphas``, ``self.resids`` and
        ``self.c_resids``.

        Args:
            y: Response values, indexable by row position.

        Returns:
            ``self``, to allow call chaining.
        """
        model = sm.OLS(y, self.X.array).fit()

        self.rsq = round(model.rsquared, 5)
        self.rsa = round(model.rsquared_adj, 3)
        self.bic = round(model.bic, 3)
        self.output = [
            (p, b, x, i in self.X.predictor_idx)
            for i, (p, b, x) in enumerate(
                zip(model.pvalues, model.params, self.X.names))
        ]

        try:
            # Effect size does not depend on alpha, so compute it once.
            effect = np.sqrt(self.rsq / (1 - self.rsq))
            self.pwr = {
                a: smp.FTestPower().solve_power(effect_size=effect,
                                                df_num=self.dfn,
                                                df_denom=self.dfd,
                                                alpha=a)
                for a in self.alphas
            }
        except Exception:
            # Best-effort: any failure in the power computation falls back
            # to 0.5, but KeyboardInterrupt/SystemExit are no longer
            # swallowed.
            self.pwr = {a: 0.5 for a in self.alphas}

        self.resids = model.resid
        # NOTE(review): this ADDS the fitted covariate contribution to y; a
        # covariate-adjusted value would normally subtract it -- confirm
        # the intended sign.
        self.c_resids = [
            sum(x[j] * model.params[j] for j in self.X.covariate_idx) + y[i]
            for i, x in enumerate(self.X.array)
        ]

        return self
Beispiel #6
0
    def test_pwr(self, alphas=(0.05, 0.001)):
        """Store F-test power for each alpha in ``self.pwr``.

        Nothing is written when the model's R^2 is not positive; existing
        entries of ``self.pwr`` are then left as they are.

        Args:
            alphas: Iterable of significance levels; each becomes a key of
                ``self.pwr``.

        Returns:
            ``self``, to allow call chaining.
        """
        rsq = self.model.rsquared
        if rsq > 0:
            # NOTE(review): residual df goes in as ``df_num``; statsmodels
            # documents FTestPower's df names as reversed -- confirm
            # against the installed version.
            df_de = len(self.X.names) - 1
            df_num = len(self.y) - len(self.X.names)
            effect = np.sqrt(rsq / (1 - rsq))

            for a in alphas:
                self.pwr[a] = smp.FTestPower().solve_power(effect_size=effect,
                                                           df_num=df_num,
                                                           df_denom=df_de,
                                                           alpha=a)

        return self
Beispiel #7
0
    def test(self, y):
        """Run the model on ``y`` and record variance explained and power.

        Resets the result attributes, stores ``y`` (also as a NumPy array in
        ``self.yA``), calls ``self.execute()`` and then fills
        ``self.v_explained`` and ``self.pwr``. When ``self.valid`` is false
        after ``execute()``, the fit statistics are zeroed and
        ``self.output`` is built with a placeholder p-value of 0.5.

        Reads ``self.res``, ``self.dfn``, ``self.dfd`` and ``self.alphas``,
        which are presumably set by ``execute()`` or the constructor --
        verify against the class.

        Args:
            y: Response values; must support ``len()``.

        Returns:
            ``self``, to allow call chaining.
        """
        # NOTE: an optional log2(y + 1) transform of y (gated on self.LOG)
        # is currently disabled.

        self.output, self.zero_infl = [], 0.0
        self.rsq, self.rsa, self.bic, self.aic = 'NA', 'NA', 'NA', 'NA'
        self.valid, self.y, self.yA, self.yLen, self.history = True, y, np.array(
            y), len(y), ''
        self.execute()

        if self.valid:
            self.v_explained = 1 - (np.var(self.res.resid) / np.var(self.yA))
            try:
                # Effect size does not depend on alpha, so compute it once.
                effect = np.sqrt(self.v_explained / (1 - self.v_explained))
                self.pwr = {
                    a: smp.FTestPower().solve_power(effect_size=effect,
                                                    df_num=self.dfn,
                                                    df_denom=self.dfd,
                                                    alpha=a)
                    for a in self.alphas
                }
            except Exception:
                # Best-effort: any failure in the power computation falls
                # back to 0.5, but KeyboardInterrupt/SystemExit are no
                # longer swallowed.
                self.pwr = {a: 0.5 for a in self.alphas}
            # A NaN power is treated like a failed computation.
            if any(np.isnan(pw) for pw in self.pwr.values()):
                self.pwr = {a: 0.5 for a in self.alphas}
        else:
            self.v_explained = 0
            self.pwr = {a: 0.0 for a in self.alphas}
            # The fitted p-value is replaced by 0.5; weight, t-value and
            # name are kept. pvalues stays in the zip so the output length
            # is bounded by it exactly as before.
            self.output = [(0.5, b, t, x, i in self.X.predictor_idx)
                           for i, (_, t, b, x) in enumerate(
                               zip(self.res.pvalues, self.res.tvalues,
                                   self.res.params, self.X.names))]
            self.bic, self.aic, self.rsq, self.rsa = 0, 0, 0, 0

            self.tvalues = self.res.tvalues

            # NOTE: an alternative that took bic/aic/prsquared and the
            # 4-tuple output directly from self.res is disabled here.

        return self