Example #1
    def __init__(self, formula=None, data=None, **kwargs):

        # convert all variables raised to a power to float64
        # this prevents mis-specification of probabilities in cases of variable overflow
        # (if the original var was compressed to a smaller bit integer/float)
        if isinstance(data, pd.DataFrame):
            power_vars = list(set(re.findall(r'(?<=power\().+?(?=,)',
                                             formula)))
            for var in power_vars:
                data[var] = data[var].astype('float64')

        if formula:
            # eval_env=1 resolves formula names in the caller's frame
            y, X = patsy.dmatrices(formula, data, eval_env=1)
            self._y_design_info = y.design_info
            self._X_design_info = X.design_info
            self._model = GLM(y, X, family=Binomial(), **kwargs)
            self._fit = self._model.fit()
            self._betas = self._fit.params
            self._link = logit
        else:
            self._y_design_info = None
            self._X_design_info = None
            self._model = None
            self._fit = None
            self._betas = None
            self._link = logit
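
A minimal usage sketch for the constructor above. The enclosing class is not shown in the snippet, so PowerLogitModel is a hypothetical stand-in, and power() is assumed to be a helper visible in the formula namespace (which is what the regex implies):

import numpy as np
import pandas as pd

def power(x, p):  # hypothetical formula helper matched by the regex above
    return np.power(x, p)

df = pd.DataFrame({
    'y': np.random.binomial(1, 0.5, 200),
    'x': np.random.rand(200).astype('float32'),  # deliberately narrow dtype
})
# the constructor upcasts 'x' to float64 before building the power(x, 3) term
model = PowerLogitModel(formula='y ~ power(x, 3)', data=df)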
Example #2
 def test_binomial(self):
     model = BinomialRegressor()
     y = Binomial().fitted(self.eta)
     model.fit(self.X, y)
     y_hat = model.predict(self.X)
     diff = y_hat - y
     rsq = 1 - np.mean(diff**2) / np.mean((y-np.mean(y))**2)
     assert_true(rsq > .9)
Example #3
def irls_incremental(filename,
                     formula,
                     chunksize,
                     family=Binomial(),
                     link=Logit(),
                     maxit=25,
                     tol=1e-08,
                     rNames=None,
                     headerNames=None):
    # configure the module-level chunk generator to stream `filename`
    # in `chunksize`-row blocks
    chunck_generator2._filename = filename
    chunck_generator2._nRows = chunksize
    x = None
    # build incremental patsy design info from the caller's formula
    y0, dta = incr_dbuilders(formula, chunck_generator2)
    nCols = len(dta.column_names)
    if rNames is None:
        rNames = dta.column_names
    for j in range(1, maxit + 1):
        if x is None: x = np.zeros(nCols)
        ATWA = np.zeros((nCols, nCols))
        ATWz = np.zeros(nCols)
        for data_chunk in chunck_generator2():
            yb, rowA = dmatrices((y0, dta),
                                 data_chunk,
                                 NA_action="drop",
                                 return_type="dataframe")
            yb = yb.values.ravel()
            A = np.asarray(rowA[rNames], dtype=np.float32)
            eta = np.matmul(A, x)
            eta = eta.reshape(len(eta))
            g = link.inverse(eta)
            mu_eta = link.inverse_deriv
            gprime = mu_eta(eta)
            z = np.array(eta + (yb - g) / gprime)
            varianceFam = family.variance
            linkinvFam = link.inverse
            g = linkinvFam(eta)
            varg = varianceFam(g)
            W = gprime**2 / varg
            W = W.reshape(len(W), 1)
            cross2 = np.matmul(np.transpose(A), np.asarray(W.reshape(-1) * z))
            ATWz = ATWz + cross2
            cross1 = np.matmul(np.transpose(A), np.asarray(W * A))
            ATWA = ATWA + cross1
        xold = x
        C, rank, piv = cholesky_pivot(ATWA, full_pivot=True)
        if rank < C.shape[1]:
            raise LinAlgError("Rank-deficiency detected.")
        # forward/back substitution; np.argsort(piv) undoes the pivot permutation
        x = solve_triangular(np.transpose(C), ATWz[piv - 1], lower=True)
        x = solve_triangular(C, x, lower=False)[np.argsort(piv)]
        if np.linalg.norm(x - xold) < tol:
            break
    if headerNames is not None:
        x = pd.DataFrame(x, index=headerNames)
    else:
        x = pd.DataFrame(x, index=list(rowA))
    return (x, j)
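
A hypothetical invocation of irls_incremental, assuming a contraception-style CSV whose columns match the formula and that chunck_generator2 is the module's chunked-CSV reader (the file name is a placeholder):

coefs, n_iter = irls_incremental(
    'contraception.csv',
    'I(use.eq("Y").mul(1)) ~ age + I(age**2) + urban + livch',
    chunksize=5000)
print(coefs)
print('stopped after', n_iter, 'IRLS iterations')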
Example #4
def _train(elements, model_cfg):
    """Construct one model per building block type"""
    models = {}
    target = model_cfg.target
    for model in model_cfg.sections:
        # Construct model formula from configuration
        terms = " + ".join(["1"] + [f"C({f})" for f in model.factors])

        # Train model
        models[model.label] = GLM.from_formula(
            f"{target} ~ {terms}",
            family=Binomial(),
            data=filter_data(elements, {model_cfg.label_column: model.label}),
        ).fit(scale="X2")  # estimate dispersion via Pearson chi-squared
    return models
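
One plausible shape for model_cfg, inferred from the attribute accesses above (target, sections, label_column, and per-section label and factors); all field values here are illustrative:

from types import SimpleNamespace

model_cfg = SimpleNamespace(
    target='passed',            # binary response column in `elements`
    label_column='block_type',  # column filter_data uses to subset rows
    sections=[
        SimpleNamespace(label='wall', factors=['material', 'supplier']),
        SimpleNamespace(label='beam', factors=['material']),
    ],
)
models = _train(elements, model_cfg)  # elements: a DataFrame with those columns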
Example #5
def compute_chi2_null_test(model_results, data, dep_var, max_iter, l2_weight):
    """
    Likelihood-ratio test against the intercept-only null model:
    -2 * (LL(null) - LL(model)) ~ chi2(df_model)
    """
    null_formula = '%s ~ 1' % (dep_var)
    null_model = GLM.from_formula(null_formula,
                                  data,
                                  family=Binomial(link=logit()))
    null_model_results = null_model.fit_regularized(maxiter=max_iter,
                                                    method='elastic_net',
                                                    alpha=l2_weight,
                                                    L1_wt=0.0)
    model_loglike = model_results.model.loglike(model_results.params)
    null_model_loglike = null_model_results.model.loglike(
        null_model_results.params)
    llr = -2 * (null_model_loglike - model_loglike)
    model_df = model_results.model.df_model
    p_val = chi2.sf(llr, model_df)
    return llr, model_df, p_val
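
A self-contained sketch of the same likelihood-ratio test on synthetic data, using plain (unregularized) fits for brevity; every name below is local to the sketch:

import numpy as np
import pandas as pd
from scipy.stats import chi2
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod.families import Binomial

rng = np.random.default_rng(0)
df = pd.DataFrame({'x': rng.normal(size=500)})
df['y'] = rng.binomial(1, 1 / (1 + np.exp(-df['x'])))

full = GLM.from_formula('y ~ x', df, family=Binomial()).fit()
null = GLM.from_formula('y ~ 1', df, family=Binomial()).fit()
llr = -2 * (null.llf - full.llf)     # likelihood-ratio statistic
p_val = chi2.sf(llr, full.df_model)  # df = number of non-intercept terms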
Example #6
 def setUpClass(cls):
     cls.forwarding_family = BinomialWrapper()
     cls.family = Binomial()
Example #7
 def __init__(self, link=L.logit):  # , n=1.):
     # TODO: it *should* work for a constant n>1 actually, if data_weights
     # is equal to n
     self.family = Binomial(link=link)
Example #8
class BinomialWrapper(FamilyWrapper):
    """
    The wrapper of Binomial exponential family distribution,
    with function for per sample probability.
    Parameters
    ----------
    link : a link instance, optional
        The default link for the Binomial family is the logit link.
        Available links are logit, probit, cauchy, log, and cloglog.
        See statsmodels.family.links for more information.
    Attributes
    ----------
    family : statsmodels Binomial family instance
    """
    def __init__(self, link=L.logit):  # , n=1.):
        # TODO: it *should* work for a constant n>1 actually, if data_weights
        # is equal to n
        self.family = Binomial(link=link)

    def loglike_per_sample(self, endog, mu, scale=1.):
        """
        The function to calculate log-likelihood per sample
        in terms of the fitted mean response.
        Parameters
        ----------
        endog : array-like of shape (n, k) or (n, )
            Endogenous response variable
        mu : array-like of shape (n, )
            Fitted mean response variable
        scale : float, optional
            Not used for the Binomial GLM.
        Returns
        -------
        log_p : array-like of shape (n, )
            The log-likelihood evaluated per sample at (endog, mu, scale),
            as defined in the Notes below.
        Notes
        -----
        If the endogenous variable is binary:
        .. math::
           \log p_{i} = y_i \log(\mu_i/(1-\mu_i)) + \log(1-\mu_i)
        If the endogenous variable is binomial:
        .. math::
           \log p_{i} = \ln \Gamma(n_i+1) - \ln \Gamma(y_i + 1) -
                 \ln \Gamma(n_i - y_i + 1) +
                 y_i \log(\mu_i/(1-\mu_i)) + n_i \log(1-\mu_i)
        where :math:`y_i = Y_i * n_i` with :math:`Y_i` and :math:`n_i` as
        defined in Binomial initialize.  This simply makes :math:`y_i` the
        original number of successes.
        """
        # special setup
        # see _Setup_binomial(self) in generalized_linear_model.py
        tmp = self.family.initialize(endog, 1)
        endog = tmp[0]
        if np.shape(self.family.n) == () and self.family.n == 1:
            return scale * (endog * np.log(old_div(mu, (1 - mu)) + 1e-200) +
                            np.log(1 - mu)).reshape(-1, )
        else:
            y = endog * self.family.n  # convert back to successes
            return scale * (special.gammaln(self.family.n + 1) -
                            special.gammaln(y + 1) -
                            special.gammaln(self.family.n - y + 1) +
                            y * np.log(old_div(mu, (1 - mu))) +
                            self.family.n * np.log(1 - mu)).reshape(-1, )
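
A hedged usage sketch for loglike_per_sample with a binary response; it assumes the module-level imports the snippet relies on (L as the statsmodels links module, old_div from past.utils, special from scipy):

import numpy as np

wrapper = BinomialWrapper()
endog = np.array([0., 1., 1., 0.])
mu = np.array([0.2, 0.7, 0.9, 0.4])
log_p = wrapper.loglike_per_sample(endog, mu)
# entries equal log(mu_i) where y_i = 1 and log(1 - mu_i) where y_i = 0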
Example #9
def update_peaks_fit_regression(data, NE_date_ranges, NE_var, data_name_var,
                                round_date_var, peak_date_var,
                                peak_date_buffer, scalar_vars, formula,
                                max_iter, l2_weight, regression_type):
    """
    Randomly update peak times and fit regression.
    """
    NE_peak_dates_i = NE_date_ranges.apply(lambda x: np.random.choice(x, 1)[0])
    NE_peak_dates_i_df = NE_peak_dates_i.reset_index().rename(
        columns={0: peak_date_var})
    #         data_peak_dates_i = data_date_ranges.apply(lambda x: np.random.choice(x, 1)[0]).reset_index().rename(columns={0 : peak_date_var})
    data_i = pd.merge(data,
                      NE_peak_dates_i_df,
                      on=[NE_var, data_name_var],
                      how='inner')
    # reassign peaks
    data_i = data_i.assign(
        **{
            'pre_peak': (
                data_i.loc[:, round_date_var] <= data_i.loc[:, peak_date_var] -
                peak_date_buffer).astype(int),
            'post_peak': (
                data_i.loc[:, round_date_var] >= data_i.loc[:, peak_date_var] +
                peak_date_buffer).astype(int),
            'during_peak':
            ((data_i.loc[:, round_date_var] > data_i.loc[:, peak_date_var] -
              peak_date_buffer)
             & (data_i.loc[:, round_date_var] < data_i.loc[:, peak_date_var] +
                peak_date_buffer)).astype(int),
        })
    # add days since post-peak
    data_i = data_i.assign(
        **{
            'since_peak':
            data_i.loc[:, 'post_peak'] *
            (data_i.loc[:, round_date_var] - data_i.loc[:, peak_date_var])
        })
    # Z-norm all scalar vars
    scaler = StandardScaler()
    for v in scalar_vars:
        data_i = data_i.assign(
            **
            {v: scaler.fit_transform(data_i.loc[:, v].values.reshape(-1, 1))})
    model_full = GLM.from_formula(formula,
                                  data_i,
                                  family=Binomial(link=logit()))
    logging.debug(
        '%d/%d/%d pre/during/post data' %
        (data_i.loc[:, 'pre_peak'].sum(), data_i.loc[:, 'during_peak'].sum(),
         data_i.loc[:, 'post_peak'].sum()))
    if (regression_type == 'regularized_logit'):
        model_res_full = model_full.fit_regularized(maxiter=max_iter,
                                                    method='elastic_net',
                                                    alpha=l2_weight,
                                                    L1_wt=0.0)
        model_res_full_err = compute_err_data(model_res_full)
        err = model_res_full_err.loc[:, 'SE']
    else:
        model_res_full = model_full.fit()
        err = model_res_full.bse
    params = model_res_full.params
    return params, err, NE_peak_dates_i
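
The pre/during/post indicators above reduce to window comparisons around the sampled peak date; a minimal illustration with integer "dates" (all names local to this sketch):

import pandas as pd

df = pd.DataFrame({'round_date': [1, 4, 5, 8], 'peak_date': [5, 5, 5, 5]})
buf = 2
df['pre_peak'] = (df['round_date'] <= df['peak_date'] - buf).astype(int)
df['post_peak'] = (df['round_date'] >= df['peak_date'] + buf).astype(int)
df['during_peak'] = ((df['round_date'] > df['peak_date'] - buf)
                     & (df['round_date'] < df['peak_date'] + buf)).astype(int)
df['since_peak'] = df['post_peak'] * (df['round_date'] - df['peak_date'])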
Example #10
def test_weights(data, dep_var, cat_vars, scalar_vars, l2_weights):
    max_iter = 100  # iteration cap for the regularized fits below
    indep_formula = ' + '.join(
        ['C(%s)' % (cat_var) for cat_var in cat_vars] + scalar_vars)
    formula = '%s ~ %s' % (dep_var, indep_formula)
    # convert raw data to exogenous data
    # need to do this to force train/test
    # to have same features
    # shuffle rows; DataFrame.sample avoids the silent no-op of shuffling a
    # .values copy when the frame has mixed dtypes
    data_rand = data.sample(frac=1).reset_index(drop=True)
    model_dummy = GLM.from_formula(formula,
                                   data_rand,
                                   family=Binomial(link=logit()))
    exog = model_dummy.exog
    exog_names = model_dummy.exog_names
    endog = model_dummy.endog
    # generate cross validation folds
    cross_val_folds = 10
    N = data_rand.shape[0]
    cross_val_chunk_size = float(N) / cross_val_folds
    cross_val_fold_train_idx = [
        list(
            range(int(floor(i * cross_val_chunk_size)),
                  int(ceil((i + 1) * cross_val_chunk_size))))
        for i in range(cross_val_folds)
    ]
    cross_val_fold_test_idx = [
        list(range(0, int(ceil(i * cross_val_chunk_size)))) +
        list(range(int(floor((i + 1) * cross_val_chunk_size)), N))
        for i in range(cross_val_folds)
    ]
    weight_likelihoods = []
    for l2_weight in l2_weights:
        print('testing weight = %.3f' % (l2_weight))
        likelihoods_l2 = []
        for i, (train_idx_i, test_idx_i) in enumerate(
                zip(cross_val_fold_train_idx, cross_val_fold_test_idx)):
            print('fold %d' % (i))
            train_XY = data_rand.iloc[train_idx_i, :]
            test_X = exog[test_idx_i, :]
            test_Y = endog[test_idx_i]
            # fit model
            model_i = GLM.from_formula(formula,
                                       train_XY,
                                       family=Binomial(link=logit()))
            model_res_i = model_i.fit_regularized(maxiter=max_iter,
                                                  method='elastic_net',
                                                  alpha=l2_weight,
                                                  L1_wt=0.)
            # add 0 params for missing coefficients to match X shape
            # (reindex tolerates labels absent from this fold's fit)
            model_res_i.params = model_res_i.params.reindex(exog_names).fillna(0)
            # score test data
            likelihood_i = compute_log_likelihood(model_res_i.params, test_Y,
                                                  test_X)
            likelihoods_l2.append(likelihood_i)
        weight_likelihoods.append(likelihoods_l2)
    weight_likelihoods = pd.DataFrame(np.array(weight_likelihoods),
                                      index=l2_weights)
    # average across folds (columns) to get one mean likelihood per weight
    mean_weight_likelihoods = weight_likelihoods.mean(axis=1)
    return mean_weight_likelihoods
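
A hypothetical call sketch; the column names and the weight grid are placeholders:

l2_grid = [0.001, 0.01, 0.1, 1.0]
mean_lls = test_weights(data, 'anchor', ['username'], ['age'], l2_grid)
best_weight = mean_lls.idxmax()  # weight with the highest mean held-out likelihood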
Example #11
def run_regression(data,
                   formula,
                   regression_type,
                   dep_var='anchor',
                   out_dir='../../output',
                   split_var=None,
                   split_var_val=0):
    """
    Run logit regression on data with given formula 
    and write to file.
    Option: use regularized logit (reduce variable inflation).
    
    :param data: full data
    :param formula: regression formula
    :param regression_type: type of regression (logit|regularized_logit)
    :param dep_var: dependent variable
    :param out_dir: output directory
    :param split_var: optional variable to split data (e.g. only organization accounts)
    :param split_var_val: value of split value variable (if included)
    """
    l2_weight = 0.01
    max_iter = 100
    model_full = GLM.from_formula(formula, data, family=Binomial(link=logit()))
    if (regression_type == 'regularized_logit'):
        model_res_full = model_full.fit_regularized(maxiter=max_iter,
                                                    method='elastic_net',
                                                    alpha=l2_weight,
                                                    L1_wt=0.0)
    else:
        model_res_full = model_full.fit()

    ## summary stats
    model_res_full_err = compute_err_data(model_res_full)
    # write to file
    reg_out_str = 'anchor_%s_output_%s.tsv' % (regression_type,
                                               formula.replace(' ', ''))
    if (split_var is not None):
        reg_out_str = 'anchor_%s_output_%s_split_%s=%s.tsv' % (
            regression_type, formula.replace(' ',
                                             ''), split_var, split_var_val)
    res_out_file = os.path.join(out_dir, reg_out_str)
    model_res_full_err.to_csv(res_out_file, sep='\t', index=True)

    ## save coeffs to file => pretty print as latex
    # need lots of decimal points! for multiple variable correction
    pd.options.display.float_format = '{:,.5f}'.format
    tex_out_str = reg_out_str.replace('.tsv', '.tex')
    tex_res_out_file = os.path.join(out_dir, tex_out_str)
    model_res_full_err = model_res_full_err.assign(
        **{'coeff': model_res_full_err.index})
    tex_data_cols = ['coeff', 'mean', 'SE', 'p_val']
    model_res_full_err.to_latex(tex_res_out_file,
                                columns=tex_data_cols,
                                index=False)

    ## compute regression fit parameters => deviance, AIC, etc.
    # start with chi2 test against null model
    llr, model_df, p_val = compute_chi2_null_test(model_res_full, data,
                                                  dep_var, max_iter, l2_weight)
    logging.debug('N=%d, LLR=%.5f, df=%d, p-val=%.3E' %
                  (data.shape[0], llr, model_df, p_val))
    # variance inflation factor: are some of the covariates highly collinear?
    # for sanity we only look at non-categorical vars
    cat_var_matcher = re.compile(
        r'C\(.+\)\[T\..+\]|Intercept'
    )  # format="C(var_name)[T.var_val]" ("C(username)[T.barackobama]")
    non_cat_params = [
        param for param in model_res_full.params.index
        if cat_var_matcher.search(param) is None
    ]
    for param in non_cat_params:
        VIF_i = compute_VIF(model_res_full, param)
        logging.debug('VIF test: param=%s, VIF=%.3f' % (param, VIF_i))

    ## compute accuracy on k-fold classification
    ## we would use R-squared but that doesn't work for logistic regression
    # first get data into usable format
    n_splits = 10
    accs = k_fold_acc(model_full.exog, model_full.endog, k=n_splits)
    mean_acc = np.mean(accs)
    se_acc = np.std(accs) / n_splits**.5
    logging.debug('%d-fold mean accuracy = %.3f +/- %.3f' %
                  (n_splits, mean_acc, se_acc))
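
A hypothetical invocation; the formula and split variable are placeholders:

run_regression(data,
               'anchor ~ C(username) + age',
               'regularized_logit',
               split_var='organization',
               split_var_val=1)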
Example #12
def irls_incremental_dm(filename,
                        chunksize,
                        yPos=None,
                        family=Binomial(),
                        link=Logit(),
                        maxit=25,
                        tol=1e-08,
                        header=None,
                        headerNames=None):
    x = None
    nRows = chunksize
    tmp = pd.read_csv(filename,
                      delimiter=',',
                      header=None,
                      nrows=1,
                      parse_dates=[1])
    nCols = tmp.shape[1] - 1
    if yPos is None:
        if header is None:
            yPos = nCols
        else:
            # header row present: take the response name from the last column
            yPos = str(tmp.iloc[0, nCols])
    for j in range(1, maxit + 1):
        generator = _generator(filename=filename,
                               header=header,
                               chunk_size=nRows)
        if x is None: x = np.zeros(nCols)
        ATWA = np.zeros((nCols, nCols))
        ATWz = np.zeros(nCols)
        for rowA in generator:
            yb = np.asarray(rowA[yPos].astype(np.float32))
            A = np.asarray(rowA.drop([yPos], axis=1), dtype=np.float32)
            eta = np.matmul(A, x)
            eta = eta.reshape(len(eta))
            g = link.inverse(eta)
            mu_eta = link.inverse_deriv
            gprime = mu_eta(eta)
            z = np.array(eta + (yb - g) / gprime)
            varianceFam = family.variance
            linkinvFam = link.inverse
            g = linkinvFam(eta)
            varg = varianceFam(g)
            W = gprime**2 / varg
            W = W.reshape(len(W), 1)
            cross2 = np.matmul(np.transpose(A), np.asarray(W.reshape(-1) * z))
            ATWz = ATWz + cross2
            cross1 = np.matmul(np.transpose(A), np.asarray(W * A))
            ATWA = ATWA + cross1
        xold = x
        C, rank, piv = cholesky_pivot(ATWA, full_pivot=True)
        if rank < C.shape[1]:
            raise LinAlgError("Rank-deficiency detected.")
        # forward/back substitution; np.argsort(piv) undoes the pivot permutation
        x = solve_triangular(np.transpose(C), ATWz[piv - 1], lower=True)
        x = solve_triangular(C, x, lower=False)[np.argsort(piv)]
        if np.linalg.norm(x - xold) < tol:
            break
    if headerNames is not None:
        x = pd.DataFrame(x, index=headerNames)
    elif header is not None:
        x = pd.DataFrame(x, index=list(rowA.drop([yPos], axis=1)))
    else:
        x = pd.DataFrame(x)
    return (x, j)
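
A hypothetical call for the design-matrix variant: a headerless CSV whose last column is the response, streamed in 10,000-row chunks (the file name is a placeholder):

coefs, n_iter = irls_incremental_dm('design_matrix.csv', chunksize=10000)
print(coefs)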
Example #13
def glm_svd_newton_dm(X,
                      y,
                      family=Binomial(),
                      link=Logit(),
                      maxit=25,
                      tol=1e-08,
                      stol=1e-08,
                      singular_ok=True,
                      weights=None,
                      reg_method="column projection"):
    # economy SVD: S[0]=U, S[1]=singular values, S[2]=V^T
    S = list(svd(X, full_matrices=False))
    nVars = S[2].shape[1]
    idx = np.arange(nVars)
    i = (S[1] / S[1][0]) > stol
    k = np.sum(i)
    pivot = np.arange(nVars)
    if k < nVars:
        if reg_method == "column projection":
            # column subset selection: pivoted QR of the leading k rows of
            # V^T picks k well-conditioned columns of X; the rest are omitted
            Q, R, pivot = qr(S[2][:k, :], pivoting=True)
            idx = np.sort(pivot[:k])
            omit = pivot[k:]
            S_new = list(svd(np.delete(np.asarray(X), omit, axis=1),
                             full_matrices=False))
            if S_new[1][-1] / S_new[1][0] <= stol:
                print("Whoops! SVD subset selection failed, "
                      "trying pivoted QR on the full matrix")
                Q, R, pivot = qr(np.asarray(X), pivoting=True)
                idx = np.sort(pivot[:k])
                omit = pivot[k:]
                S_new = list(svd(np.delete(np.asarray(X), omit, axis=1),
                                 full_matrices=False))
        S = S_new
        print("omitting column(s) ", omit)

    s = np.zeros(nVars)
    nobs = y.shape[0]
    nVars = S[2].shape[1]
    if weights is None:
        weights = np.ones(nobs)
    varianceFam = family.variance
    linkinvFam = link.inverse
    mu_eta = link.inverse_deriv
    etastart = None
    if len(y.shape) == 1:
        mustart = (weights * y + 0.5) / (weights + 1)
    else:
        # two-column (successes, failures) response
        n = y[:, 0] + y[:, 1]
        ytmp = np.where(n == 0, 0, y[:, 0] / np.where(n == 0, 1, n))
        mustart = (n * ytmp + 0.5) / (n + 1)
    eta = link(mustart)
    dev_resids = lambda y, m, w: family.resid_dev(y, m, w)**2
    dev = np.sum(dev_resids(y, linkinvFam(eta), weights))
    devold = 0
    for j in range(maxit):
        g = linkinvFam(eta)
        varg = varianceFam(g)
        if (np.any(np.isnan(varg))):
            raise LinAlgError("NAs in variance of the inverse link function")
        if (np.any(varg == 0)):
            raise LinAlgError(
                "Zero value in variance of the inverse link function")
        gprime = mu_eta(eta)
        if (np.any(np.isnan(gprime))):
            raise LinAlgError("NAs in the inverse link function derivative")
        z = eta + (y - g) / gprime
        W = weights * (gprime**2 / varg)
        W = W.reshape(len(W), 1)
        # weighted normal equations in the basis of the first k left
        # singular vectors: (U_k' W U_k) s = U_k' W z
        U = S[0][:, :k]
        cross1 = np.matmul(np.transpose(U), W * U)
        C, rank_bn, piv = cholesky_pivot(cross1, full_pivot=True)
        cross2 = np.matmul(np.transpose(np.asarray(U)),
                           np.asarray(W.reshape(-1) * z))[piv - 1]
        s = solve_triangular(np.transpose(C), cross2, lower=True)
        s = solve_triangular(C, s, lower=False)[np.argsort(piv)]
        eta = np.matmul(U, s)
        dev = np.sum(dev_resids(y, g, weights))
        if (np.absolute(dev - devold) / (0.1 + np.absolute(dev)) < tol): break
        devold = dev
    x = np.empty(X.shape[1])
    x[:] = np.nan
    inV = 1 / S[1]
    if (reg_method == "minimum norm"): inV[inV > 1 / stol] = 1
    x[idx] = np.matmul(S[2].T, (s * inV).reshape(-1, 1)).reshape(-1)
    x = pd.DataFrame(x, index=list(X))  # list(X) yields column names for a DataFrame
    return (x, j + 1, k, pivot
            )  # coefficients=x,iterations=j, rank=k, pivot=pivot
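
A hedged example on a small, well-conditioned problem; X is passed as a DataFrame so the returned coefficients pick up column names, and the module-level helpers (cholesky_pivot, Binomial, Logit) are assumed to be in scope:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
X = pd.DataFrame(rng.normal(size=(300, 3)), columns=['a', 'b', 'c'])
eta_true = 0.5 * X['a'] - 1.0 * X['b']
y = (rng.random(300) < 1 / (1 + np.exp(-eta_true))).astype(float).values

coef, iters, rank, pivot = glm_svd_newton_dm(X, y)
print(coef)  # rows indexed 'a', 'b', 'c'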