Esempio n. 1
0
def fitZINB(preCellType,postCellType):
    """Fit an intercept-only zero-inflated negative binomial model and export it.

    Reads the single-column count file for the given cell-type pair from the
    ``data_dense_model`` directory, fits ``sm.ZeroInflatedNegativeBinomialP``
    with a column of ones as the only regressor, and writes two files into
    the ``fit_dense_model`` directory:

    * ``<pre>_<post>_ZINB.csv`` -- observed count frequencies (``x``) next to
      the predicted probabilities averaged over axis 0 (``xFit``);
    * ``<pre>_<post>_ZINB_FitResults.csv`` -- the fit summary as CSV text.

    Parameters
    ----------
    preCellType, postCellType
        Values interpolated into the input and output file names.

    Returns
    -------
    None
    """
    # Local import: paths are joined with the platform separator instead of a
    # hard-coded backslash (which was also an invalid string escape).
    import os

    pairName = "%s_%s" % (preCellType, postCellType)

    # Get data from file
    filename = os.path.join("data_dense_model", pairName + ".csv")
    df = pd.read_csv(filename, header=None, names=["data"])

    # Prepare data for fitting
    counts = df.data
    nobs = len(counts)
    exog = np.ones(nobs)
    freq = np.bincount(counts) / nobs

    # Fit data
    mod_ZINB = sm.ZeroInflatedNegativeBinomialP(counts, exog)
    res_ZINB = mod_ZINB.fit(disp=False)

    # Get fitting results
    probs_zinb = res_ZINB.predict(which='prob')
    probsm_zinb = probs_zinb.mean(0)

    # Export freq and probsm_zinb
    values = {'x': freq, 'xFit': probsm_zinb}
    outputDF = DataFrame(values, columns=['x', 'xFit'])
    outputfilename = os.path.join("fit_dense_model", pairName + "_ZINB.csv")
    outputDF.to_csv(outputfilename, index=None, header=True)

    # Export fit results; the context manager closes the file even if the
    # write fails.
    summaryCsv = res_ZINB.summary().as_csv()
    outputfilenameFit = os.path.join(
        "fit_dense_model", pairName + "_ZINB_FitResults.csv")
    with open(outputfilenameFit, "w") as text_file:
        text_file.write(summaryCsv)
def tiny_zinb(l):
    """Fit a regularized zero-inflated negative binomial model on ``l``.

    Each item of ``l`` is split into its first element (the response) and
    its remaining elements (the regressors).

    Parameters
    ----------
    l
        Sequence of indexable items; ``item[0]`` is the response value and
        ``item[1:]`` the regressor values.

    Returns
    -------
    ``[fit_results, predictions, rmse]`` after a successful fit. When the
    response has no non-zero entry, or fitting raises a "Singular matrix"
    ``LinAlgError`` or an ``AssertionError``, whatever ``return_zeros``
    produces. For a ``ValueError`` or any other ``LinAlgError`` the fallback
    ``[None, None, 0]`` is returned.
    """
    xtr = np.array([item[1:] for item in l])
    ytr = np.array([item[0] for item in l]).reshape(-1, 1)

    # Fallback result: without it the handled-but-unrecovered error paths
    # below would reach ``return`` with ``zinb_res`` unbound.
    zinb_res = [None, None, 0]

    try:
        if np.count_nonzero(ytr) > 0:
            zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytr, xtr).fit_regularized(
                maxiter=10000, disp=0, maxfun=10000)
            zinb_pred = zinb_mod.predict(
                xtr, which="mean", exog_infl=np.ones((len(xtr), 1)))
            zinb_rmse = np.sqrt(mean_squared_error(ytr, zinb_pred))
            zinb_res = [zinb_mod, zinb_pred, zinb_rmse]
        else:
            zinb_res = return_zeros(ytr, "AllZeros")

    except np.linalg.LinAlgError as e:
        if 'Singular matrix' in str(e):
            # Regularization is expected to avoid this; record the share of
            # zeros (in percent) of the offending response for later review.
            nzeros = len(ytr) - np.count_nonzero(ytr)
            zinb_res = return_zeros(ytr, "Singular")
            prop = round((100 * nzeros) / len(ytr), 2)
            zinb_prop_err_singmat.append(prop)
    except AssertionError:
        zinb_res = return_zeros(ytr, "Assert")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")
    return zinb_res
Esempio n. 3
0
def test_poi_nb_zip_zinb_tiny_subset(meta, m):
    """Compare Poisson, NB, ZIP and ZINB fits on a random 800-row sample of ``m``.

    Column 0 of the sample is the response and the remaining columns are the
    regressors. The sample is split 60/40 into training and test parts, each
    model is fitted on the training part and its test RMSE is printed. For
    the Poisson and zero-inflated Poisson models the RMSE is computed against
    the 0.95 quantile of a Poisson distribution with the predicted mean; for
    the two negative binomial models it is computed against the prediction
    itself.

    Parameters
    ----------
    meta
        Unused; kept so existing callers keep working.
    m
        Two-dimensional array indexed as ``m[rows, :]``.

    Notes
    -----
    Reseeds NumPy's global random generator with ``2``.
    """
    np.random.seed(2)

    # NOTE(review): ``high`` is exclusive in ``np.random.randint``, so the
    # last row of ``m`` is never drawn. Left unchanged so the seeded sample
    # stays the same -- confirm whether ``len(m)`` was intended.
    sample_idx = np.random.randint(0, high=len(m)-1, size=800)
    msel = m[sample_idx, :]

    Y = msel[:, 0]
    X = msel[:, 1:]

    print("Msel shape: ", msel.shape)

    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=42)

    print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

    # Intercept-only design for the inflation part at prediction time.
    infl_test = np.ones((len(xtest), 1))

    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))
    print("RMSE Poisson: ", poi_rmse)

    print()
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))
    print("RMSE Negative Binomial: ", nb_rmse)

    print()
    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=infl_test)
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))
    print("RMSE Zero-Inflated Poisson", zip_rmse)

    print()
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=infl_test)
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
Esempio n. 4
0
def test_poi_nb_zip_zinb_raw_data(meta, m):
    """Compare Poisson, NB, ZIP and ZINB fits on all rows of ``m``.

    Column 0 of ``m`` is the response and the remaining columns are the
    regressors. The data is split 60/40 into training and test parts, each
    model is fitted on the training part and the four test RMSE values are
    printed at the end. For the Poisson and zero-inflated Poisson models the
    RMSE is computed against the 0.95 quantile of a Poisson distribution with
    the predicted mean; for the two negative binomial models it is computed
    against the prediction itself.

    Parameters
    ----------
    meta
        Unused; kept so existing callers keep working.
    m
        Two-dimensional array sliced as ``m[:, 0]`` and ``m[:, 1:]``.
    """
    Y = m[:, 0]
    X = m[:, 1:]
    xtrain, xtest, ytrain, ytest = train_test_split(
        X, Y, train_size=0.60, random_state=77)

    print("Training with: ", xtrain.shape, ytrain.shape)
    print("Testing with: ", xtest.shape, ytest.shape)

    # Intercept-only design for the inflation part at prediction time.
    infl_test = np.ones((len(xtest), 1))

    print()
    print("Model: Poisson")
    poi_mod = sm.Poisson(ytrain, xtrain).fit(method="newton", maxiter=50)
    poi_mean_pred = poi_mod.predict(xtest)
    poi_ppf_obs = stats.poisson.ppf(q=0.95, mu=poi_mean_pred)
    poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

    # The final report prints ``nb_rmse``, so the negative binomial model has
    # to be fitted here as well.
    print("Model: Neg. Binomial")
    nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(
        start_params=None, method='newton', maxiter=50)
    nb_pred = nb_mod.predict(xtest)
    nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

    print("Model: Zero Inflated Poisson")
    zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zip_mean_pred = zip_mod.predict(xtest, exog_infl=infl_test)
    zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
    zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

    # Fitted once and scored against ``ytest``: the predictions are made for
    # ``xtest``, so comparing them with ``ytrain`` would mismatch in length.
    print("Model: Zero Inflated Neg. Binomial")
    zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain, xtrain).fit(
        method="newton", maxiter=50)
    zinb_pred = zinb_mod.predict(xtest, exog_infl=infl_test)
    zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

    print()
    print("RMSE Poisson: ", poi_rmse)
    print("RMSE Negative Binomial: ", nb_rmse)
    print("RMSE Zero-Inflated Poisson", zip_rmse)
    print("RMSE Zero-Inflated Negative Binomial: ", zinb_rmse)
Esempio n. 5
0
        def setup_class(cls):
            """Fit a ZINB-P model (``p=2``) on the ``randhie`` dataset.

            The dataset's ``exog`` is used for both the count part and the
            inflation part. The BFGS fit starts from a fixed parameter vector
            and is stored on ``cls.res``; the response is stored on
            ``cls.endog``.
            """
            randhie = sm.datasets.randhie.load(as_pandas=False)
            cls.endog = randhie.endog
            design = randhie.exog

            # Fixed starting point for the optimizer (19 values).
            initial_guess = np.array(
                [-2.83983767, -2.31595924, -3.9263248 , -4.01816431, -5.52251843,
                -2.4351714 , -4.61636366, -4.17959785, -0.12960256, -0.05653484,
                -0.21206673,  0.08782572, -0.02991995,  0.22901208,  0.0620983 ,
                 0.06809681,  0.0841814 ,  0.185506  ,  1.36527888])

            zinb = sm.ZeroInflatedNegativeBinomialP(
                cls.endog, design, exog_infl=design, p=2)
            cls.res = zinb.fit(
                start_params=initial_guess,
                method="bfgs",
                maxiter=1000,
                disp=0,
            )
Esempio n. 6
0
    def setup_class(cls):
        """Simulate zero-inflated negative binomial data and fit a ZINB-P model.

        Builds a 500-row design with a column of ones and a second column
        that is 0 for the first half of the rows and 1 for the rest, draws
        the response from ``sm.distributions.zinegbin`` after seeding NumPy's
        global generator, and fits the model with BFGS. Stores the response
        on ``cls.endog``, the fit on ``cls.res`` and the inflation
        probability used for the draw on ``cls.prob_infl``.
        """
        true_values = [1, 1, 0.5]
        infl_share = 0.15
        n_rows = 500

        np.random.seed(987123)

        design = np.ones((n_rows, 2))
        design[:n_rows // 2, 1] = 0

        # The last entry of ``true_values`` is passed to the distribution
        # separately; the leading entries are the mean coefficients.
        mean_true = np.exp(design.dot(true_values[:-1]))
        cls.endog = sm.distributions.zinegbin.rvs(
            mean_true, true_values[-1], 2, infl_share, size=mean_true.shape)

        zinb = sm.ZeroInflatedNegativeBinomialP(cls.endog, design, p=2)
        cls.res = zinb.fit(method='bfgs', maxiter=5000, maxfun=5000, disp=0)

        # attach others
        cls.prob_infl = infl_share
 def setup_class(cls):
     """Fit a ZINB-P model (``p=2``) on two ``randhie`` regressors.

     Column 1 of ``exog`` plus a trailing constant drives the count part and
     column 0 plus a trailing constant drives the inflation part. The
     Nelder-Mead fit starts from fixed parameters and is stored on
     ``cls.res1``; the reference results go on ``cls.res2``.
     """
     randhie = sm.datasets.randhie.load(as_pandas=False)
     cls.endog = randhie.endog

     count_design = sm.add_constant(randhie.exog[:,1], prepend=False)
     infl_design = sm.add_constant(randhie.exog[:,0], prepend=False)

     # cheating for now, parameters are not well identified in this dataset
     # see https://github.com/statsmodels/statsmodels/pull/3928#issuecomment-331724022
     initial_guess = np.array([1.88, -10.28, -0.20, 1.14, 1.34])

     zinb = sm.ZeroInflatedNegativeBinomialP(
         randhie.endog, count_design, exog_infl=infl_design, p=2)
     cls.res1 = zinb.fit(start_params=initial_guess, method='nm',
                         xtol=1e-6, maxiter=5000, disp=0)
     # for llnull test
     cls.res1._results._attach_nullmodel = True

     cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
     cls.init_kwds = {'inflation': 'logit', 'p': 2}
     cls.res2 = RandHIE.zero_inflated_negative_binomial
 def setup_class(cls):
     """Fit a ZINB-P model (``p=2``) on two ``randhie`` regressors.

     Column 1 of ``exog`` plus a trailing constant drives the count part and
     column 0 plus a trailing constant drives the inflation part. The
     Nelder-Mead fit (at most 500 iterations) is stored on ``cls.res1``; a
     ``RandHIE`` instance populated through its
     ``zero_inflated_negative_binomial`` method is stored on ``cls.res2``.
     """
     randhie = sm.datasets.randhie.load()
     cls.endog = randhie.endog

     # NOTE(review): ``exog[:, k]`` assumes ``load()`` returns array-like
     # data that supports positional slicing -- confirm for the statsmodels
     # version in use.
     count_design = sm.add_constant(randhie.exog[:, 1], prepend=False)
     infl_design = sm.add_constant(randhie.exog[:, 0], prepend=False)

     zinb = sm.ZeroInflatedNegativeBinomialP(
         randhie.endog, count_design, exog_infl=infl_design, p=2)
     cls.res1 = zinb.fit(method='nm', maxiter=500)
     # for llnull test
     cls.res1._results._attach_nullmodel = True

     cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
     cls.init_kwds = {'inflation': 'logit', 'p': 2}

     expected = RandHIE()
     expected.zero_inflated_negative_binomial()
     cls.res2 = expected
Esempio n. 9
0
def get_oinb_estimate(scTRIP):
    """Fit an intercept-only ZINB model per key and tabulate the results.

    For every key of ``scTRIP`` whose counts are non-empty and not all zero,
    fits ``sm.ZeroInflatedNegativeBinomialP`` against a column of ones and
    records the fitted ``const`` and ``alpha`` parameters together with
    descriptive statistics of the counts.

    Parameters
    ----------
    scTRIP
        Mapping-like object exposing ``keys()`` and ``scTRIP[key]``; each
        value is an iterable of counts.

    Returns
    -------
    pandas.DataFrame
        One row per fitted key with the columns ``tBC``, ``mean``,
        ``median``, ``var``, ``auc``, ``mu``, ``alpha``, ``ncells``,
        ``counts`` and ``sum``. ``mu`` holds the fitted ``const`` parameter
        and ``auc`` the value returned by ``get_auc``. Keys that are skipped
        produce no row.
    """
    key_list = []
    mu_list = []
    alpha_list = []
    mean_list = []
    median_list = []
    auc_list = []
    var_list = []
    ncells_list = []
    counts_list = []
    sum_list = []

    for key in scTRIP.keys():
        logger.info('we are dealing with cell %s', key)
        raw_counts = list(scTRIP[key])
        # Nothing to fit when there are no counts or every count is zero.
        if not raw_counts or max(raw_counts) == 0:
            continue

        key_list.append(key)
        counts = pd.Series(raw_counts)
        res = sm.ZeroInflatedNegativeBinomialP(
            counts, np.ones_like(counts)).fit(maxiter=200)
        alpha_list.append(res.params['alpha'])
        mu_list.append(res.params['const'])

        # Statistics are computed on the counts exactly as supplied.
        ncells_list.append(len(counts))
        counts_list.append(list(counts))
        mean_list.append(np.mean(counts))
        median_list.append(np.median(counts))
        sum_list.append(np.sum(counts))
        var_list.append(np.var(counts))
        auc_list.append(get_auc(counts))

    # Each list is one row here; the transpose turns them into columns.
    pop_df = pd.DataFrame([
        key_list, mean_list, median_list, var_list, auc_list, mu_list,
        alpha_list, ncells_list, counts_list, sum_list
    ]).transpose()
    pop_df.columns = [
        'tBC', 'mean', 'median', 'var', 'auc', 'mu', 'alpha', 'ncells',
        'counts', 'sum'
    ]
    return pop_df
Esempio n. 10
0
def tiny_zinb(l):
    """Fit a zero-inflated negative binomial model on ``l`` with Newton steps.

    Each item of ``l`` is split into its first element (the response) and
    its remaining elements (the regressors). A "Singular matrix"
    ``LinAlgError`` or a ``ValueError`` is reported on stdout and ignored;
    any other ``LinAlgError`` is ignored silently.

    Returns
    -------
    list
        ``[fit_results, predictions, rmse]``. Entries not reached before an
        ignored error keep their defaults (``None``, ``None`` and ``0``).
    """
    print("\t\tRunning Zero-Inflated NegBin")

    features = np.array([row[1:] for row in l])
    target = np.array([row[0] for row in l]).reshape(-1, 1)

    fitted = None
    predicted = None
    rmse = 0

    try:
        model = sm.ZeroInflatedNegativeBinomialP(target, features)
        fitted = model.fit(method="newton", maxiter=50)
        # Intercept-only design for the inflation part.
        infl_design = np.ones((len(features), 1))
        predicted = fitted.predict(features, exog_infl=infl_design)
        rmse = np.sqrt(mean_squared_error(target, predicted))
    except np.linalg.LinAlgError as err:
        if 'Singular matrix' in str(err):
            print("\t\t\tIgnored a singular matrix.")
    except ValueError:
        print("\t\t\tIgnored output containing np.nan or np.inf")

    return [fitted, predicted, rmse]
Esempio n. 11
0
# Script fragment: scores four count models on a train/test split and prints
# their RMSE values. ``xtrain``, ``xtest``, ``ytrain``, ``ytest`` and
# ``poi_ppf_obs`` are defined before this fragment.

# RMSE of the Poisson model, computed against ``poi_ppf_obs``.
poi_rmse = np.sqrt(mean_squared_error(ytest, poi_ppf_obs))

print("Model: Neg. Binomial")
nb_mod = sm.NegativeBinomial(ytrain, xtrain).fit(start_params=None,
                                                 method='newton',
                                                 maxiter=50)
nb_pred = nb_mod.predict(xtest)
nb_rmse = np.sqrt(mean_squared_error(ytest, nb_pred))

# Debug output: shape of a one-dimensional vector of ones, one per test row.
print(np.ones(len(xtest)).shape)

print("Model: Zero Inflated Poisson")
zip_mod = sm.ZeroInflatedPoisson(ytrain, xtrain).fit(method="newton",
                                                     maxiter=50)
# The inflation part is predicted with a single column of ones.
zip_mean_pred = zip_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
# Scored against the 0.95 quantile of a Poisson with the predicted mean,
# not against the predicted mean itself.
zip_ppf_obs = stats.poisson.ppf(q=0.95, mu=zip_mean_pred)
zip_rmse = np.sqrt(mean_squared_error(ytest, zip_ppf_obs))

print("Model: Zero Inflated Neg. Binomial")
zinb_mod = sm.ZeroInflatedNegativeBinomialP(ytrain,
                                            xtrain).fit(method="newton",
                                                        maxiter=50)
# The inflation part is predicted with a single column of ones.
zinb_pred = zinb_mod.predict(xtest, exog_infl=np.ones((len(xtest), 1)))
zinb_rmse = np.sqrt(mean_squared_error(ytest, zinb_pred))

print(xtrain.shape, ytrain.shape, xtest.shape, ytest.shape)

# Final report of the four test-set RMSE values.
print("RMSE Poisson: ", poi_rmse)
print("RMSE Neg. Bin.: ", nb_rmse)
print("RMSE ZIP", zip_rmse)
print("RMSE ZINB: ", zinb_rmse)