# NOTE: these tests assume the module-level imports used throughout this file
# (numpy as np, pandas as pd, the model classes, and the numerical-derivative
# helpers fo_fc_cd / so_gc_cd), plus a module-level `rng` generator used by
# test_gauls, test_sem_med, and test_gam.


def test_nb2():
    seed = 1234
    rng = np.random.default_rng(seed)
    n_obs, n_var, n_nnz, rsq, k = 2000, 20, 4, 0.9**2, 4.0
    X = exact_rmvnorm(np.eye(n_var), n=n_obs, seed=seed)
    # Place a repeating block of nonzero coefficients in the first n_nnz slots
    beta = np.zeros(n_var)
    bv = np.array([-1.0, -0.5, 0.5, 1.0])
    bvals = np.tile(bv, n_nnz // len(bv))
    if n_nnz % len(bv) > 0:
        bvals = np.concatenate([bvals, bv[:n_nnz % len(bv)]])
    beta[:n_nnz] = bvals
    eta = X.dot(beta) / np.sqrt(np.sum(beta**2))
    # Add normal noise so the linear predictor explains rsq of the variance
    lpred = rng.normal(eta, scale=np.sqrt(eta.var() * (1.0 - rsq) / rsq))
    mu = np.exp(lpred)
    # NB2 mean-variance relation var = mu + k * mu**2, converted to numpy's
    # negative_binomial(n, p) parameterization
    var = mu + k * mu**2
    n = -mu**2 / (mu - var)
    p = mu / var
    y = rng.negative_binomial(n=n, p=p)
    xcols = [f"x{i}" for i in range(1, n_var + 1)]
    data = pd.DataFrame(X, columns=xcols)
    data['y'] = y
    formula = "y~1+" + "+".join(xcols)
    model = NegativeBinomial(formula=formula, data=data)
    params_init = model.params.copy() + 0.01
    model.fit()
    params = model.params.copy()
    theta = np.array([0.13049303, -0.64878454, -0.30956394, 0.2903795,
                      0.58677555, -0.03022705, 0.03989469, 0.01182953,
                      -0.00498391, 0.00788808, -0.04198716, -0.00162041,
                      0.01523861, -0.00401566, -0.02547227, -0.07309814,
                      -0.05574522, 0.00938691, -0.0034148, -0.01254539,
                      -0.05221309, 1.41286364])
    # Check analytic derivatives against central-difference approximations at
    # the perturbed starting values and at the optimum
    g_num1 = fo_fc_cd(model.loglike, params_init)
    g_ana1 = model.gradient(params_init)
    g_num2 = fo_fc_cd(model.loglike, params)
    g_ana2 = model.gradient(params)
    H_num1 = so_gc_cd(model.gradient, params_init)
    H_ana1 = model.hessian(params_init)
    H_num2 = so_gc_cd(model.gradient, params)
    H_ana2 = model.hessian(params)
    assert np.allclose(model.params, theta)
    assert np.allclose(g_num1, g_ana1)
    assert np.allclose(g_num2, g_ana2, atol=1e-4)
    assert np.allclose(H_num1, H_ana1)
    assert np.allclose(H_num2, H_ana2)
    assert model.opt_full.success

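# A minimal sketch (not exercised by the tests) of the NB2 reparameterization
# used in test_nb2: with var = mu + k*mu**2, numpy's negative_binomial(n, p)
# recovers mean mu and variance var when n = mu**2/(var - mu) = 1/k and
# p = mu/var = 1/(1 + k*mu). The helper name `_nb2_to_np` is illustrative,
# not part of the library under test.
def _nb2_to_np(mu, k):
    var = mu + k * mu**2
    n = mu**2 / (var - mu)  # reduces to 1/k for any mu > 0
    p = mu / var            # reduces to 1/(1 + k*mu)
    return n, p
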
def test_holzingercfa():
    data = pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/sem/HS.data.csv",
                       index_col=0)
    df = data.loc[:, ["visual", "cubes", "flags", "paragrap", "sentence",
                      "wordm", "addition", "counting", "straight"]]
    df = df / np.array([6., 4., 8., 3., 4., 7., 23., 20., 36.])
    # Three-factor confirmatory model with three indicators per factor
    Lambda = np.zeros((9, 3))
    Lambda[0:3, 0] = 1.0
    Lambda[3:6, 1] = 1.0
    Lambda[6:9, 2] = 1.0
    Lambda = pd.DataFrame(Lambda, index=df.columns,
                          columns=['visual', 'textual', 'speed'])
    Beta = pd.DataFrame(np.zeros((3, 3)), index=Lambda.columns,
                        columns=Lambda.columns)
    Phi = Beta + np.eye(3) + 0.05
    Psi = pd.DataFrame(np.eye(9), index=df.columns, columns=df.columns)
    model = SEM(Lambda, Beta, Phi, Psi, data=df)
    model.fit()
    theta = np.array([0.55372, 0.729526, 1.113068, 0.926117, 1.180358,
                      1.083565, 0.809095, 0.408174, 0.261583, 0.979517,
                      0.173783, 0.383061, 0.549275, 1.133711, 0.844258,
                      0.371148, 0.446243, 0.356234, 0.796578, 0.488285,
                      0.567506])
    assert np.allclose(model.theta, theta)
    assert model.opt.success
    assert np.abs(model.opt.grad).max() < 1e-5
    assert np.allclose(model.gradient(theta + 0.1),
                       fo_fc_cd(model.loglike, theta + 0.1))
    assert np.allclose(model.hessian(theta),
                       so_gc_cd(model.gradient, theta))

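# For reference, the implied covariance that a (Lambda, Beta, Phi, Psi)
# structural model of this form classically reproduces is
#   Sigma = Lambda (I - Beta)^{-1} Phi (I - Beta)^{-T} Lambda^T + Psi.
# A sketch with ndarray inputs follows; this is the textbook formula, assumed
# here for orientation rather than taken from the SEM class's internals.
def _implied_cov(Lambda, Beta, Phi, Psi):
    IB = np.linalg.inv(np.eye(Beta.shape[0]) - Beta)
    return Lambda @ IB @ Phi @ IB.T @ Lambda.T + Psi
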
def test_gauls():
    # `rng` is assumed to be a module-level np.random.default_rng instance
    n_obs = 20000
    df = pd.DataFrame(np.zeros((n_obs, 4)), columns=['x0', 'x1', 'x2', 'y'])
    df['x0'] = rng.choice(np.arange(5), size=n_obs, p=np.ones(5) / 5)
    df['x1'] = rng.uniform(-1, 1, size=n_obs)
    df['x2'] = rng.uniform(-1, 1, size=n_obs)
    u0 = dummy(df['x0']).dot(np.array([-0.2, 0.2, -0.2, 0.2, 0.0]))
    f1 = (3.0 * df['x1']**3 - 2.43 * df['x1'])
    f2 = -(3.0 * df['x2']**3 - 2.43 * df['x2'])
    f3 = (df['x2'] - 1.0) * (df['x2'] + 1.0)
    eta = u0 + f1 + f2
    mu = eta.copy()
    # The scale varies smoothly with x2
    tau = 1.0 / (np.exp(f3) + 0.01)
    df['y'] = rng.normal(loc=mu, scale=tau)
    model = GauLS("y~C(x0)+s(x1, kind='cr')+s(x2, kind='cr')",
                  "y~1+s(x2, kind='cr')", df)
    model.fit()
    assert model.opt.success
    assert (np.abs(model.opt.grad) < 1e-6).all()
    theta = np.array([4.235378, 4.165951, 7.181457])
    assert np.allclose(theta, model.theta)
    eps = np.finfo(float).eps**(1 / 4)
    grad_close = np.allclose(model.gradient(theta),
                             fo_fc_cd(model.reml, theta),
                             atol=eps, rtol=eps)
    hess_close = np.allclose(model.hessian(theta),
                             so_gc_cd(model.gradient, theta),
                             atol=eps, rtol=eps)
    assert grad_close
    assert hess_close

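# Note on test_gauls: the model takes two formulas, one additive predictor for
# the location (mean) and one for the scale. The simulated scale tau above is a
# smooth function of x2, which is why x2 appears in both the mean formula and
# the scale formula "y~1+s(x2, kind='cr')".
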
def test_zip():
    seed = 1234
    rng = np.random.default_rng(seed)
    X = exact_rmvnorm(vine_corr(3, 5, seed=seed), seed=seed)
    Z = exact_rmvnorm(vine_corr(2, 5, seed=seed), seed=seed)
    b, a = rng.normal(0.0, 0.5, size=(3)), rng.normal(size=(2))
    # Logistic model for the probability of a (potentially) nonzero count
    u = np.exp(Z.dot(a))
    prob = u / (1.0 + u)
    ybin = rng.binomial(1, p=prob)
    y = np.zeros(len(ybin), dtype=float)
    # Poisson counts for the non-structural-zero observations
    mu = np.exp(X.dot(b))
    q = rng.poisson(mu[ybin == 1])
    y[ybin == 1] = q * 1.0
    model = ZIP(X, y, Z)
    model.fit(opt_kws=dict(verbose=3, gtol=1e-9, xtol=1e-200))
    theta = np.array([-0.81434379, 0.02346997, 0.34179484, 0.00402158,
                      -0.82916627])
    g1 = fo_fc_cd(model.loglike, model.params * 0.98)
    g2 = model.gradient(model.params * 0.98)
    H1 = so_gc_cd(model.gradient, model.params * 0.98)
    H2 = model.hessian(model.params * 0.98)
    assert np.allclose(g1, g2)
    assert np.allclose(H1, H2)
    assert np.allclose(model.params, theta)

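# A minimal numpy/scipy sketch of the zero-inflated-Poisson log-likelihood a
# model like ZIP maximizes, assuming the standard mixture form (the library's
# internal parameterization is not asserted here): with mixing probability pi
# and Poisson rate mu, P(y=0) = pi + (1-pi)*exp(-mu) and
# P(y=j) = (1-pi)*Poisson(j; mu) for j >= 1. `_zip_loglike` is illustrative.
def _zip_loglike(y, mu, pi):
    from scipy.special import gammaln
    ll = np.where(y == 0,
                  np.log(pi + (1.0 - pi) * np.exp(-mu)),
                  np.log1p(-pi) - mu + y * np.log(mu) - gammaln(y + 1.0))
    return ll.sum()
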
def test_clm():
    SEED = 1234
    rng = np.random.default_rng(SEED)
    n_obs, n_var, rsquared = 10_000, 8, 0.25
    S = np.eye(n_var)
    X = exact_rmvnorm(S, n=n_obs, seed=SEED)
    beta = np.zeros(n_var)
    beta[np.arange(n_var // 2)] = rng.choice([-1., 1., -0.5, 0.5], n_var // 2)
    var_names = [f"x{i}" for i in range(1, n_var + 1)]
    eta = X.dot(beta)
    # Scale the noise so the linear predictor explains rsquared of the variance
    eta_var = eta.var()
    scale = np.sqrt((1.0 - rsquared) / rsquared * eta_var)
    y = rng.normal(eta, scale=scale)
    df = pd.DataFrame(X, columns=var_names)
    # Discretize the latent response into 7 ordered categories
    df["y"] = pd.qcut(y, 7).codes
    formula = "y~-1+" + "+".join(var_names)
    model = CLM(frm=formula, data=df)
    model.fit()
    # Six thresholds followed by the eight regression coefficients
    theta = np.array([-2.08417224, -1.08288221, -0.34199706, 0.34199368,
                      1.08217316, 2.08327387, 0.37275823, 0.37544884,
                      0.3572407, 0.71165265, 0.0086888, -0.00846944,
                      0.00975741, 0.01257564])
    assert np.allclose(theta, model.params)
    params_init, params = model.params_init.copy(), model.params.copy()
    tol = np.finfo(float).eps**(1 / 3)
    assert np.allclose(model.gradient(params_init),
                       fo_fc_cd(model.loglike, params_init))
    assert np.allclose(model.gradient(params),
                       fo_fc_cd(model.loglike, params), atol=tol)
    assert np.allclose(model.hessian(params_init),
                       so_gc_cd(model.gradient, params_init))
    assert np.allclose(model.hessian(params),
                       so_gc_cd(model.gradient, params))

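# For orientation, a sketch of the cumulative-link probabilities an ordinal
# model of this kind is built on, assuming a logit link (the link used by CLM
# is not asserted in this test). With ordered thresholds tau (here six of them)
# and linear predictor eta, P(y <= j) = F(tau_j - eta), and the category
# probabilities are successive differences. `_clm_probs` is illustrative.
def _clm_probs(tau, eta):
    cdf = 1.0 / (1.0 + np.exp(-(tau[:, None] - eta[None, :])))
    cdf = np.vstack([np.zeros_like(eta), cdf, np.ones_like(eta)])
    return np.diff(cdf, axis=0)  # one row of probabilities per category
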
def test_nlsy_model():
    vechS = [2.926, 1.390, 1.698, 1.628, 1.240, 0.592, 0.929, 0.659,
             4.257, 2.781, 2.437, 0.789, 1.890, 1.278, 0.949,
             4.536, 2.979, 0.903, 1.419, 1.900,
             1.731, 5.605, 1.278, 1.004,
             1.000, 2.420, 3.208,
             1.706, 1.567, 0.988,
             3.994, 1.654, 1.170,
             3.583, 1.146,
             3.649]
    S = pd.DataFrame(invech(np.array(vechS)),
                     columns=['anti1', 'anti2', 'anti3', 'anti4',
                              'dep1', 'dep2', 'dep3', 'dep4'])
    S.index = S.columns
    X = pd.DataFrame(exact_rmvnorm(S.values, 180, seed=123), columns=S.columns)
    X += np.array([1.750, 1.928, 1.978, 2.322, 2.178, 2.489, 2.294, 2.222])
    data = pd.DataFrame(X, columns=S.columns)
    Lambda = pd.DataFrame(np.eye(8), index=data.columns, columns=data.columns)
    Lambda = Lambda.iloc[[1, 2, 3, 5, 6, 7, 0, 4], [1, 2, 3, 5, 6, 7, 0, 4]]
    Beta = pd.DataFrame([[0, 0, 0, 0, 0, 0, 1, 1],
                         [1, 0, 0, 1, 0, 0, 0, 0],
                         [0, 1, 0, 0, 1, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 1, 1],
                         [1, 0, 0, 1, 0, 0, 0, 0],
                         [0, 1, 0, 0, 1, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0]],
                        index=Lambda.columns, columns=Lambda.columns)
    Phi = pd.DataFrame([[1.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
                        [0.0, 1.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0],
                        [0.0, 0.0, 1.0, 0.0, 0.0, 0.1, 0.0, 0.0],
                        [0.1, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                        [0.0, 0.1, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
                        [0.0, 0.0, 0.1, 0.0, 0.0, 1.0, 0.0, 0.0],
                        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.1],
                        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 1.0]],
                       index=Lambda.columns, columns=Lambda.columns)
    Psi = Lambda.copy() * 0.0
    data = data.loc[:, Lambda.index]
    model = SEM(Lambda, Beta, Phi, Psi, data=data)
    model.fit()
    theta = np.array([0.62734, 0.147299, 0.69399, 0.318353, 0.058419,
                      0.344418, -0.088914, 0.151027, 0.443465, -0.027558,
                      0.074533, 0.542448, 3.581777, 1.500315, 2.70847,
                      1.001634, 3.626519, 1.3206, 3.084899, 2.825085,
                      2.924854, 2.926, 1.24, 3.208])
    assert np.allclose(model.theta, theta)
    assert model.opt.success
    assert np.abs(model.opt.grad).max() < 1e-5
    assert np.allclose(model.gradient(theta + 0.1),
                       fo_fc_cd(model.loglike, theta + 0.1))
    assert np.allclose(model.hessian(theta),
                       so_gc_cd(model.gradient, theta))

def test_efa():
    rng = np.random.default_rng(123)
    n_obs = 10000
    n_vars = 15
    n_facs = 3
    Phi = np.eye(n_facs)
    Phi[triang_inds(n_facs)] = 0.3
    Phi = scale_diag(Phi, np.array([3.0, 2.0, 1.0]))
    X, S = simulate_factor_model(n_obs=n_obs, n_vars=n_vars, n_facs=n_facs,
                                 Phi=Phi, rng=rng)
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    model = FactorAnalysis(X, n_factors=n_facs, rotation_method="quartimax")
    model.fit()
    x = model.theta + 0.1
    g1 = fo_fc_cd(model.loglike, x)
    g2 = model.gradient(x)
    grad_close = np.allclose(g1, g2)
    H1 = model.hessian_approx(x)
    H2 = model.hessian(x)
    hess_close = np.allclose(H1, H2)
    assert grad_close
    assert hess_close
    Lambda = np.array([[ 0.9679300157,  0.0020941483, -0.0037152832],
                       [ 0.9808302280, -0.0009770430, -0.0012631786],
                       [ 0.9656468771,  0.0018262625, -0.0012049879],
                       [ 0.9546006929, -0.0046688628,  0.0069179692],
                       [ 0.9310045011,  0.0018245676, -0.0004898625],
                       [-0.0090650550,  0.9322148607, -0.0013228623],
                       [-0.0034623862,  0.9003171814, -0.0006441778],
                       [ 0.0023954874,  0.9219262395,  0.0013681845],
                       [ 0.0070442678,  0.8307209928, -0.0007938577],
                       [ 0.0089643198,  0.7637942355,  0.0023902527],
                       [-0.0031777987, -0.0020738436,  0.8190368257],
                       [ 0.0060579383,  0.0070006435,  0.8130428008],
                       [-0.0041559830, -0.0117483714,  0.6734727957],
                       [-0.0003143868, -0.0002077725,  0.7248903305],
                       [-0.0008859088,  0.0070824021,  0.5460441786]])
    Phi = np.array([[ 1.0000000000, 0.2885578, -0.0005263667],
                    [ 0.2885577889, 1.0000000,  0.3186286381],
                    [-0.0005263667, 0.3186286,  1.0000000000]])
    assert np.allclose(model.L, Lambda, atol=1e-5)
    assert np.allclose(model.Phi, Phi, atol=1e-5)

def test_sem_bollen():
    data_bollen = pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/sem/Bollen.csv",
                              index_col=0)
    data_bollen = data_bollen[['x1', 'x2', 'x3', 'y1', 'y2', 'y3', 'y4',
                               'y5', 'y6', 'y7', 'y8']]
    L = np.array([[1., 0., 0.],
                  [1., 0., 0.],
                  [1., 0., 0.],
                  [0., 1., 0.],
                  [0., 1., 0.],
                  [0., 1., 0.],
                  [0., 1., 0.],
                  [0., 0., 1.],
                  [0., 0., 1.],
                  [0., 0., 1.],
                  [0., 0., 1.]])
    B = np.array([[0., 0., 0.],
                  [1., 0., 0.],
                  [1., 1., 0.]])
    Lambda1 = pd.DataFrame(L, index=data_bollen.columns,
                           columns=['ind60', 'dem60', 'dem65'])
    Beta1 = pd.DataFrame(B, index=Lambda1.columns, columns=Lambda1.columns)
    Psi1 = pd.DataFrame(np.eye(Lambda1.shape[0]), index=Lambda1.index,
                        columns=Lambda1.index)
    # Free the correlated residuals used in Bollen's political democracy model
    off_diag = [['y1', 'y5'], ['y2', 'y4'], ['y3', 'y7'], ['y4', 'y8'],
                ['y6', 'y8'], ['y2', 'y6']]
    for x, y in off_diag:
        Psi1.loc[x, y] = Psi1.loc[y, x] = 0.05
    Phi1 = pd.DataFrame(np.eye(Lambda1.shape[1]), index=Lambda1.columns,
                        columns=Lambda1.columns)
    model = SEM(Lambda1, Beta1, Phi1, Psi1, data=data_bollen)
    model.fit()
    theta = np.array([2.18036773, 1.81851130, 1.25674650, 1.05771677,
                      1.26478654, 1.18569630, 1.27951218, 1.26594698,
                      1.48300054, 0.57233619, 0.83734481, 0.44843715,
                      3.95603311, 0.17248133, 0.08154935, 0.11980648,
                      0.46670258, 1.89139552, 0.62367107, 7.37286854,
                      1.31311258, 2.15286127, 5.06746210, 0.79496028,
                      3.14790480, 0.34822604, 2.35097047, 4.95396775,
                      1.35616712, 3.43137392, 3.25408501])
    assert np.allclose(model.theta, theta)
    assert model.opt.success
    assert np.abs(model.opt.grad).max() < 1e-5
    assert np.allclose(model.gradient(theta + 0.1),
                       fo_fc_cd(model.loglike, theta + 0.1))
    assert np.allclose(model.hessian(theta),
                       so_gc_cd(model.gradient, theta))

def test_lmm():
    rng = np.random.default_rng(123)
    n_grp, n_per = 100, 100
    formula = "y~1+x1+x2+x3+(1+x3|id1)"
    model_dict = {}
    model_dict["ginfo"] = dict(id1=dict(n_grp=n_grp, n_per=n_per))
    model_dict["beta"] = np.array([0.0, 0.5, -1.0, 1.0])
    model_dict["vcov"] = np.eye(3)
    model_dict["mu"] = np.zeros(3)
    model_dict["gcov"] = {"id1": invech(np.array([2.0, -1.0, 2.0]))}
    model_dict["n_obs"] = n_grp * n_per
    group_dict = dict(id1=np.repeat(np.arange(n_grp), n_per))
    rfe, rre = 0.4, 0.4
    msim = MixedModelSim(formula=formula, model_dict=model_dict, rng=rng,
                         group_dict=group_dict,
                         var_ratios=np.array([rfe, rre]))
    # Fixed-effect, random-effect, and residual shares of the total variance
    var_ratios = np.array([msim.v_fe, msim.v_re, msim.v_rs])
    var_ratios = var_ratios / np.sum(var_ratios)
    assert np.allclose(var_ratios, np.array([0.4, 0.4, 0.2]))
    df = msim.df.copy()
    df["y"], u = msim.simulate_response()
    model = LMM(formula, data=df)
    model.fit(opt_kws=dict(xtol=1e-16, gtol=1e-12))
    assert model.optimizer.success
    assert (np.abs(model.optimizer.grad) < 1e-2).all()
    theta = np.array([0.95418349, -0.36657794, 0.84136618, 1.12001821])
    assert np.allclose(model.theta, theta)
    eps = np.finfo(float).eps**(1 / 4)
    grad_close = np.allclose(model.gradient(theta),
                             fo_fc_cd(model.loglike, theta),
                             atol=eps, rtol=eps)
    hess_close = np.allclose(model.hessian(theta),
                             so_gc_cd(model.gradient, theta),
                             atol=eps, rtol=eps)
    assert grad_close
    assert hess_close

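# For reference: invech inverts the half-vectorization of a symmetric matrix,
# so the random-effects covariance requested in test_lmm is
#   invech([2.0, -1.0, 2.0]) -> [[ 2.0, -1.0],
#                                [-1.0,  2.0]],
# i.e. intercept and x3-slope variances of 2.0 with covariance -1.0; the
# residual variance share is the remaining 1 - 0.4 - 0.4 = 0.2 checked above.
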
def test_sem_med():
    # `rng` is assumed to be a module-level np.random.default_rng instance
    x = rng.normal(0, 1, size=1000)
    x = (x - x.mean()) / x.std()
    u = rng.normal(0, 1, size=1000)
    u = (u - u.mean()) / u.std()
    v = rng.normal(0, 1, size=1000)
    v = (v - v.mean()) / v.std()
    # Data are generated from x -> m -> y only; the fitted model also frees a
    # direct x -> y path (Beta below)
    m = 0.5 * x + u
    y = 0.7 * m + v
    data_path = pd.DataFrame(np.vstack((x, m, y)).T, columns=['x', 'm', 'y'])
    data_path = data_path - data_path.mean(axis=0)
    Lambda = pd.DataFrame(np.eye(3), index=data_path.columns,
                          columns=data_path.columns)
    Beta = np.array([[0.0, 0.0, 0.0],
                     [1.0, 0.0, 0.0],
                     [1.0, 1.0, 0.0]])
    Beta = pd.DataFrame(Beta, index=data_path.columns,
                        columns=data_path.columns)
    Phi = np.array([[1.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0]])
    Phi = pd.DataFrame(Phi, index=data_path.columns, columns=data_path.columns)
    Psi = np.array([[0.0, 0.0, 0.0],
                    [0.0, 1.0, 0.0],
                    [0.0, 0.0, 1.0]])
    Psi = pd.DataFrame(Psi, index=data_path.columns, columns=data_path.columns)
    model = SEM(Lambda, Beta, Phi, Psi, data=data_path)
    model.fit(opt_kws=dict(options=dict(gtol=1e-20, xtol=1e-100)))
    theta = np.array([0.52667623, 0.13011021, 0.40325746, 1.00000000,
                      0.99928838, 1.43672041])
    assert np.allclose(model.theta, theta)
    assert model.opt.success
    assert np.abs(model.opt.grad).max() < 1e-5
    assert np.allclose(model.gradient(theta + 0.1),
                       fo_fc_cd(model.loglike, theta + 0.1))
    assert np.allclose(model.hessian(theta),
                       so_gc_cd(model.gradient, theta))

def test_betareg():
    SEED = 1234
    rng = np.random.default_rng(SEED)
    n_obs = 10_000
    X = exact_rmvnorm(np.eye(4) / 100, n=n_obs, seed=SEED)
    Z = exact_rmvnorm(np.eye(2) / 100, n=n_obs, seed=SEED)
    betam = np.array([4.0, 1.0, -1.0, -2.0])
    betas = np.array([2.0, -2.0])
    etam = X.dot(betam) + 1.0
    etas = 2 + Z.dot(betas)  # np.tanh(Z.dot(betas))/2.0 + 2.4
    # Mean-precision parameterization: E[y] = mu with precision phi
    mu, phi = LogitLink().inv_link(etam), LogLink().inv_link(etas)
    a = mu * phi
    b = (1.0 - mu) * phi
    y = rng.beta(a, b)
    xcols = [f"x{i}" for i in range(1, 4 + 1)]
    zcols = [f"z{i}" for i in range(1, 2 + 1)]
    data = pd.DataFrame(np.hstack((X, Z)), columns=xcols + zcols)
    data["y"] = y
    m_formula = "y~1+" + "+".join(xcols)
    s_formula = "y~1+" + "+".join(zcols)
    model = BetaReg(m_formula=m_formula, s_formula=s_formula, data=data)
    model.fit()
    theta = np.array([0.99819859, 3.92262116, 1.02091902, -0.98526682,
                      -1.9795528, 1.98535573, 2.06533661, -2.06805411])
    assert np.allclose(model.theta, theta)
    g1 = fo_fc_cd(model.loglike, model.theta * 0.95)
    g2 = model.gradient(model.theta * 0.95)
    assert np.allclose(g1, g2)
    H1 = so_gc_cd(model.gradient, model.theta)
    H2 = model.hessian(model.theta)
    assert np.allclose(H1, H2)
    assert model.optimizer.success
    assert (np.abs(model.optimizer.grad) < 1e-5).all()

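# For reference: with a = mu*phi and b = (1 - mu)*phi as in test_betareg,
# Beta(a, b) has E[y] = a/(a + b) = mu and Var[y] = mu*(1 - mu)/(1 + phi),
# so mu is the conditional mean and phi acts as a precision parameter.
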
def test_gam():
    # `rng` is assumed to be a module-level np.random.default_rng instance
    n_obs = 10_000
    X = pd.DataFrame(np.zeros((n_obs, 4)), columns=['x0', 'x1', 'x2', 'y'])
    X['x0'] = rng.choice(np.arange(5), size=n_obs, p=np.ones(5) / 5)
    X['x1'] = rng.uniform(-1, 1, size=n_obs)
    X['x2'] = rng.uniform(-2, 2, size=n_obs)
    u0 = dummy(X['x0']).dot(np.array([-0.2, 0.2, -0.2, 0.2, 0.0]))
    f1 = (3.0 * X['x1']**3 - 2.43 * X['x1'])
    f2 = (X['x2']**3 - X['x2']) / 10
    eta = u0 + f1 + f2
    mu = np.exp(eta)
    # For the gamma distribution, mu = shape * scale
    shape = mu * 2.0
    X['y'] = rng.gamma(shape=shape, scale=1.0)
    model = GAM("y~C(x0)+s(x1, kind='cr')+s(x2, kind='cr')", X,
                family=Gamma(link=LogLink))
    model.fit()
    assert model.opt.success
    assert (np.abs(model.opt.grad) < 1e-5).all()
    theta = np.array([49.617907465401451, 407.234892157726449])
    assert np.allclose(np.exp(model.theta)[:-1], theta)
    theta = model.theta.copy()
    eps = np.finfo(float).eps**(1 / 4)
    grad_close = np.allclose(model.gradient(theta),
                             fo_fc_cd(model.reml, theta),
                             atol=eps, rtol=eps)
    hess_close = np.allclose(model.hessian(theta),
                             so_gc_cd(model.gradient, theta),
                             atol=eps, rtol=eps)
    assert grad_close
    assert hess_close

def test_glm():
    seed = 1234
    rng = np.random.default_rng(seed)
    response_dists = ['Gaussian', 'Binomial', 'Poisson', 'Gamma', 'InvGauss']
    n_obs, nx, r = 2000, 10, 0.5
    n_true = nx // 4
    R = vine_corr(nx, 10, seed=seed)
    X = {}
    X1 = exact_rmvnorm(R, n_obs, seed=seed)
    X2 = exact_rmvnorm(R, n_obs, seed=seed)
    # Shift X2 to be strictly positive for the inverse-Gaussian model
    X2 = X2 - np.min(X2, axis=0) + 0.1
    X['Gaussian'] = X1.copy()
    X['Binomial'] = X1.copy()
    X['Poisson'] = X1.copy()
    X['Gamma'] = X1.copy()
    X['InvGauss'] = X2.copy()
    beta = dict(Binomial=np.zeros(nx), Poisson=np.zeros(nx),
                Gamma=np.zeros(nx), Gaussian=np.zeros(nx),
                InvGauss=np.zeros(nx))
    beta['Gaussian'][:n_true * 2] = np.concatenate(
        (0.5 * np.ones(n_true), -0.5 * np.ones(n_true)))
    beta['Binomial'][:n_true * 2] = np.concatenate(
        (0.5 * np.ones(n_true), -0.5 * np.ones(n_true)))
    beta['Poisson'][:n_true * 2] = np.concatenate(
        (0.5 * np.ones(n_true), -0.5 * np.ones(n_true)))
    beta['Gamma'][:n_true * 2] = np.concatenate(
        (0.1 * np.ones(n_true), -0.1 * np.ones(n_true)))
    beta['InvGauss'][:n_true * 2] = np.concatenate(
        (0.1 * np.ones(n_true), 0.1 * np.ones(n_true)))
    for dist in response_dists:
        beta[dist] = beta[dist][rng.choice(nx, nx, replace=False)]
    eta = {}
    eta_var = {}
    u_var = {}
    u = {}
    linpred = {}
    for dist in response_dists:
        eta[dist] = X[dist].dot(beta[dist])
        eta_var[dist] = eta[dist].var()
        u_var[dist] = np.sqrt(eta_var[dist] * (1.0 - r) / r)
        u[dist] = rng.normal(0, u_var[dist], size=(n_obs))
        linpred[dist] = u[dist] + eta[dist]
        if dist in ['InvGauss']:
            linpred[dist] -= linpred[dist].min()
            linpred[dist] += 0.01
    Y = {}
    Y['Gaussian'] = IdentityLink().inv_link(linpred['Gaussian'])
    Y['Binomial'] = rng.binomial(
        n=10, p=LogitLink().inv_link(linpred['Binomial'])) / 10.0
    Y['Poisson'] = rng.poisson(lam=LogLink().inv_link(linpred['Poisson']))
    Y['Gamma'] = rng.gamma(shape=LogLink().inv_link(linpred['Gamma']),
                           scale=3.0)
    Y['InvGauss'] = rng.wald(mean=PowerLink(-2).inv_link(eta['InvGauss']),
                             scale=2.0)
    data = {}
    formula = "y~" + "+".join([f"x{i}" for i in range(1, nx + 1)])
    for dist in response_dists:
        data[dist] = pd.DataFrame(
            np.hstack((X[dist], Y[dist].reshape(-1, 1))),
            columns=[f'x{i}' for i in range(1, nx + 1)] + ['y'])
    models = {}
    models['Gaussian'] = GLM(formula=formula, data=data['Gaussian'],
                             fam=Gaussian(), scale_estimator='NR')
    models['Binomial'] = GLM(formula=formula, data=data['Binomial'],
                             fam=Binomial(weights=np.ones(n_obs) * 10.0))
    models['Poisson'] = GLM(formula=formula, data=data['Poisson'],
                            fam=Poisson())
    models['Gamma'] = GLM(formula=formula, data=data['Gamma'], fam=Gamma())
    models['Gamma2'] = GLM(formula=formula, data=data['Gamma'], fam=Gamma(),
                           scale_estimator='NR')
    models['InvGauss'] = GLM(formula=formula, data=data['InvGauss'],
                             fam=InverseGaussian(), scale_estimator='NR')
    models['Gaussian'].fit()
    models['Binomial'].fit()
    models['Poisson'].fit()
    models['Gamma'].fit()
    models['Gamma2'].fit()
    models['InvGauss'].fit()
    grad_conv = {}
    grad_conv["Gaussian"] = np.mean(models['Gaussian'].optimizer.grad**2) < 1e-6
    grad_conv["Binomial"] = np.mean(models['Binomial'].optimizer.grad**2) < 1e-6
    grad_conv["Poisson"] = np.mean(models['Poisson'].optimizer.grad**2) < 1e-6
    grad_conv["Gamma"] = models['Gamma'].optimizer['|g|'][-1] < 1e-6
    grad_conv["Gamma2"] = models['Gamma2'].optimizer['|g|'][-1] < 1e-6
    grad_conv["InvGauss"] = np.mean(models['InvGauss'].optimizer.grad**2) < 1e-6
    # Use the builtin all(): np.all over dict_values wraps them in a 0-d object
    # array and is unconditionally truthy, so it would never fail
    assert all(grad_conv.values())
    param_vals = {}
    param_vals["Gaussian"] = np.array([
        0.01677157, 0.01768816, 0.03232757, -0.50586418, 0.00538817,
        0.01215466, 0.46273009, 0.03222982, 0.51013559, -0.00482659,
        -0.44925714, -0.08297647])
    param_vals["Binomial"] = np.array([
        -0.04811123, 0.34608258, 0.02748488, 0.02109192, -0.35403311,
        0.37825192, -0.46275101, 0.00668586, 0.06837819, 0.00136615,
        0.00321255])
    param_vals["Poisson"] = np.array([
        0.78523498, -0.52630851, -0.0407732, 0.02971785, -0.03919242,
        -0.01845692, 0.34397533, -0.55594235, 0.0257876, 0.42205263,
        0.13051603])
    param_vals["Gamma"] = np.array([
        0.33020564, -0.00496934, -0.01392126, 0.03581743, -0.01186388,
        0.03645015, -0.00609281, -0.01056508, 0.00163984, -0.03324063,
        -0.00937269])
    param_vals["Gamma2"] = np.array([
        0.33020564, -0.00496934, -0.01392126, 0.03581743, -0.01186388,
        0.03645015, -0.00609281, -0.01056508, 0.00163984, -0.03324063,
        -0.00937269, 0.09260053])
    param_vals["InvGauss"] = np.array([
        0.51658718, -0.03040851, 0.14254292, 0.10087636, 0.05071923,
        -0.05297573, -0.04039982, -0.04293772, 0.1251764, -0.02370386,
        0.01912702, -0.66386179])
    param_close = {}
    grad_close = {}
    hess_close = {}
    for key in param_vals.keys():
        m = models[key]
        param_close[key] = np.allclose(param_vals[key], m.params)
        x = m.params * 0.98
        grad_close[key] = np.allclose(fo_fc_cd(m.loglike, x), m.gradient(x))
        hess_close[key] = np.allclose(so_gc_cd(m.gradient, x), m.hessian(x))
    assert all(param_close.values())
    assert all(grad_conv.values())
    assert all(grad_close.values())
    assert all(hess_close.values())