def test_16(self, obs_lognorm_obscorr):
    # evaluating the likelihood of all observations at once must agree
    # with evaluating each observation (row) separately
    like = leopy.Likelihood(obs_lognorm_obscorr, p_true='lognorm',
                            p_cond='norm')
    loc_true = [-0.02, 1.95]
    scale_true = [1.2, 2.9]
    shape_true = np.array([0.5, 1.44]).reshape(2, 1)
    rho = 0.54
    R = np.array([[1., rho], [rho, 1.]])
    p_all = like.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                   pool=self.pool)
    N = obs_lognorm_obscorr.df.shape[0]
    p_all2 = np.zeros(N)
    for i in range(N):
        obs = leopy.Observation(obs_lognorm_obscorr.df.iloc[i:i + 1],
                                'test', verbosity=0)
        like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm')
        p_all2[i] = like.p(loc_true, scale_true, shape_true=shape_true,
                           R_true=R, pool=self.pool)
    assert np.all(np.isclose(p_all.reshape(N), p_all2))
def test_12(self, obs_norm_MAR):
    like = leopy.Likelihood(obs_norm_MAR, p_true='norm', p_cond='norm')

    def f_mlnlike(x):
        loc_true = x[0:2]
        scale_true = x[2:4]
        rho = x[4]
        R = np.array([[1., rho], [rho, 1.]])
        pp = like.p(loc_true, scale_true, R_true=R, pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds([-np.inf, -np.inf, 1e-3, 1e-3, 1e-3],
                                   [np.inf, np.inf, 10., 10., 1 - 1e-3])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1., 0.3], bounds=bounds, method='SLSQP',
        options={'disp': True, 'ftol': 1e-12})
    assert np.all(np.isclose(
        optres.x,
        [-0.17991379, 1.49608098, 0.98586541, 2.69842305, 0.44114192],
        rtol=1e-5, atol=1e-5))
def test_14(self, obs_norm_cen_uncorr):
    like = leopy.Likelihood(obs_norm_cen_uncorr, p_true='norm',
                            p_cond='norm')

    def f_mlnlike(x):
        loc_true = x[0:2]
        scale_true = x[2:4]
        pp = like.p(loc_true, scale_true, pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds([-np.inf, -np.inf, 1e-3, 1e-3],
                                   [np.inf, np.inf, 10., 10.])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1.], bounds=bounds, method='SLSQP',
        options={'disp': True, 'ftol': 1e-12})
    assert np.all(np.isclose(
        optres.x, [0.54321826, 1.27320101, 0.97319273, 2.3491366],
        rtol=1e-5, atol=1e-5))
def test_15(self, obs_norm_obscorr):
    t0 = time.time()
    like = leopy.Likelihood(obs_norm_obscorr, p_true='norm', p_cond='norm')

    def f_mlnlike(x):
        loc_true = x[0:2]
        scale_true = x[2:4]
        rho = x[4]
        R = np.array([[1., rho], [rho, 1.]])
        pp = like.p(loc_true, scale_true, R_true=R, pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds(
        [-np.inf, -np.inf, 1e-3, 1e-3, -1 + 1e-3],
        [np.inf, np.inf, 10., 10., 1 - 1e-3])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1., 0.3], bounds=bounds, method='SLSQP',
        options={'disp': True, 'ftol': 1e-12})
    t1 = time.time()
    print('Needed {:.4f} s'.format(t1 - t0))
    print(optres.x)
    assert np.all(np.isclose(
        optres.x,
        [-1.08265859, 2.14778872, 1.18368684, 2.74908927, 0.49219241],
        rtol=1e-5, atol=1e-5))
def test_13(self, obs_norm_cen):
    like = leopy.Likelihood(obs_norm_cen, p_true='norm', p_cond='norm')

    def f_mlnlike(x):
        loc_true = x[0:2]
        scale_true = x[2:4]
        rho = x[4]
        R = np.array([[1., rho], [rho, 1.]])
        pp = like.p(loc_true, scale_true, R_true=R, pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds([-np.inf, -np.inf, 1e-3, 1e-3, 1e-3],
                                   [np.inf, np.inf, 10., 10., 1 - 1e-3])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1., 0.3], bounds=bounds, method='SLSQP',
        options={'disp': True, 'ftol': 1e-12})
    assert np.all(np.isclose(
        optres.x,
        [0.47954307, 1.2705067, 0.88797593, 2.36476421, 0.52029972],
        rtol=1e-5, atol=1e-5))
def test_9(self, obs_norm_no_error):
    like = leopy.Likelihood(obs_norm_no_error, p_true='norm', verbosity=-1)

    def f_mlnlike(x):
        loc_true = x[0:2]
        scale_true = x[2:4]
        rho = x[4]
        R = np.array([[1., rho], [rho, 1.]])
        pp = like.p(loc_true, scale_true, R_true=R, pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds(
        [-np.inf, -np.inf, 1e-3, 1e-3, 1e-3],
        [np.inf, np.inf, np.inf, np.inf, 1 - 1e-3])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1., 0.3], bounds=bounds, method='SLSQP',
        options={'disp': True, 'ftol': 1e-12})
    assert np.all(np.isclose(
        optres.x, [-1, 2, 1, 3, 0.4940357], rtol=1e-5, atol=1e-5))
def test_5(self):
    d = {'v0': [1, 2], 'e_v0': [0.1, 0.2],
         'v1': [3, 4], 'e_v1': [0.1, 0.1]}
    obs = leopy.Observation(pd.DataFrame(d), 'testdata', verbosity=0)
    like = leopy.Likelihood(obs, p_true='lognorm', verbosity=-1)
    p = like.p([0.5, 0.7], [1, 2], shape_true=[[1.4], [2.]], pool=self.pool)
    assert np.all(np.isclose(p, np.array([[0.0436189], [0.01067159]])))
def test_1(self):
    d = {'v0': [1, 2], 'e_v0': [0.1, 0.2],
         'v1': [3, 4], 'e_v1': [0.1, 0.1]}
    df = pd.DataFrame(d)
    obs = leopy.Observation(df, 'testdata', verbosity=0)
    like = leopy.Likelihood(obs, p_true='norm', verbosity=-1)
    stddev = [1, 2]
    mean = [0.5, 0.7]
    p = like.p(mean, stddev, pool=self.pool)
    p_v1 = scipy.stats.norm.pdf(df['v0'] - mean[0], scale=stddev[0])
    p_v2 = scipy.stats.norm.pdf(df['v1'] - mean[1], scale=stddev[1])
    assert np.all(np.isclose(p.T[0], p_v1 * p_v2))
def pdf(x, lgy):
    shape = x.shape
    x = x.flatten()
    lgy = lgy.flatten()
    obs = leopy.Observation({'v0': x, 'v1': 10**lgy}, 'true_pdf',
                            verbosity=-1)
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond=None, verbosity=-1)
    return (like.p(loc_true, scale_true, shape_true=shape_true, R_true=R)
            * 10**lgy[:, None] * np.log(10.)).reshape(shape)
def test_18(self):
    # consistency check for marginal likelihoods of subsets of variables:
    # p(0,1) p(0,2) / p(0)^2 must equal p(0,1,2) / p(0)
    v0 = [0.5, 2.0, 1.7, 1.1]
    ev0 = [0.1, 0.2, 0.3, 0.15]
    v1 = [3, 4, 5.2, 2.2]
    ev1 = [0.1, 0.1, 0.15, 0.12]
    v2 = [-2, 3, 1.7, 1.]
    ev2 = [0.2, 0.1, 0.05, 0.15]
    d = {'v0': v0, 'e_v0': ev0, 'v1': v1, 'e_v1': ev1,
         'v2': v2, 'e_v2': ev2}
    obs = leopy.Observation(d, 'test', verbosity=0)
    like = leopy.Likelihood(obs, p_true=['lognorm', 'gamma', 'norm'],
                            p_cond='norm')
    loc_true = [-0.02, 1.95, 1]
    scale_true = [0.7, 1.9, 2.5]
    shape_true = [[0.5], [2.03], []]
    p_0 = like.p(loc_true, scale_true, shape_true=shape_true, vars=[0],
                 pool=self.pool)
    p_01 = like.p(loc_true, scale_true, shape_true=shape_true, vars=[0, 1],
                  pool=self.pool)
    p_02 = like.p(loc_true, scale_true, shape_true=shape_true, vars=[0, 2],
                  pool=self.pool)
    p_012 = like.p(loc_true, scale_true, shape_true=shape_true,
                   pool=self.pool)
    assert np.all(np.isclose(p_01 / p_0 * p_02 / p_0, p_012 / p_0))
def test_3(self):
    d = {'v0': [1., 2., -4.], 'e_v0': [0.1, 0.2, 0.3],
         'v1': [3., 4., 1.], 'e_v1': [0.1, 0.1, 0.1]}
    df = pd.DataFrame(d)
    obs = leopy.Observation(df, 'testdata', verbosity=0)
    like = leopy.Likelihood(obs, p_true='norm', verbosity=-1)
    R = np.array([[1, -0.3], [-0.3, 1]])
    stddev = [1, 2]
    mean = [0.5, 0.7]
    cov = np.diag(stddev).dot(R.dot(np.diag(stddev)))
    p = like.p(mean, stddev, R_true=R, pool=self.pool)
    p_v1v2 = scipy.stats.multivariate_normal.pdf(df[['v0', 'v1']],
                                                 mean=mean, cov=cov)
    assert np.all(np.isclose(p.T[0], p_v1v2))
def pdf(x, lgy):
    shape = x.shape
    x = x.flatten()
    lgy = lgy.flatten()
    obs = leopy.Observation({'v0': x, 'v1': 10**lgy}, 'true_pdf',
                            verbosity=-1)
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond=None, verbosity=-1)
    return (like.p([0, 0], 10**ML_result[0:2], shape_true=10**ML_result[2:4],
                   R_true=ML_R)
            * 10**lgy[:, None] * np.log(10.)).reshape(shape)
def test_7(self):
    d = {'v0': [1, 2], 'e_v0': [0.1, 0.2],
         'v1': [3, 4], 'e_v1': [0.1, 0.1]}
    obs = leopy.Observation(pd.DataFrame(d), 'testdata', verbosity=0)
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm',
                            verbosity=-1)
    p = like.p([0.5, 0.7], [1, 2], shape_true=[[1.4], [2.]], pool=self.pool)
    assert np.all(np.isclose(p, np.array([[0.04415356], [0.01089342]]),
                             rtol=1e-5, atol=1e-5))
def test_10(self, obs_lognorm_no_error):
    like = leopy.Likelihood(obs_lognorm_no_error, p_true='lognorm',
                            verbosity=-1)

    def f_mlnlike(x):
        print(x)
        loc_true = x[0:2]
        scale_true = x[2:4]
        shape_true = x[4:6].reshape(2, 1)
        rho = x[6]
        R = np.array([[1., rho], [rho, 1.]])
        pp = like.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                    pool=self.pool)
        if np.sum(pp == 0) > 0:
            return np.inf
        else:
            return -np.sum(np.log(pp))

    bounds = scipy.optimize.Bounds(
        [-np.inf, -np.inf, 1e-3, 1e-3, 1e-3, 1e-3, -1 + 1e-3],
        [np.inf, np.inf, 10., 10., 10., 10., 1 - 1e-3])
    optres = scipy.optimize.minimize(
        f_mlnlike, [0., 0., 1., 1., 1., 1., 0.3], bounds=bounds,
        method='SLSQP', options={'disp': True, 'ftol': 1e-12})
    assert np.all(np.isclose(
        optres.x,
        [-0.01389813, 1.98866462, 1.17630436, 3.85686233, 0.53775924,
         1.47418086, 0.54154499],
        rtol=1e-5, atol=1e-5))
def test_8(self):
    d = {'v0': [1., 2., 0.8], 'e_v0': [1e-6, 1e-6, 1e-6],
         'v1': [3., 4., 1.], 'e_v1': [1e-6, 1e-6, 1e-6]}
    df = pd.DataFrame(d)
    obs = leopy.Observation(df, 'testdata', verbosity=0)
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm',
                            verbosity=-1)
    R = np.array([[1, -0.3], [-0.3, 1]])
    scale = [1, 2]
    loc = [0.5, 0.]
    shape = [[1], [1.5]]
    p = like.p(loc, scale, shape_true=shape, R_true=R, pool=self.pool)
    assert np.all(np.isclose(
        p, np.array([[0.05819145], [0.01415945], [0.12375991]]),
        rtol=1e-5, atol=1e-5))
print('rho(y) = {}'.format(np.corrcoef(y.T)))

df = pd.DataFrame(np.array([y[:, 0], y[:, 1], ey[:, 0], ey[:, 1]]).T,
                  columns=['v0', 'v1', 'e_v0', 'e_v1'])
obs = leopy.Observation(df, 'test', verbosity=0)

## --
print('Population parameter values: {:.3g} {:.3g} {:.3g} {:.3g} {:.3g}'.format(
    loc_true[0], loc_true[1], scale_true[0], scale_true[1], rho))
print('Sample parameter values: {:.3g} {:.3g} {:.3g} {:.3g} {:.3g}'.format(
    np.mean(y_true[:, 0]), np.mean(y_true[:, 1]), np.std(y_true[:, 0]),
    np.std(y_true[:, 1]), np.corrcoef(x.T)[0, 1]))

## -- set up Likelihood and find maximum likelihood parameters
like = leopy.Likelihood(obs, p_true='norm', p_cond='norm', rtol=1e-6)
like2 = leopy.Likelihood(obs, p_true='norm', rtol=1e-6)

# uncomment the following two lines to force numerical convolution
# like.p_obs[0].name = 'composite'
# like.p_obs[1].name = 'composite'

def f_mlnlike(x):
    # print(x)
    loc_true = x[0:2]
    scale_true = x[2:4]
    rho = x[4]
    R = np.array([[1., rho], [rho, 1.]])
    if np.any(np.linalg.eigvalsh(R) < 0):  # ensure pos.-semidefinite
        return 1000.
plt.legend(hs, ls, loc='upper right', markerfirst=False, handletextpad=-0.1)

if savefig:
    plt.savefig('joint_probability_{}.pdf'.format(irun))

if optimize:
    obs = leopy.Observation(df, 'joint probability')

    ## -- set up Likelihood and find maximum likelihood parameters
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm',
                            verbosity=0)

    if correlated:

        def f_mlnlike(x, *args):
            if np.any(np.isnan(x)):
                return 1000.
            df = args[0]
            Nobs = df.shape[0]
            if 0:
                loc_true = x[0:2]
                scale_true = 10**x[2:4]
def test_17(self):
    # marginal likelihoods obtained with vars=[0] and vars=[1] must match
    # a numerical integration of the joint likelihood over the other variable
    v0 = [0.5, 2.0, 1.7]
    ev0 = [0.1, 0.2, 0.3]
    v1 = [3, 4, 5.2]
    ev1 = [0.1, 0.1, 0.15]
    rv0v1 = [0.2, 0.8, -0.8]
    d = {'v0': v0, 'e_v0': ev0, 'v1': v1, 'e_v1': ev1, 'r_v0_v1': rv0v1}
    obs = leopy.Observation(d, 'test', verbosity=0)
    like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm')
    loc_true = [-0.02, 1.95]
    scale_true = [0.7, 1.9]
    shape_true = np.array([0.5, 2.03]).reshape(2, 1)
    rho = 0.0
    R = np.array([[1., rho], [rho, 1.]])
    p_x = like.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                 vars=[0], pool=self.pool)
    p_y = like.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                 vars=[1], pool=self.pool)

    import scipy.integrate
    N = 2000
    xx = np.concatenate([
        -np.logspace(1, -5, N // 5) + loc_true[0], [loc_true[0]],
        np.logspace(-5, 4, N - N // 5 - 1) + loc_true[0]])
    yy = np.concatenate([
        -np.logspace(1, -5, N // 5) + loc_true[1], [loc_true[1]],
        np.logspace(-5, 4, N - N // 5 - 1) + loc_true[1]])

    d_x = {'v0': np.outer(v0, np.ones(N)).flatten(),
           'e_v0': np.outer(ev0, np.ones(N)).flatten(),
           'v1': np.outer(np.ones(3), yy).flatten(),
           'e_v1': np.outer(ev1, np.ones(N)).flatten(),
           'r_v0_v1': np.outer(rv0v1, np.ones(N)).flatten()}
    obs_x = leopy.Observation(d_x, 'test', verbosity=0)
    like_x = leopy.Likelihood(obs_x, p_true='lognorm', p_cond='norm')
    res = like_x.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                   pool=self.pool)
    res = res.reshape(3, N)
    p_x_2 = scipy.integrate.trapz(res, yy)
    assert np.all(np.isclose(p_x.reshape(3), p_x_2, atol=1e-4))

    d_y = {'v0': np.outer(np.ones(3), xx).flatten(),
           'e_v0': np.outer(ev0, np.ones(N)).flatten(),
           'v1': np.outer(v1, np.ones(N)).flatten(),
           'e_v1': np.outer(ev1, np.ones(N)).flatten(),
           'r_v0_v1': np.outer(rv0v1, np.ones(N)).flatten()}
    obs_y = leopy.Observation(d_y, 'test', verbosity=0)
    like_y = leopy.Likelihood(obs_y, p_true='lognorm', p_cond='norm')
    res = like_y.p(loc_true, scale_true, shape_true=shape_true, R_true=R,
                   pool=self.pool).reshape(3, N)
    p_y_2 = scipy.integrate.trapz(res, xx)
    assert np.all(np.isclose(p_y.reshape(3), p_y_2, atol=1e-4))
           alpha=1., zorder=2)
hs.append(h)
ls.append('true data w/o intrinsic scatter')

## -- linear regression (Maximum likelihood with leopy)
import leopy

df = pd.DataFrame(np.array([x, y, uncert_x, uncert_y]).T,
                  columns=['v0', 'v1', 'e_v0', 'e_v1'])
obs = leopy.Observation(df, 'test', verbosity=0)

## -- set up Likelihood and find maximum likelihood parameters
like = leopy.Likelihood(obs, p_true='norm', p_cond=[None, 'norm'],
                        verbosity=-1)

def f_lnlike(p, pool):
    print(p)
    # p are the three parameters of the fit:
    #   the slope (p[0]),
    #   the intercept (p[1]),
    #   and the intrinsic scatter (p[2])
    Nmod = 200
    dt = np.linspace(-4, 4, Nmod)  # in units of meas. uncert.
    et = df['e_v0'].to_numpy().reshape(Nobs, 1)  # meas. uncert.
    # t is unknown x_true
ey[:, 1] = 0.1
y[:, 0] += ey[:, 0] * np.random.randn(Ndata)
y[:, 1] += ey[:, 1] * np.random.randn(Ndata)
print('rho(y) = {}'.format(np.corrcoef(y.T)))

df = pd.DataFrame(np.array([y[:, 0], y[:, 1], ey[:, 0], ey[:, 1]]).T,
                  columns=['v0', 'v1', 'e_v0', 'e_v1'])
obs = leopy.Observation(df, 'test', verbosity=0)

## --
print('Population parameter values: {:.3g} {:.3g} {:.3g} {:.3g} {:.3g} {:.3g} '
      '{:.3g}'.format(loc_true[0], loc_true[1], scale_true[0], scale_true[1],
                      shape_true[0], shape_true[1], rho))

## -- set up Likelihood and find maximum likelihood parameters
like = leopy.Likelihood(obs, p_true='lognorm', p_cond='norm')
like2 = leopy.Likelihood(obs, p_true='lognorm')

# return minus-log-likelihood/Ndata of params given data with meas. uncertainty
def f_mlnlike(x):
    # print(x)
    loc_true = x[0:2]
    scale_true = x[2:4]
    shape_true = x[4:6]
    rho = x[6]
    R = np.array([[1., rho], [rho, 1.]])
    if np.any(np.linalg.eigvalsh(R) < 0):  # ensure pos.-semidefinite
        return 1000.
print('Data set: Ntotal = {}, Ndet = {}, Ncen = {}, Nmiss = {}'.format(
    df.shape[0], Ndet, Ncen, Nmis))

# downsampling for test purposes
if 0:
    np.random.seed(2)
    df = df.sample(frac=0.1)
    print('Downsampling: Ntotal = {}, Ndet = {}, Ncen = {}, Nmiss = {}'.format(
        df.shape[0], Ndet, Ncen, Nmis))

# -- Step 2. Prepare LEO-Py
obs = leopy.Observation(df, 'xGASS', variables=[lx, ly])
df = obs.df

like = leopy.Likelihood(obs, p_true=['norm', leopy.stats.zi_gamma_lognorm],
                        p_cond=[None, 'norm'])

# -- Step 3. Prepare Maximum Likelihood analysis
def f_mlnlike(x, pool):
    """Return minus log likelihood (rescaled)."""
    if np.any(np.isnan(x)):
        return 1000.
    Nobs = df.shape[0]
    t = df['v0'].to_numpy().reshape(Nobs, 1)
    m_scale, n_scale = xGASS_auxiliary.get_slope_intercept(x[0], x[1])
    m_shape, n_shape = xGASS_auxiliary.get_slope_intercept(x[2], x[3])
    m_zero, n_zero = xGASS_auxiliary.get_slope_intercept(x[4], x[5])
Likelihood Estimation of Observational data with Python

Copyright 2019 University of Zurich, Robert Feldmann

LEO-Py is free software: you can redistribute it and/or modify it under the
terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

LEO-Py is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
LEO-Py. If not, see <https://www.gnu.org/licenses/>.
"""
import leopy
from schwimmbad import MultiPool

pool = MultiPool()

d = {'v0': [1, 2], 'e_v0': [0.1, 0.2], 'v1': [3, 4], 'e_v1': [0.1, 0.1]}
obs = leopy.Observation(d, 'testdata')
like = leopy.Likelihood(obs, p_true='gamma', p_cond='norm')
print(like.p([0.5, 0.7], [1, 2], shape_true=[1.4, 2], pool=pool))

pool.close()