def merge_data_csvs(id):
    """Concatenate all posterior data-*.csv files for job `id` into one frame.

    Adds fit-diagnostic columns (residual, scaled_residual, logp) and returns
    the frame sorted by logp.
    """
    df = pandas.DataFrame()
    work_dir = dismod3.settings.JOB_WORKING_DIR % id
    for fname in sorted(glob.glob('%s/posterior/data-*.csv' % work_dir)):
        chunk = pandas.read_csv(fname, index_col=None)
        chunk.index = chunk['index']
        # rows whose index reappears in a later file replace the earlier copy
        overlap = set(df.index) & set(chunk.index)
        df = df.drop(overlap).append(chunk)

    df['residual'] = df['value'] - df['mu_pred']
    # residual divided by the binomial standard error of the observation
    df['scaled_residual'] = df['residual'] / pl.sqrt(
        df['value'] * (1 - df['value']) / df['effective_sample_size'])
    df['abs_scaled_residual'] = pl.absolute(df['scaled_residual'])

    d = .005  # TODO: save delta in these files, use negative binomial to calc logp
    df['logp'] = [mc.negative_binomial_like(x * n, (p + 1e-3) * n, d * (p + 1e-3) * n)
                  for x, p, n in zip(df['value'], df['mu_pred'],
                                     df['effective_sample_size'])]
    # for relative-risk rows, record the scaled residual in place of logp
    df['logp'][df['data_type'] == 'rr'] = df['scaled_residual'][df['data_type'] == 'rr']

    df = df.sort('logp')
    return df
def data_likelihood(value=data.deaths, mu=predicted, alpha=omega):
    """Log-likelihood of the observed deaths given predicted rate `mu`.

    A very large dispersion parameter (alpha >= 1e10) is treated as
    effectively no over-dispersion and evaluated with the Poisson instead.
    """
    if alpha >= 10**10:
        return mc.poisson_like(value, mu)
    # nudge a non-positive rate so the negative binomial stays defined
    if mu.min() <= 0.:
        mu = mu + 10.**-10
    return mc.negative_binomial_like(value, mu, alpha)
def data_likelihood(value=np.round(self.training_data.cf * self.training_data.sample_size), mu=param_pred, alpha=alpha):
    """Log-likelihood of the (rounded) observed counts given predicted `mu`.

    When alpha >= 1e10 the dispersion is so large that the model is evaluated
    as Poisson; otherwise a negative binomial with dispersion `alpha` is used.
    """
    if alpha >= 10**10:
        return mc.poisson_like(value, mu)
    # shift mu off zero when necessary, since the NB needs a positive rate
    safe_mu = mu + 10**-10 if mu.min() <= 0. else mu
    return mc.negative_binomial_like(value, safe_mu, alpha)
def merge_data_csvs(id):
    """Collect every posterior data-*.csv for job `id` into a single frame,
    attach fit diagnostics (residuals, logp), and return it sorted by logp.
    """
    merged = pandas.DataFrame()
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    csv_paths = sorted(glob.glob('%s/posterior/data-*.csv' % job_dir))
    for path in csv_paths:
        part = pandas.read_csv(path, index_col=None)
        part.index = part['index']
        # later files win when the same index shows up twice
        merged = merged.drop(set(merged.index) & set(part.index)).append(part)

    merged['residual'] = merged['value'] - merged['mu_pred']
    # scale by the binomial standard error implied by the effective sample size
    merged['scaled_residual'] = merged['residual'] / pl.sqrt(
        merged['value'] * (1 - merged['value']) / merged['effective_sample_size'])
    merged['abs_scaled_residual'] = pl.absolute(merged['scaled_residual'])

    d = .005  # TODO: save delta in these files, use negative binomial to calc logp
    merged['logp'] = [mc.negative_binomial_like(x * n, (p + 1e-3) * n, d * (p + 1e-3) * n)
                      for x, p, n in zip(merged['value'], merged['mu_pred'],
                                         merged['effective_sample_size'])]
    # relative-risk rows get the scaled residual recorded in the logp column
    rr_rows = merged['data_type'] == 'rr'
    merged['logp'][rr_rows] = merged['scaled_residual'][rr_rows]

    merged = merged.sort('logp')
    return merged
def obs(value=value, S=data_sample, N=N, mu_i=rates, Xz=Xz, zeta=zeta, delta=delta):
    """Negative-binomial log-likelihood of the sampled observations.

    The dispersion is `delta` scaled by the covariate effect exp(Xz*zeta).
    """
    expected = N[S] * mu_i
    dispersion = delta * pl.exp(Xz * zeta)
    return mc.negative_binomial_like(value[S], expected, dispersion)
def obs_lb(value=value, N=N, Xa=Xa, Xb=Xb, alpha=alpha, beta=beta, gamma=gamma, bounds_func=vars['bounds_func'], delta=delta, age_indices=ai, age_weights=aw):
    """Lower-bound likelihood: only observations whose predicted count falls
    below the observed value contribute to the log-probability."""
    # study-specific multiplicative shift from the covariate effects
    shifts = np.exp(np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta)))
    exp_gamma = np.exp(gamma)
    # age-weighted rate for each study
    # TODO: try vectorizing this loop to increase speed
    mu_i = []
    for s_i, ages, weights in zip(shifts, age_indices, age_weights):
        mu_i.append(np.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)))
    rate_param = mu_i * N
    # penalize only the rows where the prediction violates the lower bound
    violated = np.nonzero(rate_param < value)
    return mc.negative_binomial_like(value[violated], rate_param[violated], delta)
def obs_lb(value=value, N=N, Xa=Xa, Xb=Xb, alpha=alpha, beta=beta, gamma=gamma, bounds_func=vars['bounds_func'], delta=delta, age_indices=ai, age_weights=aw):
    """Lower-bound data likelihood.

    Contributes log-probability only for studies whose predicted count
    (rate * sample size) is below the observed lower-bound value.
    """
    # per-study shift from fixed- and random-effect covariates
    shifts = pl.exp(pl.dot(Xa, alpha) + pl.dot(Xb, pl.atleast_1d(beta)))
    exp_gamma = pl.exp(gamma)
    # TODO: try vectorizing this loop to increase speed
    mu_i = [pl.dot(w, bounds_func(s * exp_gamma[ages], ages))
            for s, ages, w in zip(shifts, age_indices, age_weights)]
    rate_param = mu_i * N
    below_bound = pl.nonzero(rate_param < value)
    logp = mc.negative_binomial_like(value[below_bound],
                                     rate_param[below_bound], delta)
    return logp
def obs(pi=pi, delta=delta):
    """Negative-binomial log-likelihood of the observed counts r*n
    with expected counts pi*n and dispersion delta."""
    observed = r * n
    expected = pi * n
    return mc.negative_binomial_like(observed, expected, delta)
def obs(value=value, N=N, mu_i=rates, delta=delta, Z=Z, eta=0.):
    """Negative-binomial log-likelihood with covariate-adjusted dispersion
    (delta + eta*Z); expected counts are N*mu_i."""
    return mc.negative_binomial_like(value, N * mu_i, delta + eta * Z)
def p_obs(value=p, pi=pi, delta=delta, n=n):
    """Negative-binomial log-likelihood of observed counts value*n.

    The observed count is clamped below at the expected count pi*n, and a
    tiny epsilon keeps the expected count strictly positive.
    """
    expected = pi * n
    observed = pl.maximum(value * n, expected)
    return mc.negative_binomial_like(observed, expected + 1.0e-9, delta)
def AR_dev(AR=AR, mu=exp_rate, r=r):
    """Per-observation negative-binomial log-likelihood terms.

    Returns an array with one log-likelihood value per element of AR,
    evaluated against the matching expected rate mu[i] and dispersion r[i].
    """
    # iterate the three sequences in lockstep rather than indexing with
    # xrange(len(AR)) — clearer, and not tied to Python 2's xrange
    return np.array([pm.negative_binomial_like(a, m, disp)
                     for a, m, disp in zip(AR, mu, r)])
def p_obs(value=p, pi=pi, delta=delta, n=n):
    """Negative-binomial likelihood of value*n, floored at the expected
    count pi*n; the 1e-9 epsilon keeps the NB rate strictly positive."""
    counts = pl.maximum(value * n, pi * n)
    logp = mc.negative_binomial_like(counts, pi * n + 1.e-9, delta)
    return logp
def p_obs(value=p, pi=pi, delta=delta, n=n):
    """Negative-binomial likelihood restricted to the rows not masked by
    i_zero (a boolean mask from the enclosing scope)."""
    keep = ~i_zero
    observed = value[keep] * n[keep]
    expected = pi[keep] * n[keep] + 1.0e-9  # epsilon keeps the rate positive
    return mc.negative_binomial_like(observed, expected, delta[keep])
def p_obs(value=p, pi=pi, delta=delta, n=n):
    """NB log-likelihood over the non-zero rows only (i_zero masks the rest).

    A 1e-9 epsilon keeps the expected count strictly positive.
    """
    nz = ~i_zero
    return mc.negative_binomial_like(value[nz] * n[nz],
                                     pi[nz] * n[nz] + 1.e-9,
                                     delta[nz])
def AR_dev(AR=AR, mu=exp_rate, r=r):
    """Element-wise negative-binomial log-likelihood deviations.

    One log-likelihood term is computed for each observation AR[i] against
    its expected rate mu[i] and dispersion r[i]; results are returned as an
    array.
    """
    # zip the parallel sequences instead of looping over xrange(len(AR));
    # avoids positional indexing and the Python-2-only xrange builtin
    terms = [pm.negative_binomial_like(obs_i, mu_i, r_i)
             for obs_i, mu_i, r_i in zip(AR, mu, r)]
    return np.array(terms)
# NOTE(review): fragment of a larger posterior-saving routine, collapsed onto
# one line; `dm`, `t`, `pl`, `mc`, `dir`, `predict_area`, `predict_sex`,
# `predict_year` come from the enclosing (not visible) scope.  It copies
# posterior summary statistics into the data table (mu_pred from the p_pred
# mean, sigma_pred from its standard deviation, mc_error from the pi trace),
# derives residual columns, computes a per-row negative-binomial logp using
# the posterior mean of delta (handling both scalar and per-row delta), and
# writes the table to <dir>/posterior/, printing a warning instead of failing
# on IOError.  Python 2 syntax (`except IOError, e`, print statements).
# The nesting of the trailing `if 'U' in dm.vars[t]` branch relative to the
# first `if` cannot be recovered from this collapsed view — do not reformat
# without consulting the original file.
if 'data' in dm.vars[t] and 'p_pred' in dm.vars[t]: stats = dm.vars[t]['p_pred'].stats(batches=5) dm.vars[t]['data']['mu_pred'] = stats['mean'] dm.vars[t]['data']['sigma_pred'] = stats['standard deviation'] stats = dm.vars[t]['pi'].stats(batches=5) dm.vars[t]['data']['mc_error'] = stats['mc error'] dm.vars[t]['data']['residual'] = dm.vars[t]['data'][ 'value'] - dm.vars[t]['data']['mu_pred'] dm.vars[t]['data']['abs_residual'] = pl.absolute( dm.vars[t]['data']['residual']) if 'delta' in dm.vars[t]: if len(pl.atleast_1d(dm.vars[t]['delta'].value)) == 1: d = pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']) dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred \ in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'])] else: dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \ in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'], pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']))] try: dm.vars[t]['data'].to_csv( dir + '/posterior/data-%s-%s+%s+%s.csv' % (t, predict_area, predict_sex, predict_year)) except IOError, e: print 'WARNING: could not save file' print e if 'U' in dm.vars[t]: re = dm.vars[t]['U'].T columns = list(re.columns) mu = []
def obs(pi=pi, delta=delta):
    """Negative-binomial log-likelihood: observed counts r*n against
    expected counts pi*n with dispersion delta (r, n from enclosing scope)."""
    logp = mc.negative_binomial_like(r * n, pi * n, delta)
    return logp
# NOTE(review): incomplete fragment of a results-saving loop, collapsed onto
# one line — it begins with a bare `continue` and ends at a dangling
# `for n in dm.vars[t]['alpha']:` header, so the enclosing loop and the body
# that follows are outside this view.  For each data type `t` it stores
# posterior summaries into the data table (mu_pred, sigma_pred, mc_error,
# residual, abs_residual), computes per-row negative-binomial logp values
# using the posterior mean of delta (scalar or per-row), and saves the table
# as a CSV under <dir>/posterior/, printing a warning on IOError rather than
# aborting.  Python 2 syntax throughout; leave byte-identical until the full
# source is available.
continue print 'saving tables for', t if 'data' in dm.vars[t] and 'p_pred' in dm.vars[t]: stats = dm.vars[t]['p_pred'].stats(batches=5) dm.vars[t]['data']['mu_pred'] = stats['mean'] dm.vars[t]['data']['sigma_pred'] = stats['standard deviation'] stats = dm.vars[t]['pi'].stats(batches=5) dm.vars[t]['data']['mc_error'] = stats['mc error'] dm.vars[t]['data']['residual'] = dm.vars[t]['data']['value'] - dm.vars[t]['data']['mu_pred'] dm.vars[t]['data']['abs_residual'] = pl.absolute(dm.vars[t]['data']['residual']) if 'delta' in dm.vars[t]: if len(pl.atleast_1d(dm.vars[t]['delta'].value)) == 1: d = pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']) dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred \ in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'])] else: dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \ in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'], pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']))] try: dm.vars[t]['data'].to_csv(dir + '/posterior/data-%s-%s+%s+%s.csv'%(t, predict_area, predict_sex, predict_year)) except IOError, e: print 'WARNING: could not save file' print e if 'U' in dm.vars[t]: re = dm.vars[t]['U'].T columns = list(re.columns) mu = [] sigma = [] for n in dm.vars[t]['alpha']: