def log_normal(name, pi, sigma, p, s):
    """ Generate PyMC objects for a lognormal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p > 0), 'observed values must be positive'
    assert pl.all(s >= 0), 'standard error must be non-negative'

    i_inf = pl.isinf(s)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s):
        return mc.normal_like(pl.log(value), pl.log(pi + 1.e-9),
                              1. / (sigma**2. + (s / value)**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf):
        return pl.exp(mc.rnormal(pl.log(pi + 1.e-9),
                                 1. / (sigma**2. + (s / (pi + 1.e-9))**2)))

    return dict(p_obs=p_obs, p_pred=p_pred)
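# A minimal usage sketch (not part of the original source): wire the lognormal rate
# model above into a PyMC 2 MCMC run.  Assumes `import pylab as pl` and `import pymc as mc`
# as the functions in this file do; the data arrays and the priors on `pi` and `sigma`
# are made up for illustration only.
def example_log_normal_fit():
    p = pl.array([.10, .20, .15])        # hypothetical observed rates
    s = pl.array([.01, .02, .015])       # hypothetical standard errors
    pi = mc.Uniform('pi', 0., 1., value=.1 * pl.ones(3))
    sigma = mc.Uniform('sigma', 0., 1., value=.01)

    vars = dict(pi=pi, sigma=sigma)
    vars.update(log_normal('example', pi, sigma, p, s))

    m = mc.MCMC(vars)
    m.sample(iter=2000, burn=1000, thin=10)
    return m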
def binom(name, pi, p, n):
    """ Generate PyMC objects for a binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.binomial_like(value * n, n, pi + 1.e-9)

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n, dtype=int)
    n_nonzero[n == 0] = 1.e6

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rbinomial(n, pi + 1.e-9) / (1. * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
def offset_log_normal(name, pi, sigma, p, s):
    """ Generate PyMC objects for an offset log-normal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(s >= 0), 'standard error must be non-negative'

    p_zeta = mc.Uniform('p_zeta_%s' % name, 1.e-9, 10., value=1.e-6)

    i_inf = pl.isinf(s)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s, p_zeta=p_zeta):
        return mc.normal_like(pl.log(value[~i_inf] + p_zeta),
                              pl.log(pi[~i_inf] + p_zeta),
                              1. / (sigma**2. + (s / (value + p_zeta))[~i_inf]**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf, p_zeta=p_zeta):
        return pl.exp(mc.rnormal(pl.log(pi + p_zeta),
                                 1. / (sigma**2. + (s / (pi + p_zeta))**2.))) - p_zeta

    return dict(p_zeta=p_zeta, p_obs=p_obs, p_pred=p_pred)
def poisson(name, pi, p, n):
    """ Generate PyMC objects for a poisson model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    i_nonzero = (n != 0.)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.poisson_like((value * n)[i_nonzero], (pi * n)[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=float)
    n_nonzero[n == 0.] = 1.e6

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rpoisson((pi * n).clip(1.e-9, pl.inf)) / (1. * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
def beta_binom(name, pi, p, n):
    """ Generate PyMC objects for a beta-binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    p_n = mc.Uniform('p_n_%s' % name, lower=1.e4, upper=1.e9, value=1.e4)  # convergence requires getting these bounds right
    pi_latent = [mc.Beta('pi_latent_%s_%d' % (name, i), pi[i]*p_n, (1-pi[i])*p_n, value=pi_i)
                 for i, pi_i in enumerate(pi.value)]

    i_nonzero = (n != 0.)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi_latent, n=n):
        pi_flat = pl.array(pi)
        return mc.binomial_like((value*n)[i_nonzero], n[i_nonzero], pi_flat[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=int)
    n_nonzero[n == 0] = 1.e6

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi_latent, n=n_nonzero):
        return mc.rbinomial(n, pi) / (1.*n)

    return dict(p_n=p_n, pi_latent=pi_latent, p_obs=p_obs, p_pred=p_pred)
def setup_asr_step_methods(m, vars, additional_stochs=[]):
    # groups RE stochastics that are suspected of being dependent
    groups = []
    fe_group = [n for n in vars.get('beta', []) if isinstance(n, mc.Stochastic)]
    ap_group = [n for n in vars.get('gamma', []) if isinstance(n, mc.Stochastic)]
    groups += [[g_i, g_j] for g_i, g_j in zip(ap_group[1:], ap_group[:-1])] \
              + [fe_group, ap_group, fe_group + ap_group]

    for a in vars.get('hierarchy', []):
        group = []

        col_map = dict([[key, i] for i, key in enumerate(vars['U'].columns)])

        if a in vars['U']:
            for b in nx.shortest_path(vars['hierarchy'], 'all', a):
                if b in vars['U']:
                    n = vars['alpha'][col_map[b]]
                    if isinstance(n, mc.Stochastic):
                        group.append(n)
        groups.append(group)
        #if len(group) > 0:
            #group += ap_group
            #groups.append(group)
            #group += fe_group
            #groups.append(group)

    for stoch in groups:
        if len(stoch) > 0 and pl.all([isinstance(n, mc.Stochastic) for n in stoch]):
            # only step certain stochastics, for understanding convergence
            #if 'gamma_i' not in stoch[0].__name__:
            #    print 'no stepper for', stoch
            #    m.use_step_method(mc.NoStepper, stoch)
            #    continue

            #print 'finding Normal Approx for', [n.__name__ for n in stoch]
            if additional_stochs == []:
                vars_to_fit = [vars.get('p_obs'), vars.get('pi_sim'), vars.get('smooth_gamma'),
                               vars.get('parent_similarity'), vars.get('mu_sim'),
                               vars.get('mu_age_derivative_potential'), vars.get('covariate_constraint')]
            else:
                vars_to_fit = additional_stochs

            try:
                # NOTE: this raise deliberately skips the NormApprox branch below,
                # so the except clause always assigns a plain AdaptiveMetropolis step method
                raise ValueError

                na = mc.NormApprox(vars_to_fit + stoch)
                na.fit(method='fmin_powell', verbose=0)
                cov = pl.array(pl.inv(-na.hess), order='F')
                #print 'opt:', pl.round_([n.value for n in stoch], 2)
                #print 'cov:\n', cov.round(4)
                if pl.all(pl.eigvals(cov) >= 0):
                    m.use_step_method(mc.AdaptiveMetropolis, stoch, cov=cov)
                else:
                    raise ValueError
            except ValueError:
                #print 'cov matrix is not positive semi-definite'
                m.use_step_method(mc.AdaptiveMetropolis, stoch)
def plotD(self, t):
    Z = self.matrix[t]
    if self.axt != None:
        self.axt.cla()
        self.fig.delaxes(self.axt)
        self.axt = None
    if self.ax == None:
        self.ax = self.fig.add_subplot(111)
    ax = self.ax
    ax.cla()
    ax.set_title(u'Evolución de curvas de nivel en el dominio (Problema directo, 2D)')
    divi = np.zeros((len(self.Y), len(self.X)), float)
    divi[:, :] = Z[0, 0]
    # MATPLOTLIB BUG: only plot when the matrix is not constant (a multiple of ones);
    # otherwise there is no contour level to draw
    if not p.all(np.equal(Z, divi)):
        ax.contour(self.X, self.Y, Z)
def __on_ir_sensor_data_(self, ir_data):
    if len(ir_data) != 4:
        return

    points = [[io['x'], io['y']] for io in ir_data]

    # the ir sensor likes to initialize with 1023 for all coordinates
    # so we catch that case here
    if self.is_ir_initial:
        self.is_ir_initial = pl.all(pl.array(points) == 1023)
        return

    x_values, y_values = \
        self.__get_moving_averages_points(um.sort_points(points))

    x, y = um.get_projection_transformed_point(x_values, y_values,
                                               self.monitor_width,
                                               self.monitor_height,
                                               self.width / 2,
                                               self.height / 2)

    self.pointer_location = (x, y)
    self.ir_data_updated.emit()
def SVMAF(self, freq, n, l):
    # Apply the SVMAF filter to the material parameters
    runningMean = lambda x, N: py.hstack((x[:N-1],
                                          py.convolve(x, py.ones((N,)) / N, mode='same')[N-1:-N+1],
                                          x[(-N+1):]))
    # calculate the moving average of 3 points
    n_smoothed = runningMean(n, 3)
    # evaluate H_smoothed from n_smoothed
    H_smoothed = self.H_theory(freq, [n_smoothed.real, n_smoothed.imag], l)

    H_r = H_smoothed.real
    H_i = H_smoothed.imag
    f = 1
    # the uncertainty margins
    lb_r = self.H.getFReal() - self.H.getFRealUnc() * f
    lb_i = self.H.getFImag() - self.H.getFImagUnc() * f
    ub_r = self.H.getFReal() + self.H.getFRealUnc() * f
    ub_i = self.H.getFImag() + self.H.getFImagUnc() * f

    # ix = all indices for which, after smoothing n, H is still in between the bounds
    ix = py.all([H_r >= lb_r, H_r < ub_r, H_i >= lb_i, H_i < ub_i], axis=0)

    # don't have a good idea at the moment, so manually:
    # keep the original refractive index wherever the smoothed H falls outside the bounds
    for i in range(len(n_smoothed)):
        if ix[i] == 0:
            n_smoothed[i] = n[i]
    print("SVMAF changed the refractive index at " + str(sum(ix)) + " frequencies")
    return n_smoothed
def normal_model(name, pi, sigma, p, s):
    """ Generate PyMC objects for a normal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(s >= 0), 'standard error must be non-negative'

    i_inf = pl.isinf(s)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s):
        return mc.normal_like(value[~i_inf], pi[~i_inf],
                              1. / (sigma**2. + s[~i_inf]**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf):
        return mc.rnormal(pi, 1. / (sigma**2. + s**2.))

    return dict(p_obs=p_obs, p_pred=p_pred)
def get_emp(disease, country, sex, year):
    # load posterior estimates from GBD 2010 Study
    global_model = load_new_model(disease, 'all', sex, year)
    emp = pandas.read_csv('/home/j/Project/dismod/output/dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv'
                          % (disease, disease, full_name[data_type],
                             global_model.hierarchy.in_edges(country)[0][0], sex, year),
                          index_col=None)

    # remove population numbers
    del emp['Population']

    # keep only estimates from country
    cty_ix = (emp['Iso3'] == country)
    emp = emp[cty_ix]
    del emp['Iso3']

    # keep only estimates for data type
    try:
        assert pl.all(emp['Rate type'] == full_name[data_type])
    except:
        dt_ix = (emp['Rate type']) == full_name[data_type]
        emp = emp[dt_ix]
    del emp['Rate type']

    # return GBD 2010 Study posterior
    emp.index = emp['Age']
    del emp['Age']
    return emp.mean(1), emp.std(1)*10
def visualize_steps(mod, fname='mod.avi', description_str=''):
    times = list(pl.arange(0, 30, .2)) + range(30, 200) + range(200, 1500, 10)
    times += range(1500, 1700) + range(1700, 3000, 10)
    times += range(3000, 3200) + range(3200, len(mod.X.trace()), 10)
    assert pl.all(pl.diff(times) >= 0.), 'movies where time is not increasing are confusing and probably unintentional'

    try:
        print 'generating %d images' % len(times)
        for i, t in enumerate(times):
            if i % 100 == 99:
                print '%d of %d (t=%.2f)' % (i, len(times), t)
                sys.stdout.flush()
            visualize_single_step(mod, int(t), t - int(t), description_str)
            pl.savefig('mod%06d.png' % i)
    except KeyboardInterrupt:
        pass

    import subprocess
    subprocess.call('mencoder mf://mod*.png -mf w=800:h=600 -ovc x264 -of avi -o %s' % fname, shell=True)
    subprocess.call('mplayer -loop 1 %s' % fname, shell=True)
    subprocess.call('rm mod*.png', shell=True)
def integrate_field(self, fn_spec, specific, fn_main, r=20, val=0.0):
    """
    Assimilate a field with filename <fn_spec> from DataInput object
    <specific> into this DataInput's field with filename <fn_main>.  The
    parameter <val> should be set to the specific dataset's value for
    undefined regions, default is 0.0.  <r> is a parameter used to eliminate
    border artifacts from interpolation; increase this value to eliminate
    edge noise.
    """
    print "::: integrating %s field from %s :::" % (fn_spec, specific.name)

    # get the dofmap to map from mesh vertex indices to function indicies :
    df    = self.func_space.dofmap()
    dfmap = df.vertex_to_dof_map(self.mesh)

    unew  = self.get_projection(fn_main)      # existing dataset projection
    uocom = unew.compute_vertex_values()      # mesh indexed main vertex values

    uspec = specific.get_projection(fn_spec)  # specific dataset projection
    uscom = uspec.compute_vertex_values()     # mesh indexed spec vertex values

    d     = float64(specific.data[fn_spec])   # original matlab spec dataset

    # get arrays of x-values for specific domain
    xs = specific.x
    ys = specific.y
    nx = specific.nx
    ny = specific.ny

    for v in vertices(self.mesh):
        # mesh vertex x,y coordinate :
        i = v.index()
        p = v.point()
        x = p.x()
        y = p.y()

        # indexes of closest datapoint to specific dataset's x and y domains :
        idx = abs(xs - x).argmin()
        idy = abs(ys - y).argmin()

        # data value for closest value and square around the value in question :
        dv = d[idy, idx]
        db = d[max(0, idy-r): min(ny, idy+r), max(0, idx-r): min(nx, idx+r)]

        # if the vertex is in the domain of the specific dataset, and the value
        # of the dataset at this point is above <val>, set the array value
        # of the main file to this new specific region's value.
        if dv > val:
            #print "found:", x, y, idx, idy, v.index()

            # if the value is not near an edge, make the value equal to the
            # nearest specific region's dataset value, otherwise, use the
            # specific region's projected value :
            if all(db > val):
                uocom[i] = uscom[i]
            else:
                uocom[i] = dv

    # set the values of the projected original dataset equal to the assimilated
    # dataset :
    unew.vector().set_local(uocom[dfmap])
    return unew
def test_save_and_load():
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    # TODO: delete this dir if it exists
    d.save('tests/tmp')

    # TODO: test that files really were created
    d2 = data.ModelData.load('tests/tmp')

    assert d.input_data.shape == d2.input_data.shape, 'input data should be equal before and after save'
    assert pl.all(d.input_data['value'] == d2.input_data['value']), 'input data should be equal before and after save'
    assert d.output_template.shape == d2.output_template.shape, 'output template should be equal before and after save'
    assert pl.all(d.output_template['area'] == d2.output_template['area']), 'output template should be equal before and after save'
    assert d.parameters == d2.parameters, 'parameters should be equal before and after save'
    assert sorted(d.hierarchy.edges()) == sorted(d2.hierarchy.edges()), 'hierarchy should be equal before and after save'
    assert d.nodes_to_fit == d2.nodes_to_fit, 'nodes_to_fit should be equal before and after save'
def test_good_model(self):
    vars = models.latent_simplex(self.X)

    assert pl.all(pl.sum(vars['pi'].value, 1) <= 1.0), \
        'pi values should sum to at most 1, (%s found)' % pl.sum(vars['pi'].value, 1)

    m = mc.MCMC(vars)
    m.sample(10)
def neg_binom(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' and 'p_pred'
        the observed stochastic likelihood and data predicted stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    i_zero = pl.array(n == 0.)

    if (isinstance(delta, mc.Node) and pl.shape(delta.value) == ()) \
           or (not isinstance(delta, mc.Node) and pl.shape(delta) == ()):  # delta is a scalar
        @mc.observed(name='p_obs_%s' % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(value[~i_zero] * n[~i_zero],
                                             pi[~i_zero] * n[~i_zero] + 1.e-9,
                                             delta)
    else:
        @mc.observed(name='p_obs_%s' % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(value[~i_zero] * n[~i_zero],
                                             pi[~i_zero] * n[~i_zero] + 1.e-9,
                                             delta[~i_zero])

    # for any observation with n=0, make predictions for n=1.e9, to use for predictive validity
    n_nonzero = n.copy()
    n_nonzero[i_zero] = 1.e9

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, delta=delta, n=n_nonzero):
        return mc.rnegative_binomial(pi * n + 1.e-9, delta) / pl.array(n + 1.e-9, dtype=float)

    return dict(p_obs=p_obs, p_pred=p_pred)
def __iadd__(self, other):
    if self.id == None:
        self.id = other.id
    if pl.all(self.id == other.id):
        self.trials += other.trials
        self.nr_trials = len(self.trials)
    else:
        print "\nERROR: cannot concatenate blocks with differing parameters!\n"
    return self
def plotD2(self, t, ax):
    Z = self.matrix[t]
    ax.cla()
    divi = np.zeros((len(self.Y), len(self.X)), float)
    divi[:, :] = Z[0, 0]
    # MATPLOTLIB BUG: only plot when the matrix is not constant (a multiple of ones);
    # otherwise there is no contour level to draw
    if not p.all(np.equal(Z, divi)):
        ax.contour(self.X, self.Y, Z)
def load_block_data(pathname):
    valid_filenames = [fname for fname in glob(pathname + '/*.yml')
                       if filename_pattern.match(fname)]
    blocks = sorted([SearchBlock(fname) for fname in valid_filenames])
    ids = unique([block.id for block in blocks])
    combined_blocks = []
    for id in ids:
        blk = pl.sum([block for block in blocks if pl.all(block.id == id)])
        combined_blocks.append(blk)
    return combined_blocks
def make_exact(time):
    dens1 = 1.0
    Gamma = 1.4
    Mach = 2.0
    dens2 = dens1 * (Gamma + 1.0) * Mach**2 / ((Gamma - 1.0) * Mach**2 + 2.0)
    speed = Mach * pylab.sqrt(Gamma * 1.0 / dens1)
    xexact = pylab.arange(0.0, 1.0, 0.001)
    deltime = time - xexact / speed
    exact = pylab.ones(shape=deltime.shape) * dens1
    index = pylab.all([deltime > 0], axis=0)
    exact[index] = dens2
    return (xexact, exact)
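# A minimal usage sketch (not part of the original source): plot the exact post-shock
# density profile returned by make_exact at a few times.  Assumes `import pylab`, as
# make_exact itself does; the chosen times are illustrative only.
def example_exact_profile():
    for t in [0.1, 0.25, 0.5]:
        xexact, exact = make_exact(t)
        pylab.plot(xexact, exact, label='t = %.2f' % t)
    pylab.xlabel('x')
    pylab.ylabel('density')
    pylab.legend()
    pylab.show()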
def neg_binom_lower_bound(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial lower bound model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs'
        the observed stochastic

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n > 0), 'effective sample size must be positive'

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, delta=delta, n=n):
        return mc.negative_binomial_like(pl.maximum(value*n, pi*n), pi*n + 1.e-9, delta)

    return dict(p_obs=p_obs)
def identify_nans(self, data, fn):
    """
    private method to identify rows and columns of all nans from grids. This
    happens when the data from multiple GIS databases don't quite align on
    whatever the desired grid is.
    """
    #print "::: DataInput identifying NaNs for %s :::" % fn

    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows

    if any(good_x != self.good_x):
        total_nan_x = sum(good_x == False)
        self.rem_nans = True
        print "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)

    if any(good_y != self.good_y):
        total_nan_y = sum(good_y == False)
        self.rem_nans = True
        print "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)

    self.good_x = good_x
    self.good_y = good_y
def identify_nans(self, data, fn):
    """
    private method to identify rows and columns of all nans from grids. This
    happens when the data from multiple GIS databases don't quite align on
    whatever the desired grid is.
    """
    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows

    if any(good_x != self.good_x):
        total_nan_x = sum(good_x == False)
        self.rem_nans = True
        s = "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)
        print_text(s, self.color)

    if any(good_y != self.good_y):
        total_nan_y = sum(good_y == False)
        self.rem_nans = True
        s = "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
        print_text(s, self.color)

    self.good_x = good_x
    self.good_y = good_y
def test_process_fit_results(self):
    r = p.arange(5)
    e = p.outer(p.arange(5), p.arange(5))
    alpha_psp = AlphaPSP()

    pr, pe = alpha_psp.process_fit_results(r, e)
    self.assertTrue(p.all(pr == p.array([0, 2, 1, 3, 4])))
    self.assertEqual(pe[1, 1], 4)
    self.assertEqual(pe[2, 2], 1)
    self.assertLess(pr[2], pr[1])
    self.assertLess(pe[2, 2], pe[1, 1])

    # test again with permuted values
    pr, pe = alpha_psp.process_fit_results(pr, pe)
    self.assertTrue(p.all(pr == p.array([0, 2, 1, 3, 4])))
    self.assertEqual(pe[1, 1], 4)
    self.assertEqual(pe[2, 2], 1)
    self.assertLess(pr[2], pr[1])
    self.assertLess(pe[2, 2], pe[1, 1])
def spline(name, ages, knots, smoothing, interpolation_method='linear'):
    """ Generate PyMC objects for a spline model of age-specific rate

    Parameters
    ----------
    name : str
    knots : array
    ages : array, points to interpolate to
    smoothing : pymc.Node, smoothness parameter for smoothing spline
    interpolation_method : str, optional, one of 'linear', 'nearest', 'zero',
                           'slinear', 'quadratic', 'cubic'

    Results
    -------
    Returns dict of PyMC objects, including 'gamma' (log of rate at knots)
    and 'mu_age' (age-specific rate interpolated at all age points)
    """
    assert pl.all(pl.diff(knots) > 0), 'Spline knots must be strictly increasing'

    # TODO: consider changing this prior distribution to be something more familiar in linear space
    gamma = [mc.Normal('gamma_%s_%d' % (name, k), 0., 10.**-2, value=-10.) for k in knots]
    #gamma = [mc.Uniform('gamma_%s_%d'%(name,k), -20., 20., value=-10.) for k in knots]

    # TODO: fix AdaptiveMetropolis so that this is not necessary
    flat_gamma = mc.Lambda('flat_gamma_%s' % name,
                           lambda gamma=gamma: pl.array([x for x in pl.flatten(gamma)]))

    import scipy.interpolate
    @mc.deterministic(name='mu_age_%s' % name)
    def mu_age(gamma=flat_gamma, knots=knots, ages=ages):
        mu = scipy.interpolate.interp1d(knots, pl.exp(gamma), kind=interpolation_method,
                                        bounds_error=False, fill_value=0.)
        return mu(ages)

    vars = dict(gamma=gamma, mu_age=mu_age, ages=ages, knots=knots)

    if (smoothing > 0) and (not pl.isinf(smoothing)):
        #print 'adding smoothing of', smoothing
        @mc.potential(name='smooth_mu_%s' % name)
        def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
            # the following is to include a "noise floor" so that level value
            # zero prior does not exert undue influence on age pattern
            # smoothing
            # TODO: consider changing this to an offset log normal
            gamma = gamma.clip(pl.log(pl.exp(gamma).mean()/10.), pl.inf)  # only include smoothing on values within 10x of mean
            return mc.normal_like(pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)
        vars['smooth_gamma'] = smooth_gamma

    return vars
def spline(name, ages, knots, smoothing, interpolation_method='linear'):
    """ Generate PyMC objects for a piecewise constant Gaussian process (PCGP) model

    Parameters
    ----------
    name : str
    knots : array, locations of the discontinuities in the piecewise constant function
    ages : array, points to interpolate to
    smoothing : pymc.Node, smoothness parameter for smoothing spline
    interpolation_method : str, optional, one of 'linear', 'nearest', 'zero',
                           'slinear', 'quadratic', 'cubic'

    Results
    -------
    Returns dict of PyMC objects, including 'gamma' and 'mu_age'
    the observed stochastic likelihood and data predicted stochastic
    """
    assert pl.all(pl.diff(knots) > 0), 'Spline knots must be strictly increasing'

    gamma = [mc.Normal('gamma_%s_%d' % (name, k), 0., 10.**-2, value=-10.) for k in knots]
    #gamma = [mc.Uniform('gamma_%s_%d'%(name,k), -20., 20., value=-10.) for k in knots]

    # TODO: fix AdaptiveMetropolis so that this is not necessary
    flat_gamma = mc.Lambda('flat_gamma_%s' % name,
                           lambda gamma=gamma: pl.array([x for x in pl.flatten(gamma)]))

    import scipy.interpolate
    @mc.deterministic(name='mu_age_%s' % name)
    def mu_age(gamma=flat_gamma, knots=knots, ages=ages):
        mu = scipy.interpolate.interp1d(knots, pl.exp(gamma), kind=interpolation_method,
                                        bounds_error=False, fill_value=0.)
        return mu(ages)

    vars = dict(gamma=gamma, mu_age=mu_age, ages=ages, knots=knots)

    if (smoothing > 0) and (not pl.isinf(smoothing)):
        print 'adding smoothing of', smoothing
        @mc.potential(name='smooth_mu_%s' % name)
        def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
            # the following is to include a "noise floor" so that level value
            # zero prior does not exert undue influence on age pattern
            # smoothing
            gamma = gamma.clip(pl.log(pl.exp(gamma).mean()/10.), pl.inf)  # only include smoothing on values within 10x of mean
            return mc.normal_like(pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)
        vars['smooth_gamma'] = smooth_gamma

    return vars
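# A minimal usage sketch (not part of the original source) for the spline constructors
# above: build the vars dict and inspect the age pattern implied by the initial knot
# values.  Assumes `import pylab as pl` and `import pymc as mc`; the age range and
# knot locations are made up for illustration.
def example_spline_age_pattern():
    ages = pl.arange(101)
    vars = spline('example', ages, knots=[0, 15, 60, 100], smoothing=pl.inf)
    # with smoothing=inf the smoothing potential is skipped, so vars contains only
    # 'gamma', 'mu_age', 'ages', and 'knots'
    return vars['mu_age'].value  # rate at every age; exp(-10) at the initial knot values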
def plotC2(self, t, ax):
    ax.cla()
    X = self.X
    Y = self.Y
    u = self.matx[t]
    #ax.set_title(u'Gradiente del nivel (Problema directo, dirección de flujo)')
    # This check avoids warnings from quiver when the vectors are all zeros
    divi = np.zeros((len(self.Y), len(self.X)), float)
    if not p.all(np.equal(u, divi)):
        if self.tipodis == None or self.tipodis != "Logaritmica":
            ## print 'Aún no disponible para este tipo de discretización'
            #else:
            if self.tipodis == None:
                v = self.maty[t] * -1
            elif self.tipodis == "Lineal":
                #quiver(x,y,gxh(:,:,i),gyh(:,:,i));
                v = self.maty[t]
            q = ax.quiver(X, Y, u, v, color=['r'])
def plotC(self, t):
    if self.axt != None:
        self.axt.cla()
        self.fig.delaxes(self.axt)
        self.axt = None
    if self.ax == None:
        self.ax = self.fig.add_subplot(111)
    ax = self.ax
    ax.cla()
    X = self.X
    Y = self.Y
    u = self.matx[t]
    ax.set_title(u'Gradiente del nivel (Problema directo, dirección de flujo)')
    # This check avoids warnings from quiver when the vectors are all zeros
    divi = np.zeros((len(self.Y), len(self.X)), float)
    if not p.all(np.equal(u, divi)):
        if self.tipodis != None and self.tipodis == "Logaritmica":
            print 'Aún no disponible para este tipo de discretización'
        else:
            if self.tipodis == None:
                v = self.maty[t] * -1
            elif self.tipodis == "Lineal":
                #quiver(x,y,gxh(:,:,i),gyh(:,:,i));
                v = self.maty[t]
            q = ax.quiver(X, Y, u, v, color=['r'])
def pseudoSpect(A, npts=200, s=2., gridPointSelect=100, verbose=True,
                lstSqSolve=True):
    """
    original code from http://www.cs.ox.ac.uk/projects/pseudospectra/psa.m
    % psa.m - Simple code for 2-norm pseudospectra of given matrix A.
    %         Typically about N/4 times faster than the obvious SVD method.
    %         Comes with no guarantees!   - L. N. Trefethen, March 1999.

    parameter: A: the matrix to analyze
               npts: number of points at the grid
               s: axis limits (-s ... +s)
               gridPointSelect: ???
               verbose: prints progress messages
               lstSqSolve: if true, use least squares in algorithm where
                   solve could be used (probably) instead. (replacement for
                   ldivide in MatLab)
    """
    from scipy.linalg import schur, triu
    from pylab import (meshgrid, norm, dot, zeros, eye, diag, find, linspace,
                       arange, isreal, inf, ones, lstsq, solve, sqrt, randn,
                       eig, all)

    # NOTE: the original one-liner
    #   ldiv = lambda M1, M2: lstsq(M1, M2)[0] if lstSqSolve else lambda M1, M2: solve(M1, M2)
    # binds the conditional inside the first lambda, so the solve() branch could never
    # be reached; the intent is clearly to pick one of the two solvers up front.
    if lstSqSolve:
        ldiv = lambda M1, M2: lstsq(M1, M2)[0]
    else:
        ldiv = lambda M1, M2: solve(M1, M2)

    def planerot(x):
        '''
        return (G, y)
        with a matrix G such that y = G*x with y[1] = 0
        '''
        G = zeros((2, 2))
        xn = x / norm(x)
        G[0, 0] = xn[0]
        G[1, 0] = -xn[1]
        G[0, 1] = xn[1]
        G[1, 1] = xn[0]
        return G, dot(G, x)

    xmin = -s
    xmax = s
    ymin = -s
    ymax = s
    x = linspace(xmin, xmax, npts, endpoint=False)
    y = linspace(ymin, ymax, npts, endpoint=False)
    xx, yy = meshgrid(x, y)
    zz = xx + 1j * yy

    #% Compute Schur form and plot eigenvalues:
    T, Z = schur(A, output='complex')

    T = triu(T)
    eigA = diag(T)

    # Reorder Schur decomposition and compress to interesting subspace:
    select = find(eigA.real > -250)  # % <- ALTER SUBSPACE SELECTION
    n = len(select)
    for i in arange(n):
        for k in arange(select[i] - 1, i, -1):  # :-1:i
            G = planerot([T[k, k+1], T[k, k] - T[k+1, k+1]])[0].T[::-1, ::-1]
            J = slice(k, k + 2)
            T[:, J] = dot(T[:, J], G)
            T[J, :] = dot(G.T, T[J, :])

    T = triu(T[:n, :n])
    I = eye(n)

    # Compute resolvent norms by inverse Lanczos iteration and plot contours:
    sigmin = inf * ones((len(y), len(x)))
    #A = eye(5)
    niter = 0
    for i in arange(len(y)):  # 1:length(y)
        if all(isreal(A)) and (ymax == -ymin) and (i > len(y) / 2):
            sigmin[i, :] = sigmin[len(y) - i, :]
        else:
            for jj in arange(len(x)):
                z = zz[i, jj]
                T1 = z * I - T
                T2 = T1.conj().T
                if z.real < gridPointSelect:  # <- ALTER GRID POINT SELECTION
                    sigold = 0
                    qold = zeros((n, 1))
                    beta = 0
                    H = zeros((100, 100))
                    q = randn(n, 1) + 1j * randn(n, 1)
                    while norm(q) < 1e-8:
                        q = randn(n, 1) + 1j * randn(n, 1)
                    q = q / norm(q)
                    for k in arange(99):
                        v = ldiv(T1, (ldiv(T2, q))) - dot(beta, qold)
                        #stop
                        alpha = dot(q.conj().T, v).real
                        v = v - alpha * q
                        beta = norm(v)
                        qold = q
                        q = v / beta
                        H[k+1, k] = beta
                        H[k, k+1] = beta
                        H[k, k] = alpha
                        if (alpha > 1e100):
                            sig = alpha
                        else:
                            sig = max(abs(eig(H[:k+1, :k+1])[0]))
                        if (abs(sigold / sig - 1) < .001) or (sig < 3 and k > 2):
                            break
                        sigold = sig
                        niter += 1
                    #print 'niter = ', niter
                    #%text(x(jj),y(i),num2str(k)) % <- SHOW ITERATION COUNTS
                    sigmin[i, jj] = 1. / sqrt(sig)
                #end
            # end
        if verbose:
            print 'finished line ', str(i), ' out of ', str(len(y))

    return x, y, sigmin
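# A minimal usage sketch (not part of the original source): compute and plot the
# pseudospectrum of a random matrix.  Assumes the same pylab environment that
# pseudoSpect itself imports from; the grid size and axis limit are illustrative.
def example_pseudospectrum():
    from pylab import randn, log10, contour, colorbar, show
    A = randn(10, 10)
    x, y, sigmin = pseudoSpect(A, npts=50, s=3., verbose=False)
    contour(x, y, log10(sigmin))  # contours of log10 of the smallest singular value of (zI - A)
    colorbar()
    show()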
def fill_nan(data, max_len=None, fill_ends=True):
    """ Fills the "nan" fields of a 1D array with linear interpolated values.
    At the edges, constant values are assumed.

    :args:
        data (1d array): the input data
        max_len (int or None): maximal length of gaps to fill
        fill_ends (bool): whether or not to fill the ends

    :returns:
        data' (1d array): a copy of the input data, where `nan`-values are
        replaced by a linear interpolation between adjacent values
    """
    res = data.copy()
    if all(isnan(data)):
        return res
    missing_idx = find(isnan(data))

    # group to missing segments
    missing_segs = []
    gap_lengths = []
    lastidx = -2   # some invalid index: idx == lastidx + 1 cannot be true for this!
    startidx = -2  # some invalid index
    gaplen = 0
    for idx in missing_idx:
        if idx == lastidx + 1:
            # all right, the segment continues
            lastidx = idx
            gaplen += 1
        else:
            # a new segment has started
            # first: "close" old segment if exists
            if startidx >= 0:
                missing_segs.append([startidx, lastidx])
                gap_lengths.append(gaplen)
            # now: initialize new segment
            gaplen = 1
            startidx = idx
            lastidx = idx

    # manually close the last segment if exists
    if startidx >= 0:
        if lastidx < len(data) - 1 or fill_ends:  # skip edge if not fill_ends
            missing_segs.append([startidx, lastidx])

    # fill missing segments
    for seg in missing_segs:
        start_idx, stop_idx = seg
        if max_len is not None:
            if stop_idx - start_idx > max_len:
                continue
        # if startpoint is missing: constant value
        if start_idx == 0 and fill_ends:
            res[:stop_idx + 1] = res[stop_idx + 1]
        # if endpoint is missing: use constant value
        elif stop_idx == len(data) - 1 and fill_ends:
            res[start_idx:] = res[start_idx - 1]
        # else: linear interpolation
        else:
            res[start_idx:stop_idx + 1] = interp(range(start_idx, stop_idx + 1),
                                                 [start_idx - 1, stop_idx + 1],
                                                 data[[start_idx - 1, stop_idx + 1]])
    return res
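# A minimal usage sketch (not part of the original source): fill an interior gap by
# linear interpolation and a trailing gap with a constant.  Assumes the same
# pylab-style namespace used by fill_nan above (array, nan, isnan, find, interp),
# e.g. obtained via `from pylab import *`.
def example_fill_nan():
    example = array([1., nan, nan, 4., 5., nan])
    filled = fill_nan(example)                    # -> [1., 2., 3., 4., 5., 5.]
    partial = fill_nan(example, fill_ends=False)  # -> [1., 2., 3., 4., 5., nan]
    return filled, partial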
def fit_posterior(dm, region, sex, year, fast_fit=False, inconsistent_fit=False,
                  params_to_fit=['p', 'r', 'i'], zero_re=True, posteriors_only=False):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    dm : DiseaseJson
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years
    fast_fit : sample 101 draws from posterior, don't try for convergence (fast for testing)
    inconsistent_fit : fit parameters separately
    params_to_fit : list of params to fit, if not fitting all consistently
    zero_re : bool, if true, enforce constraint that sibling area REs sum to zero
    posteriors_only : bool, if true use data from 1997-2007 for 2005 and from 2007 on for 2010

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    dir = dismod3.settings.JOB_WORKING_DIR % dm.id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)
        print 'loaded data from json, saved in new format for next time in %s' % dir

    # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros
    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    predict_area = dismod3.utils.clean(region)
    predict_sex = dismod3.utils.clean(sex)
    predict_year = int(year)

    ## load emp_priors dict from dm.params
    param_type = dict(i='incidence', p='prevalence', r='remission', f='excess-mortality',
                      rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')
    emp_priors = {}
    for t in 'i r p f'.split():

        # uncomment below to not use empirical prior for rate with zero data
        # if pl.all(model.input_data['data_type'] != t):
        #     continue

        #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex)
        key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex)
        mu = dm.get_mcmc('emp_prior_mean', key)
        #mu = dm.get_mcmc('emp_prior_median', key)
        sigma = dm.get_mcmc('emp_prior_std', key)

        if len(mu) == 101 and len(sigma) == 101:
            emp_priors[t, 'mu'] = mu

            # TODO: determine best way to propagate prior on function
            emp_priors[t, 'sigma'] = sigma

            # ALT 1: scale so that the joint probability is not a
            # function of the length of the age function
            # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma))

        ## update model.parameters['random_effects'] if there is information in the disease model
        expert_priors = model.parameters[t].get('random_effects', {})
        model.parameters[t]['random_effects'] = dm.get_empirical_prior(param_type[t]).get('new_alpha', {})
        model.parameters[t]['random_effects'].update(expert_priors)

        # shift random effects to make REs for observed children of predict area have mean zero
        re_mean = pl.mean([model.parameters[t]['random_effects'][area]['mu'] \
                           for area in model.hierarchy.neighbors(predict_area) \
                           if area in model.parameters[t]['random_effects']])
        for area in model.hierarchy.neighbors(predict_area):
            if area in model.parameters[t]['random_effects']:
                model.parameters[t]['random_effects'][area]['mu'] -= re_mean

        ## update model.parameters['fixed_effects'] if there is information in the disease model
        expert_fe_priors = model.parameters[t].get('fixed_effects', {})
        model.parameters[t]['fixed_effects'].update(dm.get_empirical_prior(param_type[t]).get('new_beta', {}))

    ## create model and priors for region/sex/year
    # select data that is about areas in this region, recent years, and sex of male or total only
    assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area
    subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)

    def is_relevant(r):
        if (r['area'] not in subtree) and r['area'] != 'all':
            return False

        if predict_year == 1990:
            if r['year_start'] > 1997:
                return False
        elif predict_year == 2005:
            if posteriors_only:
                if r['year_end'] < 1997 or r['year_start'] > 2007:
                    return False
            else:
                if r['year_end'] < 1997:
                    return False
        elif predict_year == 2010:
            if posteriors_only:
                if r['data_type'] == 'm_all':
                    # include m_all data from 2005, since 2010 is not loaded
                    if r['year_end'] < 1997:
                        return False
                else:
                    if r['year_end'] < 2007:
                        return False
            else:
                if r['year_end'] < 1997:
                    return False
        else:
            assert 0, 'Predictions for year %d not yet implemented' % predict_year

        if r['sex'] not in [predict_sex, 'total']:
            return False

        return True

    old_relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all') \
                         and ((predict_year >= 1997 and r['year_end'] >= 1997) or
                              (predict_year <= 1997 and r['year_start'] <= 1997)) \
                         and r['sex'] in [predict_sex, 'total']]

    relevant_rows = model.input_data.index[model.input_data.apply(is_relevant, axis=1)]

    if predict_year == 1990:
        assert pl.all(relevant_rows == old_relevant_rows), \
            "relevant rows should be the same in new and old implementation for 1990"
    if not posteriors_only:
        assert pl.all(relevant_rows == old_relevant_rows), \
            "relevant rows should be the same in new and old implementation when posteriors_only is False"

    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with predict_area
    model.input_data['area'][model.input_data['area'] == 'all'] = predict_area

    if inconsistent_fit:
        # generate fits for requested parameters inconsistently
        for t in params_to_fit:
            model.vars += ism.age_specific_rate(model, t,
                                                reference_area=predict_area,
                                                reference_sex=predict_sex,
                                                reference_year=predict_year,
                                                mu_age=None,
                                                mu_age_parent=emp_priors.get((t, 'mu')),
                                                sigma_age_parent=emp_priors.get((t, 'sigma')),
                                                rate_type=(t == 'rr') and 'log_normal' or 'neg_binom',
                                                zero_re=zero_re)
            if fast_fit:
                dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
            else:
                dismod3.fit.fit_asr(model, t, iter=iter, burn=burn, thin=thin, tune_interval=100)
    else:
        model.vars += ism.consistent(model,
                                     reference_area=predict_area,
                                     reference_sex=predict_sex,
                                     reference_year=predict_year,
                                     priors=emp_priors,
                                     zero_re=zero_re)

        ## fit model to data
        if fast_fit:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
        else:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=iter, burn=burn, thin=thin,
                                                         tune_interval=100, verbose=True)

    # generate estimates
    posteriors = {}
    for t in 'i r f p rr pf m_with X'.split():
        if t in model.vars:
            if t in model.parameters and 'level_bounds' in model.parameters[t]:
                lower = model.parameters[t]['level_bounds']['lower']
                upper = model.parameters[t]['level_bounds']['upper']
            else:
                lower = 0
                upper = pl.inf
            posteriors[t] = covariate_model.predict_for(model, model.parameters.get(t, {}),
                                                        predict_area, predict_sex, predict_year,
                                                        predict_area, predict_sex, predict_year,
                                                        True,  # population weighted averages
                                                        model.vars[t], lower, upper)
    try:
        graphics.plot_fit(model, vars, emp_priors, {})
        pl.savefig(dir + '/image/posterior-%s+%s+%s.png' % (predict_area, predict_sex, predict_year))
    except Exception, e:
        print 'Error generating output graphics'
        print e
def win(board, letter):
    wins = logical_or(board == letter, board == 'T')
    return any(all(wins, 0)) or any(all(wins, 1)) or all(diag(wins)) or \
        all(diag(rot90(wins)))
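# A minimal usage sketch (not part of the original source): check a 3x3 tic-tac-toe
# board for a win.  Assumes numpy-style names (array, logical_or, all, any, diag, rot90)
# are in scope, e.g. via `from pylab import *`, as the bare calls in win() suggest.
def example_win():
    board = array([['X', 'O', 'O'],
                   ['O', 'X', ' '],
                   ['O', ' ', 'X']])
    assert win(board, 'X')      # main diagonal is all 'X'
    assert not win(board, 'O')  # no complete row, column, or diagonal of 'O'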
def solve_stability(self):
    """
    Solve the 2nd-order stability problem
    """
    def global_mask(fun1, fun2, V):
        # Find the indices where fun1 ~= fun2 at a tolerance
        diff = fun1.vector() - fun2.vector()
        diff.abs()
        diff_glob = PETScVector(mpi_comm_self())
        diff.gather(diff_glob, pl.array(range(V.dim()), "intc"))
        mask = diff_glob < DOLFIN_EPS_LARGE
        return mask

    # Locate the elastic part
    mask = global_mask(self.alpha, self.alpha_prev, self.V_alpha)
    if pl.all(mask):
        self.print0("\033[1;36m 2nd stability: elastic phase\033[1;m")
        self.rq = pl.inf
        return True
    else:
        self._u_alpha_prev.vector()[:] = 0.0
        self._u_alpha.vector()[:] = 1.0
        assign(self._u_alpha_prev.sub(1), self.alpha_prev)
        assign(self._u_alpha.sub(1), self.alpha)
        mask = global_mask(self._u_alpha, self._u_alpha_prev, self._V_u_alpha)
        self.elas_dofs = set((pl.where(mask == True)[0]).astype(pl.intc))

    bc_elas_dofs = self.elas_dofs.union(self.bc_dofs)
    indices = sorted(set(range(self.ownership[0], self.ownership[1])) - bc_elas_dofs)

    # Assemble K and M
    self._K = PETScMatrix()
    self._M = PETScMatrix()
    assemble(self._rqP, self._K)
    assemble(self._rqN, self._M)
    self._K_mat = self._K.mat()
    self._M_mat = self._M.mat()

    # Eliminate the elastic/BC part using PETSc.IS
    self.IS = PETSc.IS()
    self.IS.createGeneral(indices)
    self._K_mat_reduced = self._K_mat.getSubMatrix(self.IS, self.IS)
    self._K = PETScMatrix(self._K_mat_reduced)
    self._M_mat_reduced = self._M_mat.getSubMatrix(self.IS, self.IS)
    self._M = PETScMatrix(self._M_mat_reduced)

    # Stop if M ~= 0
    if self._M.norm("linf") < DOLFIN_EPS_LARGE:
        self.rq = pl.inf
        self.print0("\033[1;36m 2nd stability: Rayleigh quotient: %.3e\033[1;m" % self.rq)
        return True

    # Setup the eigenvalue solver
    self.eigensolver = SLEPcEigenSolver(self._K, self._M)
    self.set_eigensolver_parameters()

    # Use last known directions for initial guess
    assign(self._u_alpha.sub(0), self.V)
    assign(self._u_alpha.sub(1), self.Beta)
    _u_alpha_vec = as_backend_type(self._u_alpha.vector()).vec()
    _u_alpha_vec_reduced = _u_alpha_vec.getSubVector(self.IS)
    # self.eps.setInitialSpace(_u_alpha_vec_reduced)

    # Solve the eigenvalue problem
    self.print0("\033[1;36m 2nd stability: solving the eigenvalue problem\033[1;m")
    self.eps.solve()
    r, c, rx, cx = self.eigensolver.get_eigenpair(0)
    self.print0("\033[1;36m 2nd stability: smallest ev: %.3e\033[1;m" % r)

    # From reduced vector to full vector
    self.scatter = PETSc.Scatter()
    rx_vec = as_backend_type(rx).vec()
    self.scatter.create(_u_alpha_vec_reduced, None, _u_alpha_vec, self.IS)
    _u_alpha_vec.zeroEntries()
    self.scatter.scatter(rx_vec, _u_alpha_vec)
    _u_alpha_vec.ghostUpdate()

    # Check the Rayleigh quotient (in theory we should have r == rq)
    self.rq = assemble(self.rqP) / assemble(self.rqN)
    if abs(r - self.rq) > DOLFIN_EPS_LARGE:
        self.print0("\033[1;36m 2nd stability: Rayleigh quotient: %.3e\033[1;m" % self.rq)

    # Obtain the perturbation directions to V and Beta
    assign(self.V, self._u_alpha.sub(0))
    assign(self.Beta, self._u_alpha.sub(1))

    # Scale V
    u_mean = self.u.vector().norm("l2")
    if self.V.vector().norm("l2") > DOLFIN_EPS_LARGE:
        coeff = u_mean / self.V.vector().norm("l2")
        self.V.vector()[:] = coeff * self.V.vector()

    # Scale and project Beta to the admissible space
    alpha_mean = self.alpha.vector().norm("l2")
    if self.Beta.vector().norm("l2") > DOLFIN_EPS_LARGE:
        coeff = alpha_mean / self.Beta.vector().norm("l2")
        self.Beta.vector()[:] = coeff * self.Beta.vector()
    self.Beta.vector()[self.Beta.vector() < 0] = 0.0

    # Determine if the solution is unique
    if self.rq > 1:
        return True
def fill_nan(data, max_len=None, fill_ends=True): """ Fills the "nan" fields of a 1D array with linear interpolated values. At the edges, constant values are assumed. :args: data (1d array): the input data max_len (int or None): maximal length of gaps to fill fill_ends (bool): whether or not to fill the ends :returns: data' (1d array): a copy of the input data, where `nan`-values are replaced by a linear interpolation between adjacent values """ res = data.copy() if all(isnan(data)): return res missing_idx = find(isnan(data)) # group to missing segments missing_segs = [] gap_lengths = [] lastidx = -2 # some invalid index: idx == lastidx + 1 cannot be true for this! startidx = -2 # some invalid index gaplen = 0 for idx in missing_idx: if idx == lastidx + 1: # all right, the segment continues lastidx = idx gaplen += 1 else: # a new segment has started # first: "close" old segment if exists if startidx >= 0: missing_segs.append([startidx, lastidx]) gap_lengths.append(gaplen) # now: initialize new segment gaplen = 1 startidx = idx lastidx = idx # manually close the last segment if exists if startidx >= 0: if lastidx < len(data) - 1 or fill_ends: # skip edge if not fill_ends missing_segs.append([startidx, lastidx]) # fill missing segments for seg in missing_segs: start_idx, stop_idx = seg if max_len is not None: if stop_idx - start_idx > max_len: continue # if startpoint is missing: constant value if start_idx == 0 and fill_ends: res[:stop_idx + 1] = res[stop_idx + 1] # if endpoint is missing: use constant value elif stop_idx == len(data) - 1 and fill_ends: res[start_idx:] = res[start_idx - 1] # else: linear interpolation else: res[start_idx:stop_idx + 1] = interp( range(start_idx, stop_idx + 1), [start_idx - 1, stop_idx + 1], data[[start_idx - 1, stop_idx + 1]]) return res
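# Example use of fill_nan (assumes the bare isnan/find/interp names used above come
# from a pylab star-import, as elsewhere in this module):
from pylab import array, nan

raw = array([1., nan, nan, 4., 5., nan])
fill_nan(raw)                               # -> [1., 2., 3., 4., 5., 5.]
fill_nan(raw, max_len=0, fill_ends=False)   # -> unchanged copy: the two-sample gap is longer
                                            #    than max_len and the trailing nan is an edge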
def predict_for(model, parameters, root_area, root_sex, root_year, area, sex, year, population_weighted, vars, lower, upper): """ Generate draws from posterior predicted distribution for a specific (area, sex, year) :Parameters: - `model` : data.DataModel - `root_area` : str, area for which this model was fit consistently - `root_sex` : str, area for which this model was fit consistently - `root_year` : str, area for which this model was fit consistently - `area` : str, area to predict for - `sex` : str, sex to predict for - `year` : str, year to predict for - `population_weighted` : bool, should prediction be population weighted if it is the aggregation of units area RE hierarchy? - `vars` : dict, including entries for alpha, beta, mu_age, U, and X - `lower, upper` : float, bounds on predictions from expert priors :Results: - Returns array of draws from posterior predicted distribution """ area_hierarchy = model.hierarchy output_template = model.output_template.copy() # find number of samples from posterior len_trace = len(vars['mu_age'].trace()) # compile array of draws from posterior distribution of alpha (random effect covariate values) # a row for each draw from the posterior distribution # a column for each random effect (e.g. countries with data, regions with countries with data, etc) # # there are several cases to handle, or at least at one time there were: # vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?) # vars['alpha'] is a list of pymc Nodes # vars['alpha'] is a list of floats # vars['alpha'] is a list of some floats and some pymc Nodes # 'alpha' is not in vars # # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in # the prediction if 'alpha' in vars and isinstance(vars['alpha'], mc.Node): assert 0, 'No longer used' alpha_trace = vars['alpha'].trace() elif 'alpha' in vars and isinstance(vars['alpha'], list): alpha_trace = [] for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']): if isinstance(n, mc.Node): alpha_trace.append(n.trace()) else: # uncertainty of constant alpha incorporated here sigma = max(sigma, 1.e-9) # make sure sigma is non-zero assert not pl.isnan(sigma) alpha_trace.append(mc.rnormal(float(n), sigma**-2, size=len_trace)) alpha_trace = pl.vstack(alpha_trace).T else: alpha_trace = pl.array([]) # compile array of draws from posterior distribution of beta (fixed effect covariate values) # a row for each draw from the posterior distribution # a column for each fixed effect # # there are several cases to handle, or at least at one time there were: # vars['beta'] is a pymc Stochastic with an array for its value (no longer used?) 
# vars['beta'] is a list of pymc Nodes # vars['beta'] is a list of floats # vars['beta'] is a list of some floats and some pymc Nodes # 'beta' is not in vars # # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in # the prediction # # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above) if 'beta' in vars and isinstance(vars['beta'], mc.Node): assert 0, 'No longer used' beta_trace = vars['beta'].trace() elif 'beta' in vars and isinstance(vars['beta'], list): beta_trace = [] for n, sigma in zip(vars['beta'], vars['const_beta_sigma']): if isinstance(n, mc.Node): beta_trace.append(n.trace()) else: # uncertainty of constant beta incorporated here sigma = max(sigma, 1.e-9) # make sure sigma is non-zero assert not pl.isnan(sigma) beta_trace.append(mc.rnormal(float(n), sigma**-2., size=len_trace)) beta_trace = pl.vstack(beta_trace).T else: beta_trace = pl.array([]) # the prediction for the requested area is produced by aggregating predictions for all of the childred # of that area in the area_hierarchy (a networkx.DiGraph) leaves = [n for n in nx.traversal.bfs_tree(area_hierarchy, area) if area_hierarchy.successors(n) == []] if len(leaves) == 0: # networkx returns an empty list when the bfs tree is a single node leaves = [area] # initialize covariate_shift and total_population covariate_shift = pl.zeros(len_trace) total_population = 0. # group output_template for easy access output_template = output_template.groupby(['area', 'sex', 'year']).mean() # if there are fixed effects, the effect coefficients are stored as an array in vars['X'] # use this to put together a covariate matrix for the predictions, according to the output_template # covariate values # # the resulting array is covs if 'X' in vars: covs = output_template.filter(vars['X'].columns) if 'x_sex' in vars['X'].columns: covs['x_sex'] = sex_value[sex] assert pl.all(covs.columns == vars['X_shift'].index), 'covariate columns and unshift index should match up' for x_i in vars['X_shift'].index: covs[x_i] -= vars['X_shift'][x_i] # shift covariates so that the root node has X_ar,sr,yr == 0 else: covs = pandas.DataFrame(index=output_template.index) # if there are random effects, put together an indicator based on # their hierarchical relationships # if 'U' in vars: p_U = area_hierarchy.number_of_nodes() # random effects for area U_l = pandas.DataFrame(pl.zeros((1, p_U)), columns=area_hierarchy.nodes()) U_l = U_l.filter(vars['U'].columns) else: U_l = pandas.DataFrame(index=[0]) # loop through leaves of area_hierarchy subtree rooted at 'area', # make prediction for each using appropriate random # effects and appropriate fixed effect covariates # for l in leaves: log_shift_l = pl.zeros(len_trace) U_l.ix[0,:] = 0. root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l) for node in root_to_leaf[1:]: if node not in U_l.columns: ## Add a columns U_l[node] = rnormal(0, appropriate_tau) level = len(nx.shortest_path(area_hierarchy, 'all', node))-1 if 'sigma_alpha' in vars: tau_l = vars['sigma_alpha'][level].trace()**-2 U_l[node] = 0. 
# if this node was not already included in the alpha_trace array, add it # there are several cases for adding: # if the random effect has a distribution of Constant # add it, using a sigma as well # otherwise, sample from a normal with mean zero and standard deviation tau_l if parameters.get('random_effects', {}).get(node, {}).get('dist') == 'Constant': mu = parameters['random_effects'][node]['mu'] sigma = parameters['random_effects'][node]['sigma'] sigma = max(sigma, 1.e-9) # make sure sigma is non-zero alpha_node = mc.rnormal(mu, sigma**-2, size=len_trace) else: if 'sigma_alpha' in vars: alpha_node = mc.rnormal(0., tau_l) else: alpha_node = pl.zeros(len_trace) if len(alpha_trace) > 0: alpha_trace = pl.vstack((alpha_trace.T, alpha_node)).T else: alpha_trace = pl.atleast_2d(alpha_node).T # TODO: implement a more robust way to align alpha_trace and U_l U_l.ix[0, node] = 1. # 'shift' the random effects matrix to have the intended # level of the hierarchy as the reference value if 'U_shift' in vars: for node in vars['U_shift']: U_l -= vars['U_shift'][node] # add the random effect intercept shift (len_trace draws) log_shift_l += pl.dot(alpha_trace, U_l.T).flatten() # make X_l if len(beta_trace) > 0: X_l = covs.ix[l, sex, year] log_shift_l += pl.dot(beta_trace, X_l.T).flatten() if population_weighted: # combine in linear-space with population weights shift_l = pl.exp(log_shift_l) covariate_shift += shift_l * output_template['pop'][l,sex,year] total_population += output_template['pop'][l,sex,year] else: # combine in log-space without weights covariate_shift += log_shift_l total_population += 1. if population_weighted: covariate_shift /= total_population else: covariate_shift = pl.exp(covariate_shift / total_population) parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T # clip predictions to bounds from expert priors parameter_prediction = parameter_prediction.clip(lower, upper) return parameter_prediction
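# Toy illustration of the final aggregation in predict_for (leaf names and numbers are
# hypothetical): leaf-level covariate shifts are combined population-weighted in linear
# space, or unweighted in log space, mirroring the two branches above.
import pylab as pl

log_shift = {'leaf_a': pl.log(1.2), 'leaf_b': pl.log(0.8)}
pop = {'leaf_a': 3.e6, 'leaf_b': 1.e6}

weighted = sum(pl.exp(log_shift[l]) * pop[l] for l in pop) / sum(pop.values())   # population_weighted=True
unweighted = pl.exp(sum(log_shift.values()) / len(log_shift))                    # population_weighted=False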
def fit_consistent(model, iter=2000, burn=1000, thin=1, tune_interval=100, verbose=False): """Fit data model for all epidemiologic parameters using MCMC :Parameters: - `model` : data.ModelData - `iter` : int, number of posterior samples fit - `burn` : int, number of posterior samples to discard as burn-in - `thin` : int, samples thinned by this number - `tune_interval` : int - `verbose` : boolean :Results: - returns a pymc.MCMC object created from vars, that has been fit with MCMC .. note:: - `burn` must be less than `iter` - `thin` must be less than `iter` minus `burn` """ assert burn < iter, 'burn must be less than iter' assert thin < iter - burn, 'thin must be less than iter-burn' param_types = 'i r f p pf rr smr m_with X'.split() vars = model.vars start_time = time.time() map = mc.MAP(vars) m = mc.MCMC(vars) ## use MAP to generate good initial conditions try: method='fmin_powell' tol=.001 fit_model.logger.info('fitting submodels') fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose) for t in param_types: fit_model.find_re_initial_vals(vars[t], method, tol, verbose) fit_model.logger.info('.') fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose) fit_model.logger.info('.') for t in param_types: fit_model.find_fe_initial_vals(vars[t], method, tol, verbose) fit_model.logger.info('.') fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose) fit_model.logger.info('.') for t in param_types: fit_model.find_dispersion_initial_vals(vars[t], method, tol, verbose) fit_model.logger.info('.') fit_model.logger.info('\nfitting all stochs\n') map.fit(method=method, tol=tol, verbose=verbose) if verbose: from fit_posterior import inspect_vars print inspect_vars({}, vars) except KeyboardInterrupt: fit_model.logger.warning('Initial condition calculation interrupted') ## use MCMC to fit the model try: fit_model.logger.info('finding step covariances') vars_to_fit = [[vars[t].get('p_obs'), vars[t].get('pi_sim'), vars[t].get('smooth_gamma'), vars[t].get('parent_similarity'), vars[t].get('mu_sim'), vars[t].get('mu_age_derivative_potential'), vars[t].get('covariate_constraint')] for t in param_types] max_knots = max([len(vars[t]['gamma']) for t in 'irf']) for i in range(max_knots): stoch = [vars[t]['gamma'][i] for t in 'ifr' if i < len(vars[t]['gamma'])] if verbose: print 'finding Normal Approx for', [n.__name__ for n in stoch] try: na = mc.NormApprox(vars_to_fit + stoch) na.fit(method='fmin_powell', verbose=verbose) cov = pl.array(pl.inv(-na.hess), order='F') if pl.all(pl.eigvals(cov) >= 0): m.use_step_method(mc.AdaptiveMetropolis, stoch, cov=cov) else: raise ValueError except ValueError: if verbose: print 'cov matrix is not positive semi-definite' m.use_step_method(mc.AdaptiveMetropolis, stoch) fit_model.logger.info('.') for t in param_types: fit_model.setup_asr_step_methods(m, vars[t], vars_to_fit) # reset values to MAP fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose) fit_model.logger.info('.') map.fit(method=method, tol=tol, verbose=verbose) fit_model.logger.info('.') except KeyboardInterrupt: fit_model.logger.warning('Initial condition calculation interrupted') fit_model.logger.info('\nsampling from posterior distribution\n') m.iter=iter m.burn=burn m.thin=thin if verbose: try: m.sample(m.iter, m.burn, m.thin, tune_interval=tune_interval, progress_bar=True, progress_bar_fd=sys.stdout) except TypeError: m.sample(m.iter, m.burn, m.thin, tune_interval=tune_interval, progress_bar=False, verbose=verbose) else: m.sample(m.iter, 
                 m.burn,
                 m.thin,
                 tune_interval=tune_interval,
                 progress_bar=False)
    m.wall_time = time.time() - start_time

    model.map = map
    model.mcmc = m

    return model.map, model.mcmc
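# Hedged usage sketch for fit_consistent; it assumes a dismod-style ModelData whose
# vars were built consistently (see fit_posterior further below), so it is left as comments:
#
#   model.vars += ism.consistent(model, reference_area='all', reference_sex='total',
#                                reference_year='all', priors={}, zero_re=True)
#   model.map, model.mcmc = fit_consistent(model, iter=2000, burn=1000, thin=1)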
def converged(q):
    n = array([pylab.norm(obj.diff(q)) for _, obj in self.objectives])
    return pylab.all(n < 1e-3)
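# Minimal stand-in for the convergence test above (illustrative residuals only):
# every objective's difference norm must drop below 1e-3.
import pylab

residuals = [pylab.array([1.e-4, -2.e-4]), pylab.array([5.e-5])]
norms = pylab.array([pylab.norm(r) for r in residuals])
is_converged = pylab.all(norms < 1e-3)      # -> True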
def add_data_vector_segment(self, data_vector_segment, last_segment=False): self.message = None if self.__prefixed_movie_name__ == None: return if last_segment: self.__save_frames__() return frame_name = '%s_%06d' % (self.__prefixed_movie_name__, self.idx) frame_file = as_path(self.__prefixed_movie_dir__, frame_name + '.png') #skip_frame = True if self.idx < self.p.movie_skip_to_frame or \ # (self.p.movie_skip_frames and os.path.exists(frame_file)) \ # else False skip_frame = False mean_plus = pl.mean(data_vector_segment.signal_plus) mean_minus = pl.mean(data_vector_segment.signal_minus) s_plus = len(data_vector_segment.signal_plus) _pp_spec = MiniPoincarePlotSpec() _pp_spec.idx = self.idx _pp_spec.s_plus = s_plus _pp_spec.mean_plus = mean_plus _pp_spec.mean_minus = mean_minus _pp_spec.range = self.range _pp_spec.frame_file = frame_file if self.idx == 0: self.s_size = s_plus self.x_data.put(pl.arange(s_plus), data_vector_segment.signal_plus) self.y_data.put(pl.arange(s_plus), data_vector_segment.signal_minus) ok = True old_s_plus = 0 _pp_spec.level = 0 _pp_spec.active_start = 0 _pp_spec.active_stop = s_plus else: old_s_plus = len(self.old_signal_plus) ok = False if s_plus >= old_s_plus: if pl.all(self.old_signal_plus \ == data_vector_segment.signal_plus[:old_s_plus]): old_size = self.s_size new_size = old_size + s_plus - old_s_plus if new_size > old_size: _pp_spec.active_start = old_size _pp_spec.active_stop = new_size if new_size > len(self.x_data): raise Exception( 'New size is greater then the signal size !') self.x_data.put( pl.arange(old_size, new_size), data_vector_segment.signal_plus[old_s_plus - s_plus:]) self.y_data.put( pl.arange(old_size, new_size), data_vector_segment.signal_minus[old_s_plus - s_plus:]) _pp_spec.inactive_stop = \ self._pp_spec_old.inactive_stop self.s_size = new_size _pp_spec.level = 1 ok = True else: for idx in xrange(1, old_s_plus): if pl.all(self.old_signal_plus[idx:] \ == data_vector_segment.signal_plus[idx - 1: old_s_plus - idx]): old_size = self.s_size new_size = old_size + s_plus - (old_s_plus - idx) if new_size > len(self.x_data): raise Exception( 'New size is greater then the signal size !' ) if new_size > old_size: _pp_spec.active_start = old_size _pp_spec.active_stop = new_size self.x_data.put( pl.arange(old_size, new_size), data_vector_segment. signal_plus[old_s_plus - idx:]) self.y_data.put( pl.arange(old_size, new_size), data_vector_segment. 
signal_minus[old_s_plus - idx:]) self.s_size = new_size _d = self.s_size - s_plus _pp_spec.inactive_start = _d - idx _pp_spec.inactive_stop = _d _pp_spec.level = 3 ok = True break else: for idx in xrange(1, old_s_plus): if idx + s_plus <= old_s_plus \ and pl.all( self.old_signal_plus[idx:idx + s_plus] \ == data_vector_segment.signal_plus): _d = self.s_size - old_s_plus _pp_spec.inactive_start = _d _pp_spec.inactive_stop = _d + idx if _pp_spec.inactive_stop + s_plus < self.s_size: _pp_spec.inactive_start_2 = \ _pp_spec.inactive_stop + s_plus _pp_spec.inactive_stop_2 = self.s_size _pp_spec.level = 2 ok = True break if ok == True and skip_frame == False: _pp_spec.x_data = self.x_data _pp_spec.y_data = self.y_data _pp_spec.cum_inactive = self.cum_inactive _pp_spec.s_size = self.s_size #print('PP_SPEC: ' + str(_p)) self.pp_spec_manager.addMiniPoincarePlotSpec(_pp_spec) if self.idx > 0 and \ (self.p.movie_bin_size > 0 and ((self.idx % self.p.movie_bin_size) == 0)): if len(self.pp_specs_managers) >= self.core_nums: if self.p.movie_calculate_all_frames == False: self.__save_frames__() self.pp_specs_managers = [] old_pp_spec_manager = self.pp_spec_manager self.pp_spec_manager = MiniPoincarePlotSpecManager() self.pp_spec_manager.movie_dir = self.__prefixed_movie_dir__ self.pp_spec_manager.movie_name = self.__prefixed_movie_name__ self.pp_spec_manager.movie_dpi = self.p.movie_dpi self.pp_spec_manager.movie_fps = self.p.movie_fps self.pp_spec_manager.movie_height = self.p.movie_height self.pp_spec_manager.movie_width = self.p.movie_width self.pp_spec_manager.active_color = self.active_color self.pp_spec_manager.inactive_color = self.inactive_color self.pp_spec_manager.centroid_color = self.centroid_color self.pp_spec_manager.active_point_size = \ self.p.movie_active_size self.pp_spec_manager.inactive_point_size = \ self.p.movie_inactive_size self.pp_spec_manager.centroid_point_size = \ self.p.movie_centroid_size self.pp_spec_manager.show_plot_legends = \ self.p.movie_show_plot_legends self.pp_spec_manager.x_label = self.p.x_label self.pp_spec_manager.y_label = self.p.y_label self.pp_spec_manager.clean_frames = self.p.movie_clean_frames self.pp_spec_manager.movie_title = self.p.movie_title self.pp_spec_manager.movie_frame_step = self.p.movie_frame_step self.pp_spec_manager.movie_identity_line = self.p.movie_identity_line self.pp_spec_manager.movie_hour_label = self.p.movie_hour_label self.pp_spec_manager.movie_minute_label = self.p.movie_minute_label self.pp_spec_manager.movie_second_label = self.p.movie_second_label self.pp_spec_manager.movie_time_label_in_line = self.p.movie_time_label_in_line self.pp_spec_manager.movie_time_label_font_size = self.p.movie_time_label_font_size self.pp_spec_manager.movie_time_label_prefix = self.p.movie_time_label_prefix self.pp_spec_manager.movie_title_font_size = self.p.movie_title_font_size self.pp_spec_manager.movie_axis_font_size = self.p.movie_axis_font_size self.pp_spec_manager.movie_axis_font = self.p.movie_axis_font self.pp_spec_manager.movie_title_font = self.p.movie_title_font self.pp_spec_manager.movie_tick_font = self.p.movie_tick_font self.pp_spec_manager.movie_frame_pad = self.p.movie_frame_pad self.pp_spec_manager.movie_create_time_label = self.p.movie_create_time_label self.pp_spec_manager.movie_frame_filename_with_time = self.p.movie_frame_filename_with_time #add all previous pp specs for pp_spec in old_pp_spec_manager.getMiniPoincarePlotSpecs(): self.pp_spec_manager.addPreviousPoincarePlotSpecMinimum( pp_spec) old_pp_spec_manager = None 
self.pp_specs_managers.append(self.pp_spec_manager) self.message = 'Prepare frame: %s' % (frame_name) elif ok == True and skip_frame == True: self.message = 'Skip frame %s' % (frame_name) elif ok == False: print('s_plus: ' + str(s_plus) + ' old_s_plus: ' + str(old_s_plus)) print('old_signal_plus: ' + str(self.old_signal_plus)) print('signal_plus: ' + str(data_vector_segment.signal_plus)) raise Exception('Error for idx ' + str(self.idx)) if _pp_spec.inactive_start >= 0 and _pp_spec.inactive_stop >= 0: #if time array is not None use it as array for cumulative time if not self.time == None: self.cum_inactive += pl.sum( self.time[_pp_spec.inactive_start:_pp_spec.inactive_stop]) else: self.cum_inactive += pl.sum( self.x_data[_pp_spec.inactive_start:_pp_spec. inactive_stop]) self.old_signal_plus = data_vector_segment.signal_plus self.idx = self.idx + 1 self._pp_spec_old = _pp_spec
def predict_for(model, parameters, root_area, root_sex, root_year, area, sex, year, population_weighted, vars, lower, upper): """ Generate draws from posterior predicted distribution for a specific (area, sex, year) :Parameters: - `model` : data.DataModel - `root_area` : str, area for which this model was fit consistently - `root_sex` : str, area for which this model was fit consistently - `root_year` : str, area for which this model was fit consistently - `area` : str, area to predict for - `sex` : str, sex to predict for - `year` : str, year to predict for - `population_weighted` : bool, should prediction be population weighted if it is the aggregation of units area RE hierarchy? - `vars` : dict, including entries for alpha, beta, mu_age, U, and X - `lower, upper` : float, bounds on predictions from expert priors :Results: - Returns array of draws from posterior predicted distribution """ area_hierarchy = model.hierarchy output_template = model.output_template.copy() # find number of samples from posterior len_trace = len(vars['mu_age'].trace()) # compile array of draws from posterior distribution of alpha (random effect covariate values) # a row for each draw from the posterior distribution # a column for each random effect (e.g. countries with data, regions with countries with data, etc) # # there are several cases to handle, or at least at one time there were: # vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?) # vars['alpha'] is a list of pymc Nodes # vars['alpha'] is a list of floats # vars['alpha'] is a list of some floats and some pymc Nodes # 'alpha' is not in vars # # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in # the prediction if 'alpha' in vars and isinstance(vars['alpha'], mc.Node): assert 0, 'No longer used' alpha_trace = vars['alpha'].trace() elif 'alpha' in vars and isinstance(vars['alpha'], list): alpha_trace = [] for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']): if isinstance(n, mc.Node): alpha_trace.append(n.trace()) else: # uncertainty of constant alpha incorporated here sigma = max(sigma, 1.e-9) # make sure sigma is non-zero assert not pl.isnan(sigma) alpha_trace.append( mc.rnormal(float(n), sigma**-2, size=len_trace)) alpha_trace = pl.vstack(alpha_trace).T else: alpha_trace = pl.array([]) # compile array of draws from posterior distribution of beta (fixed effect covariate values) # a row for each draw from the posterior distribution # a column for each fixed effect # # there are several cases to handle, or at least at one time there were: # vars['beta'] is a pymc Stochastic with an array for its value (no longer used?) 
# vars['beta'] is a list of pymc Nodes # vars['beta'] is a list of floats # vars['beta'] is a list of some floats and some pymc Nodes # 'beta' is not in vars # # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in # the prediction # # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above) if 'beta' in vars and isinstance(vars['beta'], mc.Node): assert 0, 'No longer used' beta_trace = vars['beta'].trace() elif 'beta' in vars and isinstance(vars['beta'], list): beta_trace = [] for n, sigma in zip(vars['beta'], vars['const_beta_sigma']): if isinstance(n, mc.Node): beta_trace.append(n.trace()) else: # uncertainty of constant beta incorporated here sigma = max(sigma, 1.e-9) # make sure sigma is non-zero assert not pl.isnan(sigma) beta_trace.append( mc.rnormal(float(n), sigma**-2., size=len_trace)) beta_trace = pl.vstack(beta_trace).T else: beta_trace = pl.array([]) # the prediction for the requested area is produced by aggregating predictions for all of the childred # of that area in the area_hierarchy (a networkx.DiGraph) leaves = [ n for n in nx.traversal.bfs_tree(area_hierarchy, area) if area_hierarchy.successors(n) == [] ] if len(leaves) == 0: # networkx returns an empty list when the bfs tree is a single node leaves = [area] # initialize covariate_shift and total_population covariate_shift = pl.zeros(len_trace) total_population = 0. # group output_template for easy access output_template = output_template.groupby(['area', 'sex', 'year']).mean() # if there are fixed effects, the effect coefficients are stored as an array in vars['X'] # use this to put together a covariate matrix for the predictions, according to the output_template # covariate values # # the resulting array is covs if 'X' in vars: covs = output_template.filter(vars['X'].columns) if 'x_sex' in vars['X'].columns: covs['x_sex'] = sex_value[sex] assert pl.all(covs.columns == vars['X_shift'].index ), 'covariate columns and unshift index should match up' for x_i in vars['X_shift'].index: covs[x_i] -= vars['X_shift'][ x_i] # shift covariates so that the root node has X_ar,sr,yr == 0 else: covs = pandas.DataFrame(index=output_template.index) # if there are random effects, put together an indicator based on # their hierarchical relationships # if 'U' in vars: p_U = area_hierarchy.number_of_nodes() # random effects for area U_l = pandas.DataFrame(pl.zeros((1, p_U)), columns=area_hierarchy.nodes()) U_l = U_l.filter(vars['U'].columns) else: U_l = pandas.DataFrame(index=[0]) # loop through leaves of area_hierarchy subtree rooted at 'area', # make prediction for each using appropriate random # effects and appropriate fixed effect covariates # for l in leaves: log_shift_l = pl.zeros(len_trace) U_l.ix[0, :] = 0. root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l) for node in root_to_leaf[1:]: if node not in U_l.columns: ## Add a columns U_l[node] = rnormal(0, appropriate_tau) level = len(nx.shortest_path(area_hierarchy, 'all', node)) - 1 if 'sigma_alpha' in vars: tau_l = vars['sigma_alpha'][level].trace()**-2 U_l[node] = 0. 
# if this node was not already included in the alpha_trace array, add it # there are several cases for adding: # if the random effect has a distribution of Constant # add it, using a sigma as well # otherwise, sample from a normal with mean zero and standard deviation tau_l if parameters.get('random_effects', {}).get(node, {}).get('dist') == 'Constant': mu = parameters['random_effects'][node]['mu'] sigma = parameters['random_effects'][node]['sigma'] sigma = max(sigma, 1.e-9) # make sure sigma is non-zero alpha_node = mc.rnormal(mu, sigma**-2, size=len_trace) else: if 'sigma_alpha' in vars: alpha_node = mc.rnormal(0., tau_l) else: alpha_node = pl.zeros(len_trace) if len(alpha_trace) > 0: alpha_trace = pl.vstack((alpha_trace.T, alpha_node)).T else: alpha_trace = pl.atleast_2d(alpha_node).T # TODO: implement a more robust way to align alpha_trace and U_l U_l.ix[0, node] = 1. # 'shift' the random effects matrix to have the intended # level of the hierarchy as the reference value if 'U_shift' in vars: for node in vars['U_shift']: U_l -= vars['U_shift'][node] # add the random effect intercept shift (len_trace draws) log_shift_l += pl.dot(alpha_trace, U_l.T).flatten() # make X_l if len(beta_trace) > 0: X_l = covs.ix[l, sex, year] log_shift_l += pl.dot(beta_trace, X_l.T).flatten() if population_weighted: # combine in linear-space with population weights shift_l = pl.exp(log_shift_l) covariate_shift += shift_l * output_template['pop'][l, sex, year] total_population += output_template['pop'][l, sex, year] else: # combine in log-space without weights covariate_shift += log_shift_l total_population += 1. if population_weighted: covariate_shift /= total_population else: covariate_shift = pl.exp(covariate_shift / total_population) parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T # clip predictions to bounds from expert priors parameter_prediction = parameter_prediction.clip(lower, upper) return parameter_prediction
def add_data_vector_segment(self, data_vector_segment, last_segment=False): self.message = None if self.__prefixed_movie_name__ == None: return if last_segment: self.__save_frames__() return frame_name = '%s_%06d' % (self.__prefixed_movie_name__, self.idx) frame_file = as_path(self.__prefixed_movie_dir__, frame_name + '.png') #skip_frame = True if self.idx < self.p.movie_skip_to_frame or \ # (self.p.movie_skip_frames and os.path.exists(frame_file)) \ # else False skip_frame = False mean_plus = pl.mean(data_vector_segment.signal_plus) mean_minus = pl.mean(data_vector_segment.signal_minus) s_plus = len(data_vector_segment.signal_plus) _pp_spec = MiniPoincarePlotSpec() _pp_spec.idx = self.idx _pp_spec.s_plus = s_plus _pp_spec.mean_plus = mean_plus _pp_spec.mean_minus = mean_minus _pp_spec.range = self.range _pp_spec.frame_file = frame_file if self.idx == 0: self.s_size = s_plus self.x_data.put(pl.arange(s_plus), data_vector_segment.signal_plus) self.y_data.put(pl.arange(s_plus), data_vector_segment.signal_minus) ok = True old_s_plus = 0 _pp_spec.level = 0 _pp_spec.active_start = 0 _pp_spec.active_stop = s_plus else: old_s_plus = len(self.old_signal_plus) ok = False if s_plus >= old_s_plus: if pl.all(self.old_signal_plus \ == data_vector_segment.signal_plus[:old_s_plus]): old_size = self.s_size new_size = old_size + s_plus - old_s_plus if new_size > old_size: _pp_spec.active_start = old_size _pp_spec.active_stop = new_size if new_size > len(self.x_data): raise Exception( 'New size is greater then the signal size !') self.x_data.put(pl.arange(old_size, new_size), data_vector_segment.signal_plus[old_s_plus - s_plus:]) self.y_data.put(pl.arange(old_size, new_size), data_vector_segment.signal_minus[old_s_plus - s_plus:]) _pp_spec.inactive_stop = \ self._pp_spec_old.inactive_stop self.s_size = new_size _pp_spec.level = 1 ok = True else: for idx in xrange(1, old_s_plus): if pl.all(self.old_signal_plus[idx:] \ == data_vector_segment.signal_plus[idx - 1: old_s_plus - idx]): old_size = self.s_size new_size = old_size + s_plus - (old_s_plus - idx) if new_size > len(self.x_data): raise Exception( 'New size is greater then the signal size !') if new_size > old_size: _pp_spec.active_start = old_size _pp_spec.active_stop = new_size self.x_data.put(pl.arange(old_size, new_size), data_vector_segment.signal_plus[ old_s_plus - idx:]) self.y_data.put(pl.arange(old_size, new_size), data_vector_segment.signal_minus[ old_s_plus - idx:]) self.s_size = new_size _d = self.s_size - s_plus _pp_spec.inactive_start = _d - idx _pp_spec.inactive_stop = _d _pp_spec.level = 3 ok = True break else: for idx in xrange(1, old_s_plus): if idx + s_plus <= old_s_plus \ and pl.all( self.old_signal_plus[idx:idx + s_plus] \ == data_vector_segment.signal_plus): _d = self.s_size - old_s_plus _pp_spec.inactive_start = _d _pp_spec.inactive_stop = _d + idx if _pp_spec.inactive_stop + s_plus < self.s_size: _pp_spec.inactive_start_2 = \ _pp_spec.inactive_stop + s_plus _pp_spec.inactive_stop_2 = self.s_size _pp_spec.level = 2 ok = True break if ok == True and skip_frame == False: _pp_spec.x_data = self.x_data _pp_spec.y_data = self.y_data _pp_spec.cum_inactive = self.cum_inactive _pp_spec.s_size = self.s_size #print('PP_SPEC: ' + str(_p)) self.pp_spec_manager.addMiniPoincarePlotSpec(_pp_spec) if self.idx > 0 and \ (self.p.movie_bin_size > 0 and ((self.idx % self.p.movie_bin_size) == 0)): if len(self.pp_specs_managers) >= self.core_nums: if self.p.movie_calculate_all_frames == False: self.__save_frames__() self.pp_specs_managers = [] 
old_pp_spec_manager = self.pp_spec_manager self.pp_spec_manager = MiniPoincarePlotSpecManager() self.pp_spec_manager.movie_dir = self.__prefixed_movie_dir__ self.pp_spec_manager.movie_name = self.__prefixed_movie_name__ self.pp_spec_manager.movie_dpi = self.p.movie_dpi self.pp_spec_manager.movie_fps = self.p.movie_fps self.pp_spec_manager.movie_height = self.p.movie_height self.pp_spec_manager.movie_width = self.p.movie_width self.pp_spec_manager.active_color = self.active_color self.pp_spec_manager.inactive_color = self.inactive_color self.pp_spec_manager.centroid_color = self.centroid_color self.pp_spec_manager.active_point_size = \ self.p.movie_active_size self.pp_spec_manager.inactive_point_size = \ self.p.movie_inactive_size self.pp_spec_manager.centroid_point_size = \ self.p.movie_centroid_size self.pp_spec_manager.show_plot_legends = \ self.p.movie_show_plot_legends self.pp_spec_manager.x_label = self.p.x_label self.pp_spec_manager.y_label = self.p.y_label self.pp_spec_manager.clean_frames = self.p.movie_clean_frames self.pp_spec_manager.movie_title = self.p.movie_title self.pp_spec_manager.movie_frame_step = self.p.movie_frame_step self.pp_spec_manager.movie_identity_line = self.p.movie_identity_line self.pp_spec_manager.movie_hour_label = self.p.movie_hour_label self.pp_spec_manager.movie_minute_label = self.p.movie_minute_label self.pp_spec_manager.movie_second_label = self.p.movie_second_label self.pp_spec_manager.movie_time_label_in_line = self.p.movie_time_label_in_line self.pp_spec_manager.movie_time_label_font_size = self.p.movie_time_label_font_size self.pp_spec_manager.movie_time_label_prefix = self.p.movie_time_label_prefix self.pp_spec_manager.movie_title_font_size = self.p.movie_title_font_size self.pp_spec_manager.movie_axis_font_size = self.p.movie_axis_font_size self.pp_spec_manager.movie_axis_font = self.p.movie_axis_font self.pp_spec_manager.movie_title_font = self.p.movie_title_font self.pp_spec_manager.movie_tick_font = self.p.movie_tick_font self.pp_spec_manager.movie_frame_pad = self.p.movie_frame_pad self.pp_spec_manager.movie_create_time_label = self.p.movie_create_time_label self.pp_spec_manager.movie_frame_filename_with_time = self.p.movie_frame_filename_with_time #add all previous pp specs for pp_spec in old_pp_spec_manager.getMiniPoincarePlotSpecs(): self.pp_spec_manager.addPreviousPoincarePlotSpecMinimum( pp_spec) old_pp_spec_manager = None self.pp_specs_managers.append(self.pp_spec_manager) self.message = 'Prepare frame: %s' % (frame_name) elif ok == True and skip_frame == True: self.message = 'Skip frame %s' % (frame_name) elif ok == False: print('s_plus: ' + str(s_plus) + ' old_s_plus: ' + str(old_s_plus)) print('old_signal_plus: ' + str(self.old_signal_plus)) print('signal_plus: ' + str(data_vector_segment.signal_plus)) raise Exception('Error for idx ' + str(self.idx)) if _pp_spec.inactive_start >= 0 and _pp_spec.inactive_stop >= 0: #if time array is not None use it as array for cumulative time if not self.time == None: self.cum_inactive += pl.sum( self.time[ _pp_spec.inactive_start:_pp_spec.inactive_stop]) else: self.cum_inactive += pl.sum( self.x_data[ _pp_spec.inactive_start:_pp_spec.inactive_stop]) self.old_signal_plus = data_vector_segment.signal_plus self.idx = self.idx + 1 self._pp_spec_old = _pp_spec
def fit_posterior(dm, region, sex, year, fast_fit=False, inconsistent_fit=False, params_to_fit=['p', 'r', 'i'], zero_re=True, posteriors_only=False): """ Fit posterior of specified region/sex/year for specified model Parameters ---------- dm : DiseaseJson region : str From dismod3.settings.gbd_regions, but clean()-ed sex : str, from dismod3.settings.gbd_sexes year : str, from dismod3.settings.gbd_years fast_fit : sample 101 draws from posterior, don't try for convergence (fast for testing) inconsistent_fit : fit parameters separately params_to_fit : list of params to fit, if not fitting all consistently zero_re : bool, if true, enforce constraint that sibling area REs sum to zero posteriors_only : bool, if tru use data from 1997-2007 for 2005 and from 2007 on for 2010 Example ------- >>> import fit_posterior >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005') """ dir = dismod3.settings.JOB_WORKING_DIR % dm.id ## load the model from disk or from web import simplejson as json import data reload(data) try: model = data.ModelData.load(dir) print 'loaded data from new format from %s' % dir except (IOError, AssertionError): model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json())) #model.save(dir) print 'loaded data from json, saved in new format for next time in %s' % dir # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros ## next block fills in missing covariates with zero for col in model.input_data.columns: if col.startswith('x_'): model.input_data[col] = model.input_data[col].fillna(0.) # also fill all covariates missing in output template with zeros model.output_template = model.output_template.fillna(0) predict_area = dismod3.utils.clean(region) predict_sex = dismod3.utils.clean(sex) predict_year = int(year) ## load emp_priors dict from dm.params param_type = dict(i='incidence', p='prevalence', r='remission', f='excess-mortality', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality') emp_priors = {} for t in 'i r p f'.split(): # uncomment below to not use empirical prior for rate with zero data # if pl.all(model.input_data['data_type'] != t): # continue #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex) key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex) mu = dm.get_mcmc('emp_prior_mean', key) #mu = dm.get_mcmc('emp_prior_median', key) sigma = dm.get_mcmc('emp_prior_std', key) if len(mu) == 101 and len(sigma) == 101: emp_priors[t, 'mu'] = mu # TODO: determine best way to propagate prior on function emp_priors[t, 'sigma'] = sigma # ALT 1: scale so that the joint probability is not a # function of the length of the age function # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma)) ## update model.parameters['random_effects'] if there is information in the disease model expert_priors = model.parameters[t].get('random_effects', {}) model.parameters[t]['random_effects'] = dm.get_empirical_prior(param_type[t]).get('new_alpha', {}) model.parameters[t]['random_effects'].update(expert_priors) # shift random effects to make REs for observed children of predict area have mean zero re_mean = pl.mean([model.parameters[t]['random_effects'][area]['mu'] \ for area in model.hierarchy.neighbors(predict_area) \ if area in model.parameters[t]['random_effects']]) for area in model.hierarchy.neighbors(predict_area): if area in model.parameters[t]['random_effects']: model.parameters[t]['random_effects'][area]['mu'] -= re_mean ## update 
model.parameters['fixed_effects'] if there is information in the disease model expert_fe_priors = model.parameters[t].get('fixed_effects', {}) model.parameters[t]['fixed_effects'].update(dm.get_empirical_prior(param_type[t]).get('new_beta', {})) ## create model and priors for region/sex/year # select data that is about areas in this region, recent years, and sex of male or total only assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area) def is_relevant(r): if (r['area'] not in subtree) and r['area'] != 'all': return False if predict_year == 1990: if r['year_start'] > 1997: return False elif predict_year == 2005: if posteriors_only: if r['year_end'] < 1997 or r['year_start'] > 2007: return False else: if r['year_end'] < 1997: return False elif predict_year == 2010: if posteriors_only: if r['data_type'] == 'm_all': # include m_all data from 2005, since 2010 is not loaded if r['year_end'] < 1997: return False else: if r['year_end'] < 2007: return False else: if r['year_end'] < 1997: return False else: assert 0, 'Predictions for year %d not yet implemented' % predict_year if r['sex'] not in [predict_sex, 'total']: return False return True old_relevant_rows = [i for i, r in model.input_data.T.iteritems() \ if (r['area'] in subtree or r['area'] == 'all')\ and ((predict_year >= 1997 and r['year_end'] >= 1997) or (predict_year <= 1997 and r['year_start'] <= 1997)) \ and r['sex'] in [predict_sex, 'total']] relevant_rows = model.input_data.index[model.input_data.apply(is_relevant, axis=1)] if predict_year == 1990: assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation for 1990" if not posteriors_only: assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation when posteriors_only is False" model.input_data = model.input_data.ix[relevant_rows] # replace area 'all' with predict_area model.input_data['area'][model.input_data['area'] == 'all'] = predict_area if inconsistent_fit: # generate fits for requested parameters inconsistently for t in params_to_fit: model.vars += ism.age_specific_rate(model, t, reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year, mu_age=None, mu_age_parent=emp_priors.get((t, 'mu')), sigma_age_parent=emp_priors.get((t, 'sigma')), rate_type=(t == 'rr') and 'log_normal' or 'neg_binom', zero_re=zero_re) if fast_fit: dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100) else: dismod3.fit.fit_asr(model, t, iter=iter, burn=burn, thin=thin, tune_interval=100) else: model.vars += ism.consistent(model, reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year, priors=emp_priors, zero_re=zero_re) ## fit model to data if fast_fit: dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100) else: dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=iter, burn=burn, thin=thin, tune_interval=100, verbose=True) # generate estimates posteriors = {} for t in 'i r f p rr pf m_with X'.split(): if t in model.vars: if t in model.parameters and 'level_bounds' in model.parameters[t]: lower=model.parameters[t]['level_bounds']['lower'] upper=model.parameters[t]['level_bounds']['upper'] else: lower=0 upper=pl.inf posteriors[t] = covariate_model.predict_for(model, model.parameters.get(t, {}), predict_area, predict_sex, predict_year, predict_area, predict_sex, predict_year, True, # population weighted 
                                                        model.vars[t], lower, upper)

    try:
        graphics.plot_fit(model, model.vars, emp_priors, {})  # pass model.vars; a bare `vars` here would be the builtin, not the model
        pl.savefig(dir + '/image/posterior-%s+%s+%s.png' % (predict_area, predict_sex, predict_year))
    except Exception, e:
        print 'Error generating output graphics'
        print e
def assert_almost_equal(x, y):
    log_offset_diff = pl.log(x + 1.e-4) - pl.log(y + 1.e-4)
    assert pl.all(log_offset_diff**2 <= 1.e-4), \
        'expected approximate equality, found means of:\n %s\n %s' % (x.mean(1), y.mean(1))
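# Example (hypothetical 2-d arrays): values that agree to within about 1% on the
# offset-log scale pass; a larger discrepancy raises with the row means in the message.
import pylab as pl

x = .5 * pl.ones((2, 10))
y = 1.005 * x
assert_almost_equal(x, y)        # passes: squared log-offset differences are <= 1.e-4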
def draw(board):
    return all(ravel(board != '.'))
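# Example for draw (a board is "drawn" when no '.' placeholders are left); the bare
# all/ravel names above are assumed to come from a pylab/numpy star-import.
from pylab import array

draw(array([['X', 'O', 'X'],
            ['O', 'X', 'O'],
            ['O', 'X', 'X']]))           # -> True
draw(array([['X', '.'],
            ['O', 'O']]))                # -> False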