Example #1
def log_normal(name, pi, sigma, p, s):
    """ Generate PyMC objects for a lognormal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p > 0), 'observed values must be positive'
    assert pl.all(s >= 0), 'standard error must be non-negative'

    i_inf = pl.isinf(s)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s):
        return mc.normal_like(pl.log(value), pl.log(pi + 1.e-9),
                              1. / (sigma**2. + (s / value)**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf):
        return pl.exp(
            mc.rnormal(pl.log(pi + 1.e-9),
                       1. / (sigma**2. + (s / (pi + 1.e-9))**2)))

    return dict(p_obs=p_obs, p_pred=p_pred)
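
A minimal usage sketch for this rate-model family (toy data and variable names are hypothetical; assumes PyMC 2 imported as mc and pylab as pl, as the examples imply):

import pylab as pl
import pymc as mc

# hypothetical toy data: three observed rates with their standard errors
p = pl.array([0.04, 0.05, 0.06])
s = pl.array([0.01, 0.02, 0.015])

# simple priors for the expected rates and the dispersion (assumptions)
pi = mc.Uniform('pi', 0.0, 1.0, value=0.05 * pl.ones(3))
sigma = mc.Uniform('sigma', 0.0, 10.0, value=0.1)

vars = log_normal('toy', pi, sigma, p, s)
m = mc.MCMC([pi, sigma, vars['p_obs'], vars['p_pred']])
m.sample(iter=2000, burn=1000)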
Example #2
def log_normal(name, pi, sigma, p, s):
    """ Generate PyMC objects for a lognormal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p > 0), "observed values must be positive"
    assert pl.all(s >= 0), "standard error must be non-negative"

    i_inf = pl.isinf(s)

    @mc.observed(name="p_obs_%s" % name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s):
        return mc.normal_like(pl.log(value), pl.log(pi + 1.0e-9), 1.0 / (sigma ** 2.0 + (s / value) ** 2.0))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.0

    @mc.deterministic(name="p_pred_%s" % name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf):
        return pl.exp(mc.rnormal(pl.log(pi + 1.0e-9), 1.0 / (sigma ** 2.0 + (s / (pi + 1.0e-9)) ** 2)))

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #3
def binom(name, pi, p, n):
    """ Generate PyMC objects for a binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.binomial_like(value * n, n, pi + 1.e-9)

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n, dtype=int)
    n_nonzero[n == 0] = 1.e6

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rbinomial(n, pi + 1.e-9) / (1. * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #4
def offset_log_normal(name, pi, sigma, p, s):
    """ Generate PyMC objects for an offset log-normal model
    
    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(s >= 0), 'standard error must be non-negative'

    p_zeta = mc.Uniform('p_zeta_%s'%name, 1.e-9, 10., value=1.e-6)

    i_inf = pl.isinf(s)
    @mc.observed(name='p_obs_%s'%name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s, p_zeta=p_zeta):
        return mc.normal_like(pl.log(value[~i_inf]+p_zeta), pl.log(pi[~i_inf]+p_zeta),
                              1./(sigma**2. + (s/(value+p_zeta))[~i_inf]**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.
    @mc.deterministic(name='p_pred_%s'%name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf, p_zeta=p_zeta):
        return pl.exp(mc.rnormal(pl.log(pi+p_zeta), 1./(sigma**2. + (s/(pi+p_zeta))**2.))) - p_zeta

    return dict(p_zeta=p_zeta, p_obs=p_obs, p_pred=p_pred)
Example #5
def poisson(name, pi, p, n):
    """ Generate PyMC objects for a poisson model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), "observed values must be non-negative"
    assert pl.all(n >= 0), "effective sample size must be non-negative"

    i_nonzero = n != 0.0

    @mc.observed(name="p_obs_%s" % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.poisson_like((value * n)[i_nonzero], (pi * n)[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=float)
    n_nonzero[n == 0.0] = 1.0e6

    @mc.deterministic(name="p_pred_%s" % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rpoisson((pi * n).clip(1.0e-9, pl.inf)) / (1.0 * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #6
def beta_binom(name, pi, p, n):
    """ Generate PyMC objects for a beta-binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    p_n = mc.Uniform('p_n_%s'%name, lower=1.e4, upper=1.e9, value=1.e4)  # convergence requires getting these bounds right
    pi_latent = [mc.Beta('pi_latent_%s_%d'%(name,i), pi[i]*p_n, (1-pi[i])*p_n, value=pi_i) for i, pi_i in enumerate(pi.value)]

    i_nonzero = (n!=0.)
    @mc.observed(name='p_obs_%s'%name)
    def p_obs(value=p, pi=pi_latent, n=n):
        pi_flat = pl.array(pi)
        return mc.binomial_like((value*n)[i_nonzero], n[i_nonzero], pi_flat[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=int)
    n_nonzero[n==0] = 1.e6
    @mc.deterministic(name='p_pred_%s'%name)
    def p_pred(pi=pi_latent, n=n_nonzero):
        return mc.rbinomial(n, pi) / (1.*n)

    return dict(p_n=p_n, pi_latent=pi_latent, p_obs=p_obs, p_pred=p_pred)
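
Unlike the plain binomial model, beta_binom also returns the latent rates and their concentration parameter, which must be included in the sampler. A hedged sketch (hypothetical toy values; note that pi must be a node whose .value is an array, since pi.value seeds the latent rates):

import pylab as pl
import pymc as mc

p = pl.array([0.10, 0.15, 0.12])   # hypothetical observed rates
n = pl.array([200., 150., 100.])   # effective sample sizes
pi = mc.Uniform('pi', 0.0, 1.0, value=0.1 * pl.ones(3))

vars = beta_binom('toy', pi, p, n)
m = mc.MCMC([pi] + vars['pi_latent'] + [vars['p_n'], vars['p_obs'], vars['p_pred']])
m.sample(iter=2000, burn=1000)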
Example #7
def binom(name, pi, p, n):
    """ Generate PyMC objects for a binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), "observed values must be non-negative"
    assert pl.all(n >= 0), "effective sample size must be non-negative"

    @mc.observed(name="p_obs_%s" % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.binomial_like(value * n, n, pi + 1.0e-9)

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n, dtype=int)
    n_nonzero[n == 0] = 1.0e6

    @mc.deterministic(name="p_pred_%s" % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rbinomial(n, pi + 1.0e-9) / (1.0 * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #8
def poisson(name, pi, p, n):
    """ Generate PyMC objects for a poisson model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    i_nonzero = (n != 0.)

    @mc.observed(name='p_obs_%s' % name)
    def p_obs(value=p, pi=pi, n=n):
        return mc.poisson_like((value * n)[i_nonzero], (pi * n)[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=float)
    n_nonzero[n == 0.] = 1.e6

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, n=n_nonzero):
        return mc.rpoisson((pi * n).clip(1.e-9, pl.inf)) / (1. * n)

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #9
def beta_binom(name, pi, p, n):
    """ Generate PyMC objects for a beta-binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    p_n = mc.Uniform('p_n_%s'%name, lower=1.e4, upper=1.e9, value=1.e4)  # convergence requires getting these bounds right
    pi_latent = [mc.Beta('pi_latent_%s_%d'%(name,i), pi[i]*p_n, (1-pi[i])*p_n, value=pi_i) for i, pi_i in enumerate(pi.value)]

    i_nonzero = (n!=0.)
    @mc.observed(name='p_obs_%s'%name)
    def p_obs(value=p, pi=pi_latent, n=n):
        pi_flat = pl.array(pi)
        return mc.binomial_like((value*n)[i_nonzero], n[i_nonzero], pi_flat[i_nonzero])

    # for any observation with n=0, make predictions for n=1.e6, to use for predictive validity
    n_nonzero = pl.array(n.copy(), dtype=int)
    n_nonzero[n==0] = 1.e6
    @mc.deterministic(name='p_pred_%s'%name)
    def p_pred(pi=pi_latent, n=n_nonzero):
        return mc.rbinomial(n, pi) / (1.*n)

    return dict(p_n=p_n, pi_latent=pi_latent, p_obs=p_obs, p_pred=p_pred)
Example #10
def setup_asr_step_methods(m, vars, additional_stochs=[]):
    # groups RE stochastics that are suspected of being dependent
    groups = []
    fe_group = [n for n in vars.get('beta', []) if isinstance(n, mc.Stochastic)]
    ap_group = [n for n in vars.get('gamma', []) if isinstance(n, mc.Stochastic)]
    groups += [[g_i, g_j] for g_i, g_j in zip(ap_group[1:], ap_group[:-1])] + [fe_group, ap_group, fe_group+ap_group]

    for a in vars.get('hierarchy', []):
        group = []

        col_map = dict([[key, i] for i,key in enumerate(vars['U'].columns)])
        
        if a in vars['U']:
            for b in nx.shortest_path(vars['hierarchy'], 'all', a):
                if b in vars['U']:
                    n = vars['alpha'][col_map[b]]
                    if isinstance(n, mc.Stochastic):
                        group.append(n)
        groups.append(group)
        #if len(group) > 0:
            #group += ap_group
            #groups.append(group)
            #group += fe_group
            #groups.append(group)
                    
    for stoch in groups:
        if len(stoch) > 0 and pl.all([isinstance(n, mc.Stochastic) for n in stoch]):
            # only step certain stochastics, for understanding convergence
            #if 'gamma_i' not in stoch[0].__name__:
            #    print 'no stepper for', stoch
            #    m.use_step_method(mc.NoStepper, stoch)
            #    continue

            #print 'finding Normal Approx for', [n.__name__ for n in stoch]
            if additional_stochs == []:
                vars_to_fit = [vars.get('p_obs'), vars.get('pi_sim'), vars.get('smooth_gamma'), vars.get('parent_similarity'),
                               vars.get('mu_sim'), vars.get('mu_age_derivative_potential'), vars.get('covariate_constraint')]
            else:
                vars_to_fit = additional_stochs

            try:
                # NOTE: this unconditional raise skips the NormApprox-based
                # covariance estimate below, so the plain AdaptiveMetropolis
                # fallback in the except clause is always used
                raise ValueError
                na = mc.NormApprox(vars_to_fit + stoch)
                na.fit(method='fmin_powell', verbose=0)
                cov = pl.array(pl.inv(-na.hess), order='F')
                #print 'opt:', pl.round_([n.value for n in stoch], 2)
                #print 'cov:\n', cov.round(4)
                if pl.all(pl.eigvals(cov) >= 0):
                    m.use_step_method(mc.AdaptiveMetropolis, stoch, cov=cov)
                else:
                    raise ValueError
            except ValueError:
                #print 'cov matrix is not positive semi-definite'
                m.use_step_method(mc.AdaptiveMetropolis, stoch)
Example #11
    def plotD(self, t):

        Z = self.matrix[t]

        if self.axt != None:

            self.axt.cla()
            self.fig.delaxes(self.axt)
            self.axt = None

        if self.ax == None:

            self.ax = self.fig.add_subplot(111)

        ax = self.ax
        ax.cla()
        ax.set_title(u'Evolución de curvas de nivel en el dominio (Problema directo, 2D)')

        divi=np.zeros((len(self.Y),len(self.X)), float)
        divi[:,:]=Z[0,0]
        # MATPLOTLIB BUG: it only plots when the matrix is not a multiple of ones; a constant matrix has no contour line

        if not p.all(np.equal(Z,divi)):

            ax.contour(self.X, self.Y, Z)
Example #12
    def __on_ir_sensor_data_(self, ir_data):
        if len(ir_data) != 4:
            return

        points = [[io['x'], io['y']] for io in ir_data]

        # the ir sensor likes to initialize with 1023 for all coordinates
        # so we catch that case here

        if self.is_ir_initial:
            self.is_ir_initial = pl.all(pl.array(points) == 1023)
            return

        x_values, y_values = \
            self.__get_moving_averages_points(um.sort_points(points))

        x, y = um.get_projection_transformed_point(x_values, y_values,
                                                   self.monitor_width,
                                                   self.monitor_height,
                                                   self.width / 2,
                                                   self.height / 2)

        self.pointer_location = (x, y)

        self.ir_data_updated.emit()
Example #13
    def SVMAF(self,freq,n,l):
        #Apply the SVMAF filter to the material parameters
        runningMean=lambda x,N: py.hstack((x[:N-1],py.convolve(x,py.ones((N,))/N,mode='same')[N-1:-N+1],x[(-N+1):]))
        #calculate the moving average of 3 points
        n_smoothed=runningMean(n,3)
        #evaluate H_smoothed from n_smoothed
        H_smoothed=self.H_theory(freq,[n_smoothed.real,n_smoothed.imag],l)
        
        H_r=H_smoothed.real
        H_i=H_smoothed.imag
        f=1
        #the uncertainty margins
        lb_r=self.H.getFReal()-self.H.getFRealUnc()*f
        lb_i=self.H.getFImag()-self.H.getFImagUnc()*f
        ub_r=self.H.getFReal()+self.H.getFRealUnc()*f
        ub_i=self.H.getFImag()+self.H.getFImagUnc()*f
        
        # ix = all indices for which, after smoothing n, H is still in between the bounds
        ix=py.all([H_r>=lb_r,H_r<ub_r,H_i>=lb_i,H_i<ub_i],axis=0)
        # don't have a good idea at the moment, so do it manually:
        for i in range(len(n_smoothed)):
            if ix[i]==0:
                n_smoothed[i]=n[i]
        print("SVMAF changed the refractive index at " + str(sum(ix)) + " frequencies")
        return n_smoothed      
Example #14
def normal_model(name, pi, sigma, p, s):
    """ Generate PyMC objects for a normal model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `sigma` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `s` : array, standard error of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(s >= 0), 'standard error must be non-negative'

    i_inf = pl.isinf(s)
    @mc.observed(name='p_obs_%s'%name)
    def p_obs(value=p, pi=pi, sigma=sigma, s=s):
        return mc.normal_like(value[~i_inf], pi[~i_inf], 1./(sigma**2. + s[~i_inf]**2.))

    s_noninf = s.copy()
    s_noninf[i_inf] = 0.    
    @mc.deterministic(name='p_pred_%s'%name)
    def p_pred(pi=pi, sigma=sigma, s=s_noninf):
        return mc.rnormal(pi, 1./(sigma**2. + s**2.))

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #15
def get_emp(disease, country, sex, year):
    # load posterior estimates from GBD 2010 Study
    global_model = load_new_model(disease, 'all', sex, year)
    emp = pandas.read_csv('/home/j/Project/dismod/output/dm-%s/posterior/dm-%s-%s-%s-%s-%s.csv'%(disease, disease, full_name[data_type], global_model.hierarchy.in_edges(country)[0][0], sex, year), index_col=None)
 
    # remove population numbers
    del emp['Population']
 
    # keep only estimates from country
    cty_ix = (emp['Iso3'] == country)
    emp = emp[cty_ix]
    del emp['Iso3']
 
    # keep only estimates for data type
    try:
        assert pl.all(emp['Rate type'] == full_name[data_type])
    except AssertionError:
        dt_ix = (emp['Rate type']) == full_name[data_type]
        emp = emp[dt_ix]
    del emp['Rate type'] 
    
    # return GBD 2010 Study posterior 
    emp.index = emp['Age']
    del emp['Age']
    return emp.mean(1), emp.std(1)*10
Example #16
def visualize_steps(mod, fname='mod.avi', description_str=''):
    times = list(pl.arange(0, 30, .2)) + range(30, 200) + range(200, 1500, 10)
    times += range(1500, 1700) + range(1700, 3000, 10)
    times += range(3000, 3200) + range(3200, len(mod.X.trace()), 10)
    assert pl.all(
        pl.diff(times) >= 0.
    ), 'movies where time is not increasing are confusing and probably unintentional'
    try:
        print 'generating %d images' % len(times)
        for i, t in enumerate(times):
            if i % 100 == 99:
                print '%d of %d (t=%.2f)' % (i, len(times), t)
            sys.stdout.flush()
            visualize_single_step(mod, int(t), t - int(t), description_str)
            pl.savefig('mod%06d.png' % i)
    except KeyboardInterrupt:
        pass

    import subprocess
    subprocess.call(
        'mencoder mf://mod*.png -mf w=800:h=600 -ovc x264 -of avi -o %s' %
        fname,
        shell=True)
    subprocess.call('mplayer -loop 1 %s' % fname, shell=True)
    subprocess.call('rm mod*.png', shell=True)
Example #17
    def plotD(self, t):

        Z = self.matrix[t]

        if self.axt != None:

            self.axt.cla()
            self.fig.delaxes(self.axt)
            self.axt = None

        if self.ax == None:

            self.ax = self.fig.add_subplot(111)

        ax = self.ax
        ax.cla()
        ax.set_title(
            u'Evolución de curvas de nivel en el dominio (Problema directo, 2D)'
        )

        divi = np.zeros((len(self.Y), len(self.X)), float)
        divi[:, :] = Z[0, 0]
        # MATPLOTLIB BUG: it only plots when the matrix is not a multiple of ones; a constant matrix has no contour line

        if not p.all(np.equal(Z, divi)):

            ax.contour(self.X, self.Y, Z)
Example #18
  def integrate_field(self, fn_spec, specific, fn_main, r=20, val=0.0):
    """
    Assimilate a field with filename <fn_spec>  from DataInput object 
    <specific> into this DataInput's field with filename <fn_main>.  The
    parameter <val> should be set to the specific dataset's value for 
    undefined regions, default is 0.0.  <r> is a parameter used to eliminate
    border artifacts from interpolation; increase this value to eliminate edge
    noise.
    """
    print "::: integrating %s field from %s :::" % (fn_spec, specific.name)
    # get the dofmap to map from mesh vertex indices to function indices :
    df    = self.func_space.dofmap()
    dfmap = df.vertex_to_dof_map(self.mesh)
    
    unew  = self.get_projection(fn_main)      # existing dataset projection
    uocom = unew.compute_vertex_values()      # mesh indexed main vertex values
    
    uspec = specific.get_projection(fn_spec)  # specific dataset projection
    uscom = uspec.compute_vertex_values()     # mesh indexed spec vertex values

    d     = float64(specific.data[fn_spec])   # original matlab spec dataset

    # get arrays of x-values for specific domain
    xs    = specific.x
    ys    = specific.y
    nx    = specific.nx
    ny    = specific.ny
    
    for v in vertices(self.mesh):
      # mesh vertex x,y coordinate :
      i   = v.index()
      p   = v.point()
      x   = p.x()
      y   = p.y()
      
      # indexes of closest datapoint to specific dataset's x and y domains :
      idx = abs(xs - x).argmin()
      idy = abs(ys - y).argmin()
      
      # data value for closest value and square around the value in question :
      dv  = d[idy, idx] 
      db  = d[max(0,idy-r) : min(ny, idy+r),  max(0, idx-r) : min(nx, idx+r)]
      
      # if the vertex is in the domain of the specific dataset, and the value
      # of the dataset at this point is above <val>, set the array value
      # of the main file to this new specific region's value.
      if dv > val:
        #print "found:", x, y, idx, idy, v.index()
        # if the value is not near an edge, make the value equal to the
        # nearest specific region's dataset value, otherwise, use the 
        # specific region's projected value :
        if all(db > val):
          uocom[i] = uscom[i]
        else :
          uocom[i] = dv
    
    # set the values of the projected original dataset equal to the assimilated
    # dataset :
    unew.vector().set_local(uocom[dfmap])
    return unew
Example #19
def test_save_and_load():
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    # TODO: delete this dir if it exists
    d.save('tests/tmp')

    # TODO: test that files really were created
    d2 = data.ModelData.load('tests/tmp')
    assert d.input_data.shape == d2.input_data.shape, 'input data should be equal before and after save'
    assert pl.all(d.input_data['value'] == d2.input_data['value']), 'input data should be equal before and after save'

    assert d.output_template.shape == d2.output_template.shape, 'output template should be equal before and after save'
    assert pl.all(d.output_template['area'] == d2.output_template['area']), 'output template should be equal before and after save'

    assert d.parameters == d2.parameters, 'parameters should be equal before and after save'
    assert sorted(d.hierarchy.edges()) == sorted(d2.hierarchy.edges()), 'hierarchy should be equal before and after save'
    assert d.nodes_to_fit == d2.nodes_to_fit, 'nodes_to_fit should be equal before and after save'
Example #20
 def test_good_model(self):
     vars = models.latent_simplex(self.X)
     assert pl.all(
         pl.sum(vars['pi'].value, 1) <= 1.0
     ), 'pi values should sum to at most 1, (%s found)' % pl.sum(
         vars['pi'].value, 1)
     m = mc.MCMC(vars)
     m.sample(10)
Example #21
def neg_binom(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n >= 0), 'effective sample size must be non-negative'

    i_zero = pl.array(n == 0.)

    if (isinstance(delta, mc.Node) and pl.shape(delta.value) == ()) \
            or (not isinstance(delta, mc.Node) and pl.shape(delta) == ()): # delta is a scalar

        @mc.observed(name='p_obs_%s' % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(value[~i_zero] * n[~i_zero],
                                             pi[~i_zero] * n[~i_zero] + 1.e-9,
                                             delta)
    else:

        @mc.observed(name='p_obs_%s' % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(value[~i_zero] * n[~i_zero],
                                             pi[~i_zero] * n[~i_zero] + 1.e-9,
                                             delta[~i_zero])

    # for any observation with n=0, make predictions for n=1.e9, to use for predictive validity
    n_nonzero = n.copy()
    n_nonzero[i_zero] = 1.e9

    @mc.deterministic(name='p_pred_%s' % name)
    def p_pred(pi=pi, delta=delta, n=n_nonzero):
        return mc.rnegative_binomial(pi * n + 1.e-9, delta) / pl.array(
            n + 1.e-9, dtype=float)

    return dict(p_obs=p_obs, p_pred=p_pred)
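
The shape test on delta means the same constructor accepts either one shared dispersion node or one dispersion per row. A hedged sketch of the scalar case (toy values and names are hypothetical; rows with n=0 drop out of the likelihood but still receive predictions):

import pylab as pl
import pymc as mc

p = pl.array([0.02, 0.03, 0.0])
n = pl.array([500., 400., 0.])   # the n=0 row is excluded from the likelihood
pi = mc.Uniform('pi', 0.0, 1.0, value=0.02 * pl.ones(3))
delta = mc.Uniform('delta', 1.0, 100.0, value=10.0)  # scalar dispersion

vars = neg_binom('toy', pi, delta, p, n)
m = mc.MCMC([pi, delta, vars['p_obs'], vars['p_pred']])
m.sample(iter=2000, burn=1000)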
Example #22
 def __iadd__(self,other):
     if(self.id==None):
         self.id = other.id;
     if(pl.all(self.id==other.id)):
         self.trials+=other.trials;
         self.nr_trials = len(self.trials);
     else:
        print "\nERROR: cannot concatenate blocks with differing parameters!\n" 
     return self;
Example #23
def neg_binom(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs' (the observed stochastic likelihood) and 'p_pred' (the data prediction stochastic)

    """
    assert pl.all(p >= 0), "observed values must be non-negative"
    assert pl.all(n >= 0), "effective sample size must be non-negative"

    i_zero = pl.array(n == 0.0)

    if (isinstance(delta, mc.Node) and pl.shape(delta.value) == ()) or (
        not isinstance(delta, mc.Node) and pl.shape(delta) == ()
    ):  # delta is a scalar

        @mc.observed(name="p_obs_%s" % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(value[~i_zero] * n[~i_zero], pi[~i_zero] * n[~i_zero] + 1.0e-9, delta)

    else:

        @mc.observed(name="p_obs_%s" % name)
        def p_obs(value=p, pi=pi, delta=delta, n=n):
            return mc.negative_binomial_like(
                value[~i_zero] * n[~i_zero], pi[~i_zero] * n[~i_zero] + 1.0e-9, delta[~i_zero]
            )

    # for any observation with n=0, make predictions for n=1.e9, to use for predictive validity
    n_nonzero = n.copy()
    n_nonzero[i_zero] = 1.0e9

    @mc.deterministic(name="p_pred_%s" % name)
    def p_pred(pi=pi, delta=delta, n=n_nonzero):
        return mc.rnegative_binomial(pi * n + 1.0e-9, delta) / pl.array(n + 1.0e-9, dtype=float)

    return dict(p_obs=p_obs, p_pred=p_pred)
Example #24
    def plotD2(self, t, ax):

        Z = self.matrix[t]
        ax.cla()

        divi = np.zeros((len(self.Y), len(self.X)), float)
        divi[:, :] = Z[0, 0]
        # MATPLOTLIB BUG: it only plots when the matrix is not a multiple of ones; a constant matrix has no contour line

        if not p.all(np.equal(Z, divi)):
            ax.contour(self.X, self.Y, Z)
Example #25
def test_save_and_load():
    d = data.ModelData.from_gbd_json('tests/dismoditis.json')

    # TODO: delete this dir if it exists
    d.save('tests/tmp')

    # TODO: test that files really were created
    d2 = data.ModelData.load('tests/tmp')
    assert d.input_data.shape == d2.input_data.shape, 'input data should be equal before and after save'
    assert pl.all(d.input_data['value'] == d2.input_data['value']
                  ), 'input data should be equal before and after save'

    assert d.output_template.shape == d2.output_template.shape, 'output template should be equal before and after save'
    assert pl.all(d.output_template['area'] == d2.output_template['area']
                  ), 'output template should be equal before and after save'

    assert d.parameters == d2.parameters, 'parameters should be equal before and after save'
    assert sorted(d.hierarchy.edges()) == sorted(d2.hierarchy.edges(
    )), 'hierarchy should be equal before and after save'
    assert d.nodes_to_fit == d2.nodes_to_fit, 'nodes_to_fit should be equal before and after save'
Example #26
    def plotD2(self, t, ax):

        Z = self.matrix[t]
        ax.cla()

        divi = np.zeros((len(self.Y), len(self.X)), float)
        divi[:, :] = Z[0, 0]
        # MATPLOTLIB BUG: it only plots when the matrix is not a multiple of ones; a constant matrix has no contour line

        if not p.all(np.equal(Z, divi)):
            ax.contour(self.X, self.Y, Z)
Example #27
def load_block_data(pathname):
    valid_filenames = [
        fname for fname in glob(pathname + '/*.yml')
        if filename_pattern.match(fname)
    ]
    blocks = sorted([SearchBlock(fname) for fname in valid_filenames])
    ids = unique([block.id for block in blocks])
    combined_blocks = []
    for id in ids:
        blk = pl.sum([block for block in blocks if pl.all(block.id == id)])
        combined_blocks.append(blk)
    return combined_blocks
Example #28
def make_exact(time):
    dens1 = 1.0
    Gamma = 1.4
    Mach = 2.0
    dens2 = dens1 * (Gamma + 1.0) * Mach**2 / ((Gamma - 1.0) * Mach**2 + 2.0)
    speed = Mach * pylab.sqrt(Gamma * 1.0 / dens1)
    xexact = pylab.arange(0.0, 1.0, 0.001)
    deltime = time - xexact / speed
    exact = pylab.ones(shape=deltime.shape) * dens1
    index = pylab.all([deltime > 0], axis=0)
    exact[index] = dens2
    return (xexact, exact)
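
make_exact returns the analytic density profile behind a Mach-2 shock at a given time; a hedged plotting sketch (the time value is hypothetical):

import pylab

xexact, exact = make_exact(0.2)
pylab.plot(xexact, exact)
pylab.xlabel('x')
pylab.ylabel('density')
pylab.show()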
Example #29
def neg_binom_lower_bound(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial lower bound model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs', the observed stochastic likelihood

    """
    assert pl.all(p >= 0), 'observed values must be non-negative'
    assert pl.all(n > 0), 'effective sample size must be positive'

    @mc.observed(name='p_obs_%s'%name)
    def p_obs(value=p, pi=pi, delta=delta, n=n):
        return mc.negative_binomial_like(pl.maximum(value*n, pi*n), pi*n+1.e-9, delta)

    return dict(p_obs=p_obs)
Example #30
def neg_binom_lower_bound(name, pi, delta, p, n):
    """ Generate PyMC objects for a negative binomial lower bound model

    :Parameters:
      - `name` : str
      - `pi` : pymc.Node, expected values of rates
      - `delta` : pymc.Node, dispersion parameters of rates
      - `p` : array, observed values of rates
      - `n` : array, effective sample sizes of rates

    :Results:
      - Returns dict of PyMC objects, including 'p_obs', the observed stochastic likelihood

    """
    assert pl.all(p >= 0), "observed values must be non-negative"
    assert pl.all(n > 0), "effective sample size must be positive"

    @mc.observed(name="p_obs_%s" % name)
    def p_obs(value=p, pi=pi, delta=delta, n=n):
        return mc.negative_binomial_like(pl.maximum(value * n, pi * n), pi * n + 1.0e-9, delta)

    return dict(p_obs=p_obs)
Example #31
  def identify_nans(self, data, fn):
    """ 
    private method to identify rows and columns of all nans from grids. This 
    happens when the data from multiple GIS databases don't quite align on 
    whatever the desired grid is.
    """
    #print "::: DataInput identifying NaNs for %s :::" % fn

    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows
    
    if any(good_x != self.good_x):
      total_nan_x = sum(good_x == False)
      self.rem_nans = True
      print "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)

    if any(good_y != self.good_y):
      total_nan_y = sum(good_y == False)
      self.rem_nans = True
      print "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
    
    self.good_x = good_x
    self.good_y = good_y
Example #32
  def identify_nans(self, data, fn):
    """
    private method to identify rows and columns of all nans from grids. This
    happens when the data from multiple GIS databases don't quite align on
    whatever the desired grid is.
    """
    good_x = ~all(isnan(data), axis=0) & self.good_x  # good cols
    good_y = ~all(isnan(data), axis=1) & self.good_y  # good rows

    if any(good_x != self.good_x):
      total_nan_x = sum(good_x == False)
      self.rem_nans = True
      s =  "Warning: %d row(s) of \"%s\" are entirely NaN." % (total_nan_x, fn)
      print_text(s, self.color)

    if any(good_y != self.good_y):
      total_nan_y = sum(good_y == False)
      self.rem_nans = True
      s = "Warning: %d col(s) of \"%s\" are entirely NaN." % (total_nan_y, fn)
      print_text(s, self.color)

    self.good_x = good_x
    self.good_y = good_y
Example #33
    def test_process_fit_results(self):
        r = p.arange(5)
        e = p.outer(p.arange(5), p.arange(5))

        alpha_psp = AlphaPSP()
        pr, pe = alpha_psp.process_fit_results(r, e)

        self.assertTrue(p.all(pr == p.array([0, 2, 1, 3, 4])))
        self.assertEqual(pe[1, 1], 4)
        self.assertEqual(pe[2, 2], 1)

        self.assertLess(pr[2], pr[1])
        self.assertLess(pe[2, 2], pe[1, 1])

        # test again with permuted values
        pr, pe = alpha_psp.process_fit_results(pr, pe)

        self.assertTrue(p.all(pr == p.array([0, 2, 1, 3, 4])))
        self.assertEqual(pe[1, 1], 4)
        self.assertEqual(pe[2, 2], 1)

        self.assertLess(pr[2], pr[1])
        self.assertLess(pe[2, 2], pe[1, 1])
Example #34
def spline(name, ages, knots, smoothing, interpolation_method='linear'):
    """ Generate PyMC objects for a spline model of age-specific rate

    Parameters
    ----------
    name : str
    knots : array
    ages : array, points to interpolate to
    smoothing : pymc.Node, smoothness parameter for smoothing spline
    interpolation_method : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'

    Results
    -------
    Returns dict of PyMC objects, including 'gamma' (log of rate at
    knots) and 'mu_age' (age-specific rate interpolated at all age
    points)
    """
    assert pl.all(pl.diff(knots) > 0), 'Spline knots must be strictly increasing'

    # TODO: consider changing this prior distribution to be something more familiar in linear space
    gamma = [mc.Normal('gamma_%s_%d'%(name,k), 0., 10.**-2, value=-10.) for k in knots]
    #gamma = [mc.Uniform('gamma_%s_%d'%(name,k), -20., 20., value=-10.) for k in knots]

    # TODO: fix AdaptiveMetropolis so that this is not necessary
    flat_gamma = mc.Lambda('flat_gamma_%s'%name, lambda gamma=gamma: pl.array([x for x in pl.flatten(gamma)]))


    import scipy.interpolate
    @mc.deterministic(name='mu_age_%s'%name)
    def mu_age(gamma=flat_gamma, knots=knots, ages=ages):
        mu = scipy.interpolate.interp1d(knots, pl.exp(gamma), kind=interpolation_method, bounds_error=False, fill_value=0.)
        return mu(ages)

    vars = dict(gamma=gamma, mu_age=mu_age, ages=ages, knots=knots)

    if (smoothing > 0) and (not pl.isinf(smoothing)):
        #print 'adding smoothing of', smoothing
        @mc.potential(name='smooth_mu_%s'%name)
        def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
            # the following is to include a "noise floor" so that level value
            # zero prior does not exert undue influence on age pattern
            # smoothing
            # TODO: consider changing this to an offset log normal
            gamma = gamma.clip(pl.log(pl.exp(gamma).mean()/10.), pl.inf)  # only include smoothing on values within 10x of mean

            return mc.normal_like(pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)
        vars['smooth_gamma'] = smooth_gamma

    return vars
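
A minimal usage sketch (the ages, knots, and name are hypothetical; assumes PyMC 2 as mc and pylab as pl). The returned dict can be passed straight to mc.MCMC:

import pylab as pl
import pymc as mc

ages = pl.arange(101)                 # interpolate at single-year ages 0..100
knots = pl.array([0, 15, 45, 100])    # strictly increasing, as the assert requires
vars = spline('toy', ages, knots, smoothing=1.0)

print(vars['mu_age'].value.shape)     # (101,), the rate at every age point
m = mc.MCMC(vars)
m.sample(iter=2000, burn=1000)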
Example #35
def spline(name, ages, knots, smoothing, interpolation_method='linear'):
    """ Generate PyMC objects for a piecewise constant Gaussian process (PCGP) model

    Parameters
    ----------
    name : str
    knots : array, locations of the discontinuities in the piecewise constant function
    ages : array, points to interpolate to
    smoothing : pymc.Node, smoothness parameter for smoothing spline
    interpolation_method : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'

    Results
    -------
    Returns dict of PyMC objects, including 'gamma' (log of rate at
    knots) and 'mu_age' (age-specific rate interpolated at all age points)
    """
    assert pl.all(pl.diff(knots) > 0), 'Spline knots must be strictly increasing'
    
    gamma = [mc.Normal('gamma_%s_%d'%(name,k), 0., 10.**-2, value=-10.) for k in knots]
    #gamma = [mc.Uniform('gamma_%s_%d'%(name,k), -20., 20., value=-10.) for k in knots]

    # TODO: fix AdaptiveMetropolis so that this is not necessary
    flat_gamma = mc.Lambda('flat_gamma_%s'%name, lambda gamma=gamma: pl.array([x for x in pl.flatten(gamma)]))


    import scipy.interpolate
    @mc.deterministic(name='mu_age_%s'%name)
    def mu_age(gamma=flat_gamma, knots=knots, ages=ages):
        mu = scipy.interpolate.interp1d(knots, pl.exp(gamma), kind=interpolation_method, bounds_error=False, fill_value=0.)
        return mu(ages)

    vars = dict(gamma=gamma, mu_age=mu_age, ages=ages, knots=knots)

    if (smoothing > 0) and (not pl.isinf(smoothing)):
        print 'adding smoothing of', smoothing
        @mc.potential(name='smooth_mu_%s'%name)
        def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
            # the following is to include a "noise floor" so that level value
            # zero prior does not exert undue influence on age pattern
            # smoothing
            gamma = gamma.clip(pl.log(pl.exp(gamma).mean()/10.), pl.inf)  # only include smoothing on values within 10x of mean

            return mc.normal_like(pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)
        vars['smooth_gamma'] = smooth_gamma

    return vars
Example #36
 def plotC2(self, t, ax):
     ax.cla()
     X = self.X
     Y = self.Y
     u = self.matx[t]
     #ax.set_title(u'Gradiente del nivel (Problema directo, dirección de flujo)')
     ## this check keeps warnings from firing when the vectors are all zeros
     divi = np.zeros((len(self.Y), len(self.X)), float)
     if not p.all(np.equal(u, divi)):
         if self.tipodis == None or self.tipodis != "Logaritmica":
             ##                print 'Not yet available for this type of discretization'
             #else:
             if self.tipodis == None:
                 v = self.maty[t] * -1
             elif self.tipodis == "Lineal":
                 #quiver(x,y,gxh(:,:,i),gyh(:,:,i));
                 v = self.maty[t]
             q = ax.quiver(X, Y, u, v, color=['r'])
Example #37
 def plotC2(self, t, ax):
     ax.cla()
     X = self.X
     Y = self.Y
     u = self.matx[t]
     # ax.set_title(u'Gradiente del nivel (Problema directo, dirección de flujo)')
     ## this check keeps warnings from firing when the vectors are all zeros
     divi = np.zeros((len(self.Y), len(self.X)), float)
     if not p.all(np.equal(u, divi)):
         if self.tipodis == None or self.tipodis != "Logaritmica":
             ##                print 'Not yet available for this type of discretization'
             # else:
             if self.tipodis == None:
                 v = self.maty[t] * -1
             elif self.tipodis == "Lineal":
                 # quiver(x,y,gxh(:,:,i),gyh(:,:,i));
                 v = self.maty[t]
             q = ax.quiver(X, Y, u, v, color=["r"])
Example #38
    def plotC(self, t):

        if self.axt != None:

            self.axt.cla()
            self.fig.delaxes(self.axt)
            self.axt = None

        if self.ax == None:

            self.ax = self.fig.add_subplot(111)

        ax = self.ax
        ax.cla()

        X = self.X
        Y = self.Y
        u = self.matx[t]
        ax.set_title(
            u'Gradiente del nivel (Problema directo, dirección de flujo)')

        ## this check keeps warnings from firing when the vectors are all zeros
        divi = np.zeros((len(self.Y), len(self.X)), float)

        if not p.all(np.equal(u, divi)):

            if self.tipodis != None and self.tipodis == "Logaritmica":

                print 'Aún no disponible para este tipo de discretización'

            else:

                if self.tipodis == None:

                    v = self.maty[t] * -1

                elif self.tipodis == "Lineal":

                    #quiver(x,y,gxh(:,:,i),gyh(:,:,i));
                    v = self.maty[t]

                q = ax.quiver(X, Y, u, v, color=['r'])
Example #39
def visualize_steps(mod, fname="mod.avi", description_str=""):
    times = list(pl.arange(0, 30, 0.2)) + range(30, 200) + range(200, 1500, 10)
    times += range(1500, 1700) + range(1700, 3000, 10)
    times += range(3000, 3200) + range(3200, len(mod.X.trace()), 10)
    assert pl.all(pl.diff(times) >= 0.0), "movies where time is not increasing are confusing and probably unintentional"
    try:
        print "generating %d images" % len(times)
        for i, t in enumerate(times):
            if i % 100 == 99:
                print "%d of %d (t=%.2f)" % (i, len(times), t)
            sys.stdout.flush()
            visualize_single_step(mod, int(t), t - int(t), description_str)
            pl.savefig("mod%06d.png" % i)
    except KeyboardInterrupt:
        pass

    import subprocess

    subprocess.call("mencoder mf://mod*.png -mf w=800:h=600 -ovc x264 -of avi -o %s" % fname, shell=True)
    subprocess.call("mplayer -loop 1 %s" % fname, shell=True)
    subprocess.call("rm mod*.png", shell=True)
Example #40
def pseudoSpect(A,
                npts=200,
                s=2.,
                gridPointSelect=100,
                verbose=True,
                lstSqSolve=True):
    """ 
    original code from http://www.cs.ox.ac.uk/projects/pseudospectra/psa.m
    % psa.m - Simple code for 2-norm pseudospectra of given matrix A.
    %         Typically about N/4 times faster than the obvious SVD method.
    %         Comes with no guarantees!   - L. N. Trefethen, March 1999.
    
    parameter: A: the matrix to analyze
               npts: number of points at the grid
               s: axis limits (-s ... +s)
               gridPointSelect: ???
               verbose: prints progress messages
               lstSqSolve: if true, use least squares in algorithm where
                  solve could be used (probably) instead. (replacement for
                  ldivide in MatLab)
    """

    from scipy.linalg import schur, triu
    from pylab import (meshgrid, norm, dot, zeros, eye, diag, find, linspace,
                       arange, isreal, inf, ones, lstsq, solve, sqrt, randn,
                       eig, all)

    # parenthesized so the conditional chooses between the two lambdas;
    # without parentheses the else branch nests inside the first lambda's
    # body, and ldiv would return a function instead of a solution
    ldiv = (lambda M1, M2: lstsq(M1, M2)[0]) if lstSqSolve \
        else (lambda M1, M2: solve(M1, M2))

    def planerot(x):
        '''
        return (G,y)
        with a matrix G such that y = G*x with y[1] = 0    
        '''
        G = zeros((2, 2))
        xn = x / norm(x)
        G[0, 0] = xn[0]
        G[1, 0] = -xn[1]
        G[0, 1] = xn[1]
        G[1, 1] = xn[0]
        return G, dot(G, x)

    xmin = -s
    xmax = s
    ymin = -s
    ymax = s
    x = linspace(xmin, xmax, npts, endpoint=False)
    y = linspace(ymin, ymax, npts, endpoint=False)
    xx, yy = meshgrid(x, y)
    zz = xx + 1j * yy

    #% Compute Schur form and plot eigenvalues:
    T, Z = schur(A, output='complex')

    T = triu(T)
    eigA = diag(T)

    # Reorder Schur decomposition and compress to interesting subspace:
    select = find(eigA.real > -250)  # % <- ALTER SUBSPACE SELECTION
    n = len(select)
    for i in arange(n):
        for k in arange(select[i] - 1, i, -1):  #:-1:i
            G = planerot([T[k, k + 1],
                          T[k, k] - T[k + 1, k + 1]])[0].T[::-1, ::-1]
            J = slice(k, k + 2)
            T[:, J] = dot(T[:, J], G)
            T[J, :] = dot(G.T, T[J, :])

    T = triu(T[:n, :n])
    I = eye(n)

    # Compute resolvent norms by inverse Lanczos iteration and plot contours:
    sigmin = inf * ones((len(y), len(x)))
    #A = eye(5)
    niter = 0
    for i in arange(len(y)):  # 1:length(y)
        if all(isreal(A)) and (ymax == -ymin) and (i > len(y) / 2):
            sigmin[i, :] = sigmin[len(y) - i, :]
        else:
            for jj in arange(len(x)):
                z = zz[i, jj]
                T1 = z * I - T
                T2 = T1.conj().T
                if z.real < gridPointSelect:  # <- ALTER GRID POINT SELECTION
                    sigold = 0
                    qold = zeros((n, 1))
                    beta = 0
                    H = zeros((100, 100))
                    q = randn(n, 1) + 1j * randn(n, 1)
                    while norm(q) < 1e-8:
                        q = randn(n, 1) + 1j * randn(n, 1)
                    q = q / norm(q)
                    for k in arange(99):
                        v = ldiv(T1, (ldiv(T2, q))) - dot(beta, qold)
                        #stop
                        alpha = dot(q.conj().T, v).real
                        v = v - alpha * q
                        beta = norm(v)
                        qold = q
                        q = v / beta
                        H[k + 1, k] = beta
                        H[k, k + 1] = beta
                        H[k, k] = alpha
                        if (alpha > 1e100):
                            sig = alpha
                        else:
                            sig = max(abs(eig(H[:k + 1, :k + 1])[0]))
                        if (abs(sigold / sig - 1) < .001) or (sig < 3
                                                              and k > 2):
                            break
                        sigold = sig
                        niter += 1
                        #print 'niter = ', niter

                #%text(x(jj),y(i),num2str(k))         % <- SHOW ITERATION COUNTS
                    sigmin[i, jj] = 1. / sqrt(sig)
                #end
                #  end
        if verbose:
            print 'finished line ', str(i), ' out of ', str(len(y))

    return x, y, sigmin
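
A hedged usage sketch on a small non-normal matrix (the values are hypothetical; keep npts small, since the inverse-Lanczos loop visits every grid point):

import pylab as pl

A = pl.array([[-1., 5.],
              [0., -2.]])
x, y, sigmin = pseudoSpect(A, npts=50, s=4., verbose=False)

# contour the log10 of the resolvent norm estimate
pl.contour(x, y, pl.log10(sigmin))
pl.show()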
Example #41
def fill_nan(data, max_len=None, fill_ends=True):
    """
    Fills the "nan" fields of a 1D array with linear interpolated values.
    At the edges, constant values are assumed.
    
    :args:
       data (1d array): the input data
       max_len (int or None): maximal length of gaps to fill
       fill_ends (bool): whether or not to fill the ends
    
    :returns:
        data' (1d array): a copy of the input data, where `nan`-values are
        replaced by a linear interpolation between adjacent values
    """
    res = data.copy()
    if all(isnan(data)):
        return res
    missing_idx = find(isnan(data))
    
    # group to missing segments
    missing_segs = []
    gap_lengths = []
    lastidx = -2 # some invalid index: idx == lastidx + 1 cannot be true for this!
    startidx = -2 # some invalid index
    gaplen = 0
    for idx in missing_idx:
        if idx == lastidx + 1:
            # all right, the segment continues
            lastidx = idx            
            gaplen += 1
        else:
            # a new segment has started            
            # first: "close" old segment if exists
            if startidx >= 0:
                missing_segs.append([startidx, lastidx])
                gap_lengths.append(gaplen)
            # now: initialize new segment
            gaplen = 1
            startidx = idx
            lastidx = idx
    
    # manually close the last segment if exists
    if startidx >= 0:
        if lastidx < len(data) - 1 or fill_ends: # skip edge if not fill_ends
            missing_segs.append([startidx, lastidx])
    
    # fill missing segments
    for seg in missing_segs:
        start_idx, stop_idx = seg
        if max_len is not None:
            if stop_idx - start_idx > max_len:
                continue
        # if startpoint is missing: constant value
        if start_idx == 0 and fill_ends:
            res[:stop_idx + 1] = res[stop_idx + 1]
        # if endpoint is missing: use constant value
        elif stop_idx == len(data)-1 and fill_ends:
            res[start_idx:] = res[start_idx - 1]
        # else: linear interpolation
        else:

            res[start_idx: stop_idx+1] = interp(range(start_idx, stop_idx + 1), 
                [start_idx - 1, stop_idx + 1], data[[start_idx - 1, stop_idx + 1]])
        
    return res
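
A hedged example (hypothetical data; like the function itself, it assumes a `from pylab import *` context so that all/isnan/find/interp resolve):

from pylab import array, nan

x = array([1.0, nan, nan, 4.0, 5.0, nan])
print(fill_nan(x))             # [ 1.  2.  3.  4.  5.  5.]
print(fill_nan(x, max_len=0))  # the two-sample gap stays nan; the trailing single nan is still filled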
Example #42
def pseudoSpect(A, npts=200, s=2., gridPointSelect=100, verbose=True,
                lstSqSolve=True):
    """ 
    original code from http://www.cs.ox.ac.uk/projects/pseudospectra/psa.m
    % psa.m - Simple code for 2-norm pseudospectra of given matrix A.
    %         Typically about N/4 times faster than the obvious SVD method.
    %         Comes with no guarantees!   - L. N. Trefethen, March 1999.
    
    parameter: A: the matrix to analyze
               npts: number of points at the grid
               s: axis limits (-s ... +s)
               gridPointSelect: ???
               verbose: prints progress messages
               lstSqSolve: if true, use least squares in algorithm where
                  solve could be used (probably) instead. (replacement for
                  ldivide in MatLab)
    """
    
    from scipy.linalg import schur, triu
    from pylab import (meshgrid, norm, dot, zeros, eye, diag, find,  linspace,                       
                       arange, isreal, inf, ones, lstsq, solve, sqrt, randn,
                       eig, all)

    # parenthesized so the conditional chooses between the two lambdas
    # (otherwise the else branch nests inside the first lambda's body)
    ldiv = (lambda M1, M2: lstsq(M1, M2)[0]) if lstSqSolve else (lambda M1, M2: solve(M1, M2))

    def planerot(x):
        '''
        return (G,y)
        with a matrix G such that y = G*x with y[1] = 0    
        '''
        G = zeros((2,2))
        xn = x / norm(x)
        G[0,0] = xn[0]
        G[1,0] = -xn[1]
        G[0,1] = xn[1]
        G[1,1] = xn[0]
        return G, dot(G,x)

    xmin = -s
    xmax = s
    ymin = -s
    ymax = s;  
    x = linspace(xmin,xmax,npts,endpoint=False)
    y = linspace(ymin,ymax,npts,endpoint=False)
    xx,yy = meshgrid(x,y)
    zz = xx + 1j*yy
     
    #% Compute Schur form and plot eigenvalues:
    T,Z = schur(A,output='complex');
        
    T = triu(T)
    eigA = diag(T)
    
    # Reorder Schur decomposition and compress to interesting subspace:
    select = find( eigA.real > -250)           # % <- ALTER SUBSPACE SELECTION
    n = len(select)
    for i in arange(n):
        for k in arange(select[i]-1,i,-1): #:-1:i
            G = planerot([T[k,k+1],T[k,k]-T[k+1,k+1]] )[0].T[::-1,::-1]
            J = slice(k,k+2)
            T[:,J] = dot(T[:,J],G)
            T[J,:] = dot(G.T,T[J,:])
          
    T = triu(T[:n,:n])
    I = eye(n);
    
    # Compute resolvent norms by inverse Lanczos iteration and plot contours:
    sigmin = inf*ones((len(y),len(x)));
    #A = eye(5)
    niter = 0
    for i in arange(len(y)): # 1:length(y)        
        if all(isreal(A)) and (ymax == -ymin) and (i > len(y)/2):
            sigmin[i,:] = sigmin[len(y) - i,:]
        else:
            for jj in arange(len(x)):
                z = zz[i,jj]
                T1 = z * I - T 
                T2 = T1.conj().T
                if z.real < gridPointSelect:    # <- ALTER GRID POINT SELECTION
                    sigold = 0
                    qold = zeros((n,1))
                    beta = 0
                    H = zeros((100,100))                
                    q = randn(n,1) + 1j*randn(n,1)                
                    while norm(q) < 1e-8:
                        q = randn(n,1) + 1j*randn(n,1)                
                    q = q/norm(q)
                    for k in arange(99):
                        v = ldiv(T1,(ldiv(T2,q))) - dot(beta,qold)
                        #stop
                        alpha = dot(q.conj().T, v).real
                        v = v - alpha*q
                        beta = norm(v)
                        qold = q
                        q = v/beta
                        H[k+1,k] = beta
                        H[k,k+1] = beta
                        H[k,k] = alpha
                        if (alpha > 1e100):
                            sig = alpha 
                        else:
                            sig = max(abs(eig(H[:k+1,:k+1])[0]))
                        if (abs(sigold/sig-1) < .001) or (sig < 3 and k > 2):
                            break
                        sigold = sig
                        niter += 1
                        #print 'niter = ', niter
                
                  #%text(x(jj),y(i),num2str(k))         % <- SHOW ITERATION COUNTS
                    sigmin[i,jj] = 1./sqrt(sig);
                #end
                #  end
        if verbose:
            print 'finished line ', str(i), ' out of ', str(len(y))
    
    return x,y,sigmin
Example #43
def spline(name, ages, knots, smoothing, interpolation_method='linear'):
    """ Generate PyMC objects for a piecewise constant Gaussian process (PCGP) model

    Parameters
    ----------
    name : str
    knots : array, locations of the discontinuities in the piecewise constant function
    ages : array, points to interpolate to
    smoothing : pymc.Node, smoothness parameter for smoothing spline
    interpolation_method : str, optional, one of 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'

    Results
    -------
    Returns dict of PyMC objects, including 'gamma' (log of rate at
    knots) and 'mu_age' (age-specific rate interpolated at all age points)
    """
    assert pl.all(
        pl.diff(knots) > 0), 'Spline knots must be strictly increasing'

    gamma = [
        mc.Normal('gamma_%s_%d' % (name, k), 0., 10.**-2, value=-10.)
        for k in knots
    ]
    #gamma = [mc.Uniform('gamma_%s_%d'%(name,k), -20., 20., value=-10.) for k in knots]

    # TODO: fix AdaptiveMetropolis so that this is not necessary
    flat_gamma = mc.Lambda(
        'flat_gamma_%s' % name,
        lambda gamma=gamma: pl.array([x for x in pl.flatten(gamma)]))

    import scipy.interpolate

    @mc.deterministic(name='mu_age_%s' % name)
    def mu_age(gamma=flat_gamma, knots=knots, ages=ages):
        mu = scipy.interpolate.interp1d(knots,
                                        pl.exp(gamma),
                                        kind=interpolation_method,
                                        bounds_error=False,
                                        fill_value=0.)
        return mu(ages)

    vars = dict(gamma=gamma, mu_age=mu_age, ages=ages, knots=knots)

    if (smoothing > 0) and (not pl.isinf(smoothing)):
        print 'adding smoothing of', smoothing

        @mc.potential(name='smooth_mu_%s' % name)
        def smooth_gamma(gamma=flat_gamma, knots=knots, tau=smoothing**-2):
            # the following is to include a "noise floor" so that level value
            # zero prior does not exert undue influence on age pattern
            # smoothing
            gamma = gamma.clip(
                pl.log(pl.exp(gamma).mean() / 10.),
                pl.inf)  # only include smoothing on values within 10x of mean

            return mc.normal_like(
                pl.sqrt(pl.sum(pl.diff(gamma)**2 / pl.diff(knots))), 0, tau)

        vars['smooth_gamma'] = smooth_gamma

    return vars
Example #44
def fit_posterior(dm,
                  region,
                  sex,
                  year,
                  fast_fit=False,
                  inconsistent_fit=False,
                  params_to_fit=['p', 'r', 'i'],
                  zero_re=True,
                  posteriors_only=False):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    dm : DiseaseJson
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    fast_fit : bool, if true, sample only 101 draws from the posterior and do not try for convergence (fast, for testing)
    inconsistent_fit : bool, if true, fit the parameters separately
    params_to_fit : list of params to fit, if not fitting all consistently

    zero_re : bool, if true, enforce constraint that sibling area REs sum to zero
    posteriors_only : bool, if true, use data from 1997-2007 for 2005 and from 2007 on for 2010

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(dm, 'asia_east', 'male', '2005')  # dm is a DiseaseJson
    """
    dir = dismod3.settings.JOB_WORKING_DIR % dm.id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data in new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)
        print 'loaded data from json'

    # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)

    predict_area = dismod3.utils.clean(region)
    predict_sex = dismod3.utils.clean(sex)
    predict_year = int(year)

    ## load emp_priors dict from dm.params
    param_type = dict(i='incidence',
                      p='prevalence',
                      r='remission',
                      f='excess-mortality',
                      rr='relative-risk',
                      pf='prevalence_x_excess-mortality',
                      m_with='mortality')
    emp_priors = {}
    for t in 'i r p f'.split():

        # uncomment below to not use empirical prior for rate with zero data
        # if pl.all(model.input_data['data_type'] != t):
        #     continue

        #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex)
        key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex)
        mu = dm.get_mcmc('emp_prior_mean', key)
        #mu = dm.get_mcmc('emp_prior_median', key)
        sigma = dm.get_mcmc('emp_prior_std', key)

        if len(mu) == 101 and len(sigma) == 101:
            emp_priors[t, 'mu'] = mu

            # TODO: determine best way to propagate prior on function
            emp_priors[t, 'sigma'] = sigma

            # ALT 1: scale so that the joint probability is not a
            # function of the length of the age function
            # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma))

        ## update model.parameters['random_effects'] if there is information in the disease model
        expert_priors = model.parameters[t].get('random_effects', {})
        model.parameters[t]['random_effects'] = dm.get_empirical_prior(
            param_type[t]).get('new_alpha', {})
        model.parameters[t]['random_effects'].update(expert_priors)

        # shift random effects to make REs for observed children of predict area have mean zero
        re_mean = pl.mean([model.parameters[t]['random_effects'][area]['mu'] \
                           for area in model.hierarchy.neighbors(predict_area) \
                           if area in model.parameters[t]['random_effects']])
        for area in model.hierarchy.neighbors(predict_area):
            if area in model.parameters[t]['random_effects']:
                model.parameters[t]['random_effects'][area]['mu'] -= re_mean

        ## update model.parameters['fixed_effects'] if there is information in the disease model
        expert_fe_priors = model.parameters[t].get('fixed_effects', {})
        model.parameters[t]['fixed_effects'].update(
            dm.get_empirical_prior(param_type[t]).get('new_beta', {}))

    ## create model and priors for region/sex/year
    # select data that is about areas in this region, recent years, and sex of male or total only
    assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area
    subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)

    def is_relevant(r):
        if (r['area'] not in subtree) and r['area'] != 'all':
            return False

        if predict_year == 1990:
            if r['year_start'] > 1997:
                return False
        elif predict_year == 2005:
            if posteriors_only:
                if r['year_end'] < 1997 or r['year_start'] > 2007:
                    return False
            else:
                if r['year_end'] < 1997:
                    return False
        elif predict_year == 2010:
            if posteriors_only:
                if r['data_type'] == 'm_all':
                    # include m_all data from 2005, since 2010 is not loaded
                    if r['year_end'] < 1997:
                        return False
                else:
                    if r['year_end'] < 2007:
                        return False
            else:
                if r['year_end'] < 1997:
                    return False
        else:
            assert 0, 'Predictions for year %d not yet implemented' % predict_year

        if r['sex'] not in [predict_sex, 'total']:
            return False

        return True

    old_relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all')\
                         and ((predict_year >= 1997 and r['year_end'] >= 1997) or
                              (predict_year <= 1997 and r['year_start'] <= 1997)) \
                         and r['sex'] in [predict_sex, 'total']]

    relevant_rows = model.input_data.index[model.input_data.apply(is_relevant,
                                                                  axis=1)]

    if predict_year == 1990:
        assert pl.all(
            relevant_rows == old_relevant_rows
        ), "relevant rows should be the same in new and old implementation for 1990"

    if not posteriors_only:
        assert pl.all(
            relevant_rows == old_relevant_rows
        ), "relevant rows should be the same in new and old implementation when posteriors_only is False"

    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with predict_area
    model.input_data['area'][model.input_data['area'] == 'all'] = predict_area

    if inconsistent_fit:
        # generate fits for requested parameters inconsistently
        for t in params_to_fit:
            model.vars += ism.age_specific_rate(
                model,
                t,
                reference_area=predict_area,
                reference_sex=predict_sex,
                reference_year=predict_year,
                mu_age=None,
                mu_age_parent=emp_priors.get((t, 'mu')),
                sigma_age_parent=emp_priors.get((t, 'sigma')),
                rate_type='log_normal' if t == 'rr' else 'neg_binom',
                zero_re=zero_re)
            if fast_fit:
                dismod3.fit.fit_asr(model,
                                    t,
                                    iter=101,
                                    burn=0,
                                    thin=1,
                                    tune_interval=100)
            else:
                dismod3.fit.fit_asr(model,
                                    t,
                                    iter=iter,
                                    burn=burn,
                                    thin=thin,
                                    tune_interval=100)

    else:
        model.vars += ism.consistent(model,
                                     reference_area=predict_area,
                                     reference_sex=predict_sex,
                                     reference_year=predict_year,
                                     priors=emp_priors,
                                     zero_re=zero_re)

        ## fit model to data
        if fast_fit:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
        else:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model,
                                                         iter=iter,
                                                         burn=burn,
                                                         thin=thin,
                                                         tune_interval=100,
                                                         verbose=True)

    # generate estimates
    posteriors = {}
    for t in 'i r f p rr pf m_with X'.split():
        if t in model.vars:
            if t in model.parameters and 'level_bounds' in model.parameters[t]:
                lower = model.parameters[t]['level_bounds']['lower']
                upper = model.parameters[t]['level_bounds']['upper']
            else:
                lower = 0
                upper = pl.inf
            posteriors[t] = covariate_model.predict_for(
                model,
                model.parameters.get(t, {}),
                predict_area,
                predict_sex,
                predict_year,
                predict_area,
                predict_sex,
                predict_year,
                True,  # population weighted averages
                model.vars[t],
                lower,
                upper)
    try:
        graphics.plot_fit(model, model.vars, emp_priors, {})
        pl.savefig(dir + '/image/posterior-%s+%s+%s.png' %
                   (predict_area, predict_sex, predict_year))
    except Exception, e:
        print 'Error generating output graphics'
        print e
Beispiel #45
0
def win(board, letter):
    wins = logical_or(board == letter, board == 'T')
    return any(all(wins, 0)) or any(all(wins, 1)) or all(diag(wins)) or \
      all(diag(rot90(wins)))
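A quick check of `win` on a hypothetical board (assuming the snippet's `from pylab import *` style, so `array`, `logical_or`, `all`, `any`, `diag`, and `rot90` are in scope); 'T' squares count for both players:

from pylab import array

board = array([['X', 'X', 'X'],
               ['O', 'O', ' '],
               [' ', 'T', 'O']])
print win(board, 'X')   # True: X holds the top row
print win(board, 'O')   # False: no O line, even with the 'T' wildcard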
Beispiel #46
0
def setup_asr_step_methods(m, vars, additional_stochs=[]):
    # groups RE stochastics that are suspected of being dependent
    groups = []
    fe_group = [
        n for n in vars.get('beta', []) if isinstance(n, mc.Stochastic)
    ]
    ap_group = [
        n for n in vars.get('gamma', []) if isinstance(n, mc.Stochastic)
    ]
    groups += [[g_i, g_j] for g_i, g_j in zip(ap_group[1:], ap_group[:-1])
               ] + [fe_group, ap_group, fe_group + ap_group]

    for a in vars.get('hierarchy', []):
        group = []

        col_map = dict([[key, i] for i, key in enumerate(vars['U'].columns)])

        if a in vars['U']:
            for b in nx.shortest_path(vars['hierarchy'], 'all', a):
                if b in vars['U']:
                    n = vars['alpha'][col_map[b]]
                    if isinstance(n, mc.Stochastic):
                        group.append(n)
        groups.append(group)
        #if len(group) > 0:
        #group += ap_group
        #groups.append(group)
        #group += fe_group
        #groups.append(group)

    for stoch in groups:
        if len(stoch) > 0 and pl.all(
            [isinstance(n, mc.Stochastic) for n in stoch]):
            # only step certain stochastics, for understanding convergence
            #if 'gamma_i' not in stoch[0].__name__:
            #    print 'no stepper for', stoch
            #    m.use_step_method(mc.NoStepper, stoch)
            #    continue

            #print 'finding Normal Approx for', [n.__name__ for n in stoch]
            if not additional_stochs:
                vars_to_fit = [
                    vars.get('p_obs'),
                    vars.get('pi_sim'),
                    vars.get('smooth_gamma'),
                    vars.get('parent_similarity'),
                    vars.get('mu_sim'),
                    vars.get('mu_age_derivative_potential'),
                    vars.get('covariate_constraint')
                ]
            else:
                vars_to_fit = additional_stochs

            try:
                raise ValueError  # short-circuit: skip the NormApprox covariance estimate below, fall back to plain AdaptiveMetropolis
                na = mc.NormApprox(vars_to_fit + stoch)
                na.fit(method='fmin_powell', verbose=0)
                cov = pl.array(pl.inv(-na.hess), order='F')
                #print 'opt:', pl.round_([n.value for n in stoch], 2)
                #print 'cov:\n', cov.round(4)
                if pl.all(pl.eigvals(cov) >= 0):
                    m.use_step_method(mc.AdaptiveMetropolis, stoch, cov=cov)
                else:
                    raise ValueError
            except ValueError:
                #print 'cov matrix is not positive semi-definite'
                m.use_step_method(mc.AdaptiveMetropolis, stoch)
Beispiel #47
0
    def solve_stability(self):
        """
        Solve the 2nd-order stability problem
        """
        def global_mask(fun1, fun2, V):

            # Find the entries where fun1 and fun2 agree to within a tolerance
            diff = fun1.vector() - fun2.vector()
            diff.abs()
            diff_glob = PETScVector(mpi_comm_self())
            diff.gather(diff_glob, pl.array(range(V.dim()), "intc"))
            mask = diff_glob < DOLFIN_EPS_LARGE
            return mask

        # Locate the elastic part
        mask = global_mask(self.alpha, self.alpha_prev, self.V_alpha)
        if pl.all(mask):
            self.print0("\033[1;36m    2nd stability: elastic phase\033[1;m")
            self.rq = pl.inf
            return True
        else:
            self._u_alpha_prev.vector()[:] = 0.0
            self._u_alpha.vector()[:] = 1.0
            assign(self._u_alpha_prev.sub(1), self.alpha_prev)
            assign(self._u_alpha.sub(1), self.alpha)
            mask = global_mask(self._u_alpha, self._u_alpha_prev,
                               self._V_u_alpha)
            self.elas_dofs = set(pl.where(mask)[0].astype(pl.intc))

        bc_elas_dofs = self.elas_dofs.union(self.bc_dofs)
        indices = sorted(
            set(range(self.ownership[0], self.ownership[1])) - bc_elas_dofs)

        # Assemble K and M
        self._K = PETScMatrix()
        self._M = PETScMatrix()
        assemble(self._rqP, self._K)
        assemble(self._rqN, self._M)
        self._K_mat = self._K.mat()
        self._M_mat = self._M.mat()

        # Eliminate the elastic/BC part using PETSc.IS
        self.IS = PETSc.IS()
        self.IS.createGeneral(indices)
        self._K_mat_reduced = self._K_mat.getSubMatrix(self.IS, self.IS)
        self._K = PETScMatrix(self._K_mat_reduced)
        self._M_mat_reduced = self._M_mat.getSubMatrix(self.IS, self.IS)
        self._M = PETScMatrix(self._M_mat_reduced)

        # Stop if M is numerically zero
        if self._M.norm("linf") < DOLFIN_EPS_LARGE:
            self.rq = pl.inf
            self.print0(
                "\033[1;36m    2nd stability: Rayleigh quotient: %.3e\033[1;m"
                % self.rq)
            return True

        # Setup the eigenvalue solver
        self.eigensolver = SLEPcEigenSolver(self._K, self._M)
        self.set_eigensolver_parameters()

        # Use last known directions for initial guess
        assign(self._u_alpha.sub(0), self.V)
        assign(self._u_alpha.sub(1), self.Beta)
        _u_alpha_vec = as_backend_type(self._u_alpha.vector()).vec()
        _u_alpha_vec_reduced = _u_alpha_vec.getSubVector(self.IS)
        # self.eps.setInitialSpace(_u_alpha_vec_reduced)

        # Solve the eigenvalue problem
        self.print0(
            "\033[1;36m    2nd stability: solving the eigenvalue problem\033[1;m"
        )
        self.eps.solve()
        r, c, rx, cx = self.eigensolver.get_eigenpair(0)
        self.print0("\033[1;36m    2nd stability: smallest ev: %.3e\033[1;m" %
                    r)

        # From reduced vector to full vector
        self.scatter = PETSc.Scatter()
        rx_vec = as_backend_type(rx).vec()
        self.scatter.create(_u_alpha_vec_reduced, None, _u_alpha_vec, self.IS)
        _u_alpha_vec.zeroEntries()
        self.scatter.scatter(rx_vec, _u_alpha_vec)
        _u_alpha_vec.ghostUpdate()

        # Check the Rayleigh quotient (in theory we should have r == rq)
        self.rq = assemble(self.rqP) / assemble(self.rqN)
        if abs(r - self.rq) > DOLFIN_EPS_LARGE:
            self.print0(
                "\033[1;36m    2nd stability: Rayleigh quotient: %.3e\033[1;m"
                % self.rq)

        # Obtain the perturbation directions to V and Beta
        assign(self.V, self._u_alpha.sub(0))
        assign(self.Beta, self._u_alpha.sub(1))

        # Scale V
        u_mean = self.u.vector().norm("l2")
        if self.V.vector().norm("l2") > DOLFIN_EPS_LARGE:
            coeff = u_mean / self.V.vector().norm("l2")
            self.V.vector()[:] = coeff * self.V.vector()

        # Scale and project Beta to the admissible space
        alpha_mean = self.alpha.vector().norm("l2")
        if self.Beta.vector().norm("l2") > DOLFIN_EPS_LARGE:
            coeff = alpha_mean / self.Beta.vector().norm("l2")
            self.Beta.vector()[:] = coeff * self.Beta.vector()
        self.Beta.vector()[self.Beta.vector() < 0] = 0.0

        # Determine if the solution is unique (stable when the Rayleigh
        # quotient exceeds 1)
        return self.rq > 1
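The `global_mask` helper above is, at heart, an elementwise comparison; a serial numpy sketch of the same idea (ignoring the PETSc gather, with a hypothetical tolerance):

import numpy as np

def global_mask_serial(v1, v2, eps=1e-12):
    # True where the two coefficient vectors agree within tolerance eps
    return np.abs(v1 - v2) < eps

print global_mask_serial(np.array([0., 1., 1.]), np.array([0., 1., 2.]))
# [ True  True False]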
Beispiel #48
0
def fill_nan(data, max_len=None, fill_ends=True):
    """
    Fills the "nan" fields of a 1D array with linear interpolated values.
    At the edges, constant values are assumed.
    
    :args:
       data (1d array): the input data
       max_len (int or None): maximal length of gaps to fill
       fill_ends (bool): whether or not to fill the ends
    
    :returns:
        data' (1d array): a copy of the input data, where `nan`-values are
        replaced by a linear interpolation between adjacent values
    """
    res = data.copy()
    if all(isnan(data)):
        return res
    missing_idx = find(isnan(data))

    # group to missing segments
    missing_segs = []
    gap_lengths = []
    lastidx = -2  # some invalid index: idx == lastidx + 1 cannot be true for this!
    startidx = -2  # some invalid index
    gaplen = 0
    for idx in missing_idx:
        if idx == lastidx + 1:
            # all right, the segment continues
            lastidx = idx
            gaplen += 1
        else:
            # a new segment has started
            # first: "close" old segment if exists
            if startidx >= 0:
                missing_segs.append([startidx, lastidx])
                gap_lengths.append(gaplen)
            # now: initialize new segment
            gaplen = 1
            startidx = idx
            lastidx = idx

    # manually close the last segment if exists
    if startidx >= 0:
        if lastidx < len(data) - 1 or fill_ends:  # skip edge if not fill_ends
            missing_segs.append([startidx, lastidx])

    # fill missing segments
    for seg in missing_segs:
        start_idx, stop_idx = seg
        if max_len is not None:
            if stop_idx - start_idx > max_len:
                continue
        # if startpoint is missing: constant value
        if start_idx == 0 and fill_ends:
            res[:stop_idx + 1] = res[stop_idx + 1]
        # if endpoint is missing: use constant value
        elif stop_idx == len(data) - 1 and fill_ends:
            res[start_idx:] = res[start_idx - 1]
        # edge gaps stay nan when fill_ends is False
        elif start_idx == 0 or stop_idx == len(data) - 1:
            continue
        # else: linear interpolation
        else:

            res[start_idx:stop_idx + 1] = interp(
                range(start_idx, stop_idx + 1), [start_idx - 1, stop_idx + 1],
                data[[start_idx - 1, stop_idx + 1]])

    return res
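A small worked example (assuming the snippet's pylab-style imports, so `all`, `isnan`, `find`, and `interp` are in scope):

from pylab import array, nan

x = array([nan, 1., nan, 3., nan])
print fill_nan(x)                    # [ 1.  1.  2.  3.  3.]
print fill_nan(x, fill_ends=False)   # [ nan  1.  2.  3.  nan]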
Beispiel #49
0
def predict_for(model, parameters,
                root_area, root_sex, root_year,
                area, sex, year,
                population_weighted,
                vars,
                lower, upper):
    """ Generate draws from posterior predicted distribution for a
    specific (area, sex, year)

    :Parameters:
      - `model` : data.DataModel
      - `root_area` : str, area for which this model was fit consistently
      - `root_sex` : str, sex for which this model was fit consistently
      - `root_year` : str, year for which this model was fit consistently
      - `area` : str, area to predict for
      - `sex` : str, sex to predict for
      - `year` : str, year to predict for
      - `population_weighted` : bool, whether the prediction should be population weighted when it aggregates units of the area RE hierarchy
      - `vars` : dict, including entries for alpha, beta, mu_age, U, and X
      - `lower, upper` : float, bounds on predictions from expert priors

    :Results:
      - Returns array of draws from posterior predicted distribution

    """
    area_hierarchy = model.hierarchy
    output_template = model.output_template.copy()

    # find number of samples from posterior
    len_trace = len(vars['mu_age'].trace())

    # compile array of draws from posterior distribution of alpha (random effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each random effect (e.g. countries with data, regions with countries with data, etc)
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['alpha'] is a list of pymc Nodes
    #   vars['alpha'] is a list of floats
    #   vars['alpha'] is a list of some floats and some pymc Nodes
    #   'alpha' is not in vars
    #
    # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    
    if 'alpha' in vars and isinstance(vars['alpha'], mc.Node):
        assert 0, 'No longer used'
        alpha_trace = vars['alpha'].trace()
    elif 'alpha' in vars and isinstance(vars['alpha'], list):
        alpha_trace = []
        for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']):
            if isinstance(n, mc.Node):
                alpha_trace.append(n.trace())
            else:
                # uncertainty of constant alpha incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                alpha_trace.append(mc.rnormal(float(n), sigma**-2, size=len_trace))
        alpha_trace = pl.vstack(alpha_trace).T
    else:
        alpha_trace = pl.array([])


    # compile array of draws from posterior distribution of beta (fixed effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each fixed effect
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['beta'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['beta'] is a list of pymc Nodes
    #   vars['beta'] is a list of floats
    #   vars['beta'] is a list of some floats and some pymc Nodes
    #   'beta' is not in vars
    #
    # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    #
    # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above)

    if 'beta' in vars and isinstance(vars['beta'], mc.Node):
        assert 0, 'No longer used'
        beta_trace = vars['beta'].trace()
    elif 'beta' in vars and isinstance(vars['beta'], list):
        beta_trace = []
        for n, sigma in zip(vars['beta'], vars['const_beta_sigma']):
            if isinstance(n, mc.Node):
                beta_trace.append(n.trace())
            else:
                # uncertainty of constant beta incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                beta_trace.append(mc.rnormal(float(n), sigma**-2., size=len_trace))
        beta_trace = pl.vstack(beta_trace).T
    else:
        beta_trace = pl.array([])

    # the prediction for the requested area is produced by aggregating predictions for all of the children
    # of that area in the area_hierarchy (a networkx.DiGraph)

    leaves = [n for n in nx.traversal.bfs_tree(area_hierarchy, area) if area_hierarchy.successors(n) == []]
    if len(leaves) == 0:
        # networkx returns an empty list when the bfs tree is a single node
        leaves = [area]


    # initialize covariate_shift and total_population
    covariate_shift = pl.zeros(len_trace)
    total_population = 0.

    # group output_template for easy access
    output_template = output_template.groupby(['area', 'sex', 'year']).mean()

    # if there are fixed effects, the effect coefficients are stored as an array in vars['X']
    # use this to put together a covariate matrix for the predictions, according to the output_template
    # covariate values
    #
    # the resulting array is covs
    if 'X' in vars:
        covs = output_template.filter(vars['X'].columns)
        if 'x_sex' in vars['X'].columns:
            covs['x_sex'] = sex_value[sex]
        assert pl.all(covs.columns == vars['X_shift'].index), 'covariate columns and unshift index should match up'
        for x_i in vars['X_shift'].index:
            covs[x_i] -= vars['X_shift'][x_i] # shift covariates so that the root node has X_ar,sr,yr == 0
    else:
        covs = pandas.DataFrame(index=output_template.index)

    # if there are random effects, put together an indicator based on
    # their hierarchical relationships
    #
    if 'U' in vars:
        p_U = area_hierarchy.number_of_nodes()  # random effects for area
        U_l = pandas.DataFrame(pl.zeros((1, p_U)), columns=area_hierarchy.nodes())
        U_l = U_l.filter(vars['U'].columns)
    else:
        U_l = pandas.DataFrame(index=[0])

    # loop through leaves of area_hierarchy subtree rooted at 'area',
    # make prediction for each using appropriate random
    # effects and appropriate fixed effect covariates
    #
    for l in leaves:
        log_shift_l = pl.zeros(len_trace)
        U_l.ix[0,:] = 0.

        root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l)
        for node in root_to_leaf[1:]:
            if node not in U_l.columns:
                ## Add a column U_l[node] = rnormal(0, appropriate_tau)
                level = len(nx.shortest_path(area_hierarchy, 'all', node))-1
                if 'sigma_alpha' in vars:
                    tau_l = vars['sigma_alpha'][level].trace()**-2
                    
                U_l[node] = 0.

                # if this node was not already included in the alpha_trace array, add it
                # there are several cases for adding:
                #  if the random effect has a distribution of Constant
                #    add it, using a sigma as well
                #  otherwise, sample from a normal with mean zero and precision tau_l
                if parameters.get('random_effects', {}).get(node, {}).get('dist') == 'Constant':
                    mu = parameters['random_effects'][node]['mu']
                    sigma = parameters['random_effects'][node]['sigma']
                    sigma = max(sigma, 1.e-9) # make sure sigma is non-zero

                    alpha_node = mc.rnormal(mu,
                                            sigma**-2,
                                            size=len_trace)
                else:
                    if 'sigma_alpha' in vars:
                        alpha_node = mc.rnormal(0., tau_l)
                    else:
                        alpha_node = pl.zeros(len_trace)

                if len(alpha_trace) > 0:
                    alpha_trace = pl.vstack((alpha_trace.T, alpha_node)).T
                else:
                    alpha_trace = pl.atleast_2d(alpha_node).T

            # TODO: implement a more robust way to align alpha_trace and U_l
            U_l.ix[0, node] = 1.

        # 'shift' the random effects matrix to have the intended
        # level of the hierarchy as the reference value
        if 'U_shift' in vars:
            for node in vars['U_shift']:
                U_l -= vars['U_shift'][node]

        # add the random effect intercept shift (len_trace draws)
        log_shift_l += pl.dot(alpha_trace, U_l.T).flatten()
            
        # make X_l
        if len(beta_trace) > 0:
            X_l = covs.ix[l, sex, year]
            log_shift_l += pl.dot(beta_trace, X_l.T).flatten()

        if population_weighted:
            # combine in linear-space with population weights
            shift_l = pl.exp(log_shift_l)
            covariate_shift += shift_l * output_template['pop'][l,sex,year]
            total_population += output_template['pop'][l,sex,year]
        else:
            # combine in log-space without weights
            covariate_shift += log_shift_l
            total_population += 1.

    if population_weighted:
        covariate_shift /= total_population
    else:
        covariate_shift = pl.exp(covariate_shift / total_population)
        
    parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T
        
    # clip predictions to bounds from expert priors
    parameter_prediction = parameter_prediction.clip(lower, upper)
    
    return parameter_prediction
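For reference, the call pattern used by fit_posterior (Beispiel #44), with hypothetical area/sex/year values and the default [0, inf) level bounds:

pred = predict_for(model, model.parameters.get('p', {}),
                   'asia_east', 'male', 2005,   # root of the consistent fit
                   'asia_east', 'male', 2005,   # target of the prediction
                   True,                        # population weighted
                   model.vars['p'], 0., pl.inf)
print pred.shape   # (number of posterior draws, number of ages)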
Beispiel #50
0
def fit_consistent(model, iter=2000, burn=1000, thin=1, tune_interval=100, verbose=False):
    """Fit data model for all epidemiologic parameters using MCMC
    
    :Parameters:
      - `model` : data.ModelData
      - `iter` : int, number of posterior samples fit
      - `burn` : int, number of posterior samples to discard as burn-in
      - `thin` : int, samples thinned by this number
      - `tune_interval` : int
      - `verbose` : boolean

    :Results:
      - returns a tuple (map, mcmc) of the pymc.MAP and pymc.MCMC objects created from vars, after fitting with MCMC

    .. note::
      - `burn` must be less than `iter`
      - `thin` must be less than `iter` minus `burn`

    """
    assert burn < iter, 'burn must be less than iter'
    assert thin < iter - burn, 'thin must be less than iter-burn'

    param_types = 'i r f p pf rr smr m_with X'.split()

    vars = model.vars
    
    start_time = time.time()
    map = mc.MAP(vars)
    m = mc.MCMC(vars)

    ## use MAP to generate good initial conditions
    try:
        method='fmin_powell'
        tol=.001

        fit_model.logger.info('fitting submodels')
        fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose)

        for t in param_types:
            fit_model.find_re_initial_vals(vars[t], method, tol, verbose)
            fit_model.logger.info('.')

        fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose)
        fit_model.logger.info('.')

        for t in param_types:
            fit_model.find_fe_initial_vals(vars[t], method, tol, verbose)
            fit_model.logger.info('.')

        fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose)
        fit_model.logger.info('.')

        for t in param_types:
            fit_model.find_dispersion_initial_vals(vars[t], method, tol, verbose)
            fit_model.logger.info('.')

        fit_model.logger.info('\nfitting all stochs\n')
        map.fit(method=method, tol=tol, verbose=verbose)

        if verbose:
            from fit_posterior import inspect_vars
            print inspect_vars({}, vars)

    except KeyboardInterrupt:
        fit_model.logger.warning('Initial condition calculation interrupted')

    ## use MCMC to fit the model

    try:
        fit_model.logger.info('finding step covariances')
        vars_to_fit = [[vars[t].get('p_obs'), vars[t].get('pi_sim'), vars[t].get('smooth_gamma'), vars[t].get('parent_similarity'),
                        vars[t].get('mu_sim'), vars[t].get('mu_age_derivative_potential'), vars[t].get('covariate_constraint')] for t in param_types]
        max_knots = max([len(vars[t]['gamma']) for t in 'irf'])
        for i in range(max_knots):
            stoch = [vars[t]['gamma'][i] for t in 'ifr' if i < len(vars[t]['gamma'])]

            if verbose:
                print 'finding Normal Approx for', [n.__name__ for n in stoch]
            try:
                na = mc.NormApprox(vars_to_fit + stoch)
                na.fit(method='fmin_powell', verbose=verbose)
                cov = pl.array(pl.inv(-na.hess), order='F')
                if pl.all(pl.eigvals(cov) >= 0):
                    m.use_step_method(mc.AdaptiveMetropolis, stoch, cov=cov)
                else:
                    raise ValueError
            except ValueError:
                if verbose:
                    print 'cov matrix is not positive semi-definite'
                m.use_step_method(mc.AdaptiveMetropolis, stoch)

            fit_model.logger.info('.')

        for t in param_types:
            fit_model.setup_asr_step_methods(m, vars[t], vars_to_fit)

            # reset values to MAP
            fit_model.find_consistent_spline_initial_vals(vars, method, tol, verbose)
            fit_model.logger.info('.')
        map.fit(method=method, tol=tol, verbose=verbose)
        fit_model.logger.info('.')
    except KeyboardInterrupt:
        fit_model.logger.warning('Initial condition calculation interrupted')

    fit_model.logger.info('\nsampling from posterior distribution\n')
    m.iter=iter
    m.burn=burn
    m.thin=thin
    if verbose:
        try:
            m.sample(m.iter, m.burn, m.thin, tune_interval=tune_interval, progress_bar=True, progress_bar_fd=sys.stdout)
        except TypeError:
            m.sample(m.iter, m.burn, m.thin, tune_interval=tune_interval, progress_bar=False, verbose=verbose)
    else:
        m.sample(m.iter, m.burn, m.thin, tune_interval=tune_interval, progress_bar=False)
    m.wall_time = time.time() - start_time

    model.map = map
    model.mcmc = m
    
    return model.map, model.mcmc
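A minimal driver sketch (hypothetical), assuming `model` is a data.ModelData whose `model.vars` were built with ism.consistent(model, ...) as in Beispiel #44:

model_map, model_mcmc = fit_consistent(model, iter=2000, burn=1000, thin=1)
print 'wall time: %.1fs' % model_mcmc.wall_time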
Beispiel #51
0
    def converged(q):
        n = array([pylab.norm(obj.diff(q)) for _, obj in self.objectives])
        return pylab.all(n < 1e-3)
Beispiel #52
0
    def add_data_vector_segment(self, data_vector_segment, last_segment=False):
        self.message = None
        if self.__prefixed_movie_name__ is None:
            return

        if last_segment:
            self.__save_frames__()
            return

        frame_name = '%s_%06d' % (self.__prefixed_movie_name__, self.idx)
        frame_file = as_path(self.__prefixed_movie_dir__, frame_name + '.png')
        #skip_frame = True if self.idx < self.p.movie_skip_to_frame or \
        #    (self.p.movie_skip_frames and os.path.exists(frame_file)) \
        #    else False
        skip_frame = False

        mean_plus = pl.mean(data_vector_segment.signal_plus)
        mean_minus = pl.mean(data_vector_segment.signal_minus)

        s_plus = len(data_vector_segment.signal_plus)

        _pp_spec = MiniPoincarePlotSpec()
        _pp_spec.idx = self.idx
        _pp_spec.s_plus = s_plus
        _pp_spec.mean_plus = mean_plus
        _pp_spec.mean_minus = mean_minus
        _pp_spec.range = self.range
        _pp_spec.frame_file = frame_file

        if self.idx == 0:

            self.s_size = s_plus
            self.x_data.put(pl.arange(s_plus), data_vector_segment.signal_plus)
            self.y_data.put(pl.arange(s_plus),
                            data_vector_segment.signal_minus)

            ok = True
            old_s_plus = 0
            _pp_spec.level = 0
            _pp_spec.active_start = 0
            _pp_spec.active_stop = s_plus
        else:
            old_s_plus = len(self.old_signal_plus)
            ok = False
            if s_plus >= old_s_plus:
                if pl.all(self.old_signal_plus \
                            == data_vector_segment.signal_plus[:old_s_plus]):
                    old_size = self.s_size
                    new_size = old_size + s_plus - old_s_plus
                    if new_size > old_size:
                        _pp_spec.active_start = old_size
                        _pp_spec.active_stop = new_size

                        if new_size > len(self.x_data):
                            raise Exception(
                                'New size is greater than the signal size !')

                        self.x_data.put(
                            pl.arange(old_size, new_size),
                            data_vector_segment.signal_plus[old_s_plus -
                                                            s_plus:])

                        self.y_data.put(
                            pl.arange(old_size, new_size),
                            data_vector_segment.signal_minus[old_s_plus -
                                                             s_plus:])

                        _pp_spec.inactive_stop = \
                                        self._pp_spec_old.inactive_stop
                        self.s_size = new_size
                    _pp_spec.level = 1
                    ok = True
                else:
                    for idx in xrange(1, old_s_plus):
                        # the tail of the old segment must match the head of
                        # the new one when shifted by idx samples
                        if pl.all(self.old_signal_plus[idx:] \
                                    == data_vector_segment.signal_plus[:old_s_plus - idx]):
                            old_size = self.s_size
                            new_size = old_size + s_plus - (old_s_plus - idx)

                            if new_size > len(self.x_data):
                                raise Exception(
                                    'New size is greater than the signal size !'
                                )

                            if new_size > old_size:
                                _pp_spec.active_start = old_size
                                _pp_spec.active_stop = new_size

                                self.x_data.put(
                                    pl.arange(old_size,
                                              new_size), data_vector_segment.
                                    signal_plus[old_s_plus - idx:])

                                self.y_data.put(
                                    pl.arange(old_size,
                                              new_size), data_vector_segment.
                                    signal_minus[old_s_plus - idx:])
                                self.s_size = new_size

                            _d = self.s_size - s_plus
                            _pp_spec.inactive_start = _d - idx
                            _pp_spec.inactive_stop = _d

                            _pp_spec.level = 3

                            ok = True
                            break
            else:
                for idx in xrange(1, old_s_plus):
                    if idx + s_plus <= old_s_plus \
                        and pl.all(
                            self.old_signal_plus[idx:idx + s_plus] \
                                    == data_vector_segment.signal_plus):

                        _d = self.s_size - old_s_plus
                        _pp_spec.inactive_start = _d
                        _pp_spec.inactive_stop = _d + idx

                        if _pp_spec.inactive_stop + s_plus < self.s_size:
                            _pp_spec.inactive_start_2 = \
                                        _pp_spec.inactive_stop + s_plus
                            _pp_spec.inactive_stop_2 = self.s_size
                        _pp_spec.level = 2

                        ok = True
                        break
        if ok and not skip_frame:
            _pp_spec.x_data = self.x_data
            _pp_spec.y_data = self.y_data
            _pp_spec.cum_inactive = self.cum_inactive
            _pp_spec.s_size = self.s_size
            #print('PP_SPEC: ' + str(_p))

            self.pp_spec_manager.addMiniPoincarePlotSpec(_pp_spec)
            if self.idx > 0 and \
                (self.p.movie_bin_size > 0
                    and ((self.idx % self.p.movie_bin_size) == 0)):
                if len(self.pp_specs_managers) >= self.core_nums:
                    if not self.p.movie_calculate_all_frames:
                        self.__save_frames__()
                        self.pp_specs_managers = []

                old_pp_spec_manager = self.pp_spec_manager
                self.pp_spec_manager = MiniPoincarePlotSpecManager()
                self.pp_spec_manager.movie_dir = self.__prefixed_movie_dir__
                self.pp_spec_manager.movie_name = self.__prefixed_movie_name__
                self.pp_spec_manager.movie_dpi = self.p.movie_dpi
                self.pp_spec_manager.movie_fps = self.p.movie_fps
                self.pp_spec_manager.movie_height = self.p.movie_height
                self.pp_spec_manager.movie_width = self.p.movie_width
                self.pp_spec_manager.active_color = self.active_color
                self.pp_spec_manager.inactive_color = self.inactive_color
                self.pp_spec_manager.centroid_color = self.centroid_color
                self.pp_spec_manager.active_point_size = \
                                                self.p.movie_active_size
                self.pp_spec_manager.inactive_point_size = \
                                                self.p.movie_inactive_size
                self.pp_spec_manager.centroid_point_size = \
                                                self.p.movie_centroid_size
                self.pp_spec_manager.show_plot_legends = \
                                            self.p.movie_show_plot_legends
                self.pp_spec_manager.x_label = self.p.x_label
                self.pp_spec_manager.y_label = self.p.y_label
                self.pp_spec_manager.clean_frames = self.p.movie_clean_frames
                self.pp_spec_manager.movie_title = self.p.movie_title
                self.pp_spec_manager.movie_frame_step = self.p.movie_frame_step
                self.pp_spec_manager.movie_identity_line = self.p.movie_identity_line
                self.pp_spec_manager.movie_hour_label = self.p.movie_hour_label
                self.pp_spec_manager.movie_minute_label = self.p.movie_minute_label
                self.pp_spec_manager.movie_second_label = self.p.movie_second_label
                self.pp_spec_manager.movie_time_label_in_line = self.p.movie_time_label_in_line
                self.pp_spec_manager.movie_time_label_font_size = self.p.movie_time_label_font_size
                self.pp_spec_manager.movie_time_label_prefix = self.p.movie_time_label_prefix
                self.pp_spec_manager.movie_title_font_size = self.p.movie_title_font_size
                self.pp_spec_manager.movie_axis_font_size = self.p.movie_axis_font_size
                self.pp_spec_manager.movie_axis_font = self.p.movie_axis_font
                self.pp_spec_manager.movie_title_font = self.p.movie_title_font
                self.pp_spec_manager.movie_tick_font = self.p.movie_tick_font
                self.pp_spec_manager.movie_frame_pad = self.p.movie_frame_pad
                self.pp_spec_manager.movie_create_time_label = self.p.movie_create_time_label
                self.pp_spec_manager.movie_frame_filename_with_time = self.p.movie_frame_filename_with_time

                #add all previous pp specs
                for pp_spec in old_pp_spec_manager.getMiniPoincarePlotSpecs():
                    self.pp_spec_manager.addPreviousPoincarePlotSpecMinimum(
                        pp_spec)
                old_pp_spec_manager = None

                self.pp_specs_managers.append(self.pp_spec_manager)
            self.message = 'Prepare frame: %s' % (frame_name)
        elif ok and skip_frame:
            self.message = 'Skip frame %s' % (frame_name)
        else:
            print('s_plus: ' + str(s_plus) + ' old_s_plus: ' + str(old_s_plus))
            print('old_signal_plus: ' + str(self.old_signal_plus))
            print('signal_plus:     ' + str(data_vector_segment.signal_plus))
            raise Exception('Error for idx ' + str(self.idx))
        if _pp_spec.inactive_start >= 0 and _pp_spec.inactive_stop >= 0:
            #if time array is not None use it as array for cumulative time
            if self.time is not None:
                self.cum_inactive += pl.sum(
                    self.time[_pp_spec.inactive_start:_pp_spec.inactive_stop])
            else:
                self.cum_inactive += pl.sum(
                    self.x_data[_pp_spec.inactive_start:_pp_spec.
                                inactive_stop])

        self.old_signal_plus = data_vector_segment.signal_plus
        self.idx = self.idx + 1
        self._pp_spec_old = _pp_spec
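The segment-matching logic above boils down to comparing a shifted window of the previous signal with the head of the new one; a simplified sketch with toy data:

import pylab as pl

old = pl.array([2., 3., 4.])
new = pl.array([3., 4., 5.])   # window advanced by one sample
idx = 1
print pl.all(old[idx:] == new[:len(old) - idx])   # True: overlap of length 2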
    def add_data_vector_segment(self, data_vector_segment, last_segment=False):
        self.message = None
        if self.__prefixed_movie_name__ == None:
            return

        if last_segment:
            self.__save_frames__()
            return

        frame_name = '%s_%06d' % (self.__prefixed_movie_name__, self.idx)
        frame_file = as_path(self.__prefixed_movie_dir__, frame_name + '.png')
        #skip_frame = True if self.idx < self.p.movie_skip_to_frame or \
        #    (self.p.movie_skip_frames and os.path.exists(frame_file)) \
        #    else False
        skip_frame = False

        mean_plus = pl.mean(data_vector_segment.signal_plus)
        mean_minus = pl.mean(data_vector_segment.signal_minus)

        s_plus = len(data_vector_segment.signal_plus)

        _pp_spec = MiniPoincarePlotSpec()
        _pp_spec.idx = self.idx
        _pp_spec.s_plus = s_plus
        _pp_spec.mean_plus = mean_plus
        _pp_spec.mean_minus = mean_minus
        _pp_spec.range = self.range
        _pp_spec.frame_file = frame_file

        if self.idx == 0:

            self.s_size = s_plus
            self.x_data.put(pl.arange(s_plus), data_vector_segment.signal_plus)
            self.y_data.put(pl.arange(s_plus),
                                            data_vector_segment.signal_minus)

            ok = True
            old_s_plus = 0
            _pp_spec.level = 0
            _pp_spec.active_start = 0
            _pp_spec.active_stop = s_plus
        else:
            old_s_plus = len(self.old_signal_plus)
            ok = False
            if s_plus >= old_s_plus:
                if pl.all(self.old_signal_plus \
                            == data_vector_segment.signal_plus[:old_s_plus]):
                    old_size = self.s_size
                    new_size = old_size + s_plus - old_s_plus
                    if new_size > old_size:
                        _pp_spec.active_start = old_size
                        _pp_spec.active_stop = new_size

                        if new_size > len(self.x_data):
                            raise Exception(
                                'New size is greater then the signal size !')

                        self.x_data.put(pl.arange(old_size, new_size),
                            data_vector_segment.signal_plus[old_s_plus
                                                            - s_plus:])

                        self.y_data.put(pl.arange(old_size, new_size),
                            data_vector_segment.signal_minus[old_s_plus
                                                             - s_plus:])

                        _pp_spec.inactive_stop = \
                                        self._pp_spec_old.inactive_stop
                        self.s_size = new_size
                    _pp_spec.level = 1
                    ok = True
                else:
                    for idx in xrange(1, old_s_plus):
                        # the window slid forward by idx samples, so the tail
                        # of the old signal must match the head of the new one
                        if pl.all(self.old_signal_plus[idx:] \
                                    == data_vector_segment.signal_plus[
                                                        :old_s_plus - idx]):
                            old_size = self.s_size
                            new_size = old_size + s_plus - (old_s_plus - idx)
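                            # e.g. old_size = 100, s_plus = 60,
                            # old_s_plus = 50, idx = 10: the overlap is 40
                            # samples and new_size = 100 + 60 - 40 = 120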

                            if new_size > len(self.x_data):
                                raise Exception(
                                    'New size is greater than the signal size!')

                            if new_size > old_size:
                                _pp_spec.active_start = old_size
                                _pp_spec.active_stop = new_size

                                self.x_data.put(pl.arange(old_size, new_size),
                                    data_vector_segment.signal_plus[
                                                           old_s_plus - idx:])

                                self.y_data.put(pl.arange(old_size, new_size),
                                    data_vector_segment.signal_minus[
                                                           old_s_plus - idx:])
                                self.s_size = new_size

                            _d = self.s_size - s_plus
                            _pp_spec.inactive_start = _d - idx
                            _pp_spec.inactive_stop = _d

                            _pp_spec.level = 3

                            ok = True
                            break
            else:
                for idx in xrange(1, old_s_plus):
                    if idx + s_plus <= old_s_plus \
                        and pl.all(
                            self.old_signal_plus[idx:idx + s_plus] \
                                    == data_vector_segment.signal_plus):

                        _d = self.s_size - old_s_plus
                        _pp_spec.inactive_start = _d
                        _pp_spec.inactive_stop = _d + idx

                        if _pp_spec.inactive_stop + s_plus < self.s_size:
                            _pp_spec.inactive_start_2 = \
                                        _pp_spec.inactive_stop + s_plus
                            _pp_spec.inactive_stop_2 = self.s_size
                        _pp_spec.level = 2

                        ok = True
                        break
        if ok and not skip_frame:
            _pp_spec.x_data = self.x_data
            _pp_spec.y_data = self.y_data
            _pp_spec.cum_inactive = self.cum_inactive
            _pp_spec.s_size = self.s_size

            self.pp_spec_manager.addMiniPoincarePlotSpec(_pp_spec)
            if self.idx > 0 and \
                (self.p.movie_bin_size > 0
                    and ((self.idx % self.p.movie_bin_size) == 0)):
                if len(self.pp_specs_managers) >= self.core_nums:
                    if not self.p.movie_calculate_all_frames:
                        self.__save_frames__()
                        self.pp_specs_managers = []

                old_pp_spec_manager = self.pp_spec_manager
                self.pp_spec_manager = MiniPoincarePlotSpecManager()
                self.pp_spec_manager.movie_dir = self.__prefixed_movie_dir__
                self.pp_spec_manager.movie_name = self.__prefixed_movie_name__
                self.pp_spec_manager.movie_dpi = self.p.movie_dpi
                self.pp_spec_manager.movie_fps = self.p.movie_fps
                self.pp_spec_manager.movie_height = self.p.movie_height
                self.pp_spec_manager.movie_width = self.p.movie_width
                self.pp_spec_manager.active_color = self.active_color
                self.pp_spec_manager.inactive_color = self.inactive_color
                self.pp_spec_manager.centroid_color = self.centroid_color
                self.pp_spec_manager.active_point_size = \
                                                self.p.movie_active_size
                self.pp_spec_manager.inactive_point_size = \
                                                self.p.movie_inactive_size
                self.pp_spec_manager.centroid_point_size = \
                                                self.p.movie_centroid_size
                self.pp_spec_manager.show_plot_legends = \
                                            self.p.movie_show_plot_legends
                self.pp_spec_manager.x_label = self.p.x_label
                self.pp_spec_manager.y_label = self.p.y_label
                self.pp_spec_manager.clean_frames = self.p.movie_clean_frames
                self.pp_spec_manager.movie_title = self.p.movie_title
                self.pp_spec_manager.movie_frame_step = self.p.movie_frame_step
                self.pp_spec_manager.movie_identity_line = self.p.movie_identity_line
                self.pp_spec_manager.movie_hour_label = self.p.movie_hour_label
                self.pp_spec_manager.movie_minute_label = self.p.movie_minute_label
                self.pp_spec_manager.movie_second_label = self.p.movie_second_label
                self.pp_spec_manager.movie_time_label_in_line = self.p.movie_time_label_in_line
                self.pp_spec_manager.movie_time_label_font_size = self.p.movie_time_label_font_size
                self.pp_spec_manager.movie_time_label_prefix = self.p.movie_time_label_prefix
                self.pp_spec_manager.movie_title_font_size = self.p.movie_title_font_size
                self.pp_spec_manager.movie_axis_font_size = self.p.movie_axis_font_size
                self.pp_spec_manager.movie_axis_font = self.p.movie_axis_font
                self.pp_spec_manager.movie_title_font = self.p.movie_title_font
                self.pp_spec_manager.movie_tick_font = self.p.movie_tick_font
                self.pp_spec_manager.movie_frame_pad = self.p.movie_frame_pad
                self.pp_spec_manager.movie_create_time_label = self.p.movie_create_time_label
                self.pp_spec_manager.movie_frame_filename_with_time = self.p.movie_frame_filename_with_time

                #add all previous pp specs
                for pp_spec in old_pp_spec_manager.getMiniPoincarePlotSpecs():
                    self.pp_spec_manager.addPreviousPoincarePlotSpecMinimum(
                                                                    pp_spec)
                old_pp_spec_manager = None

                self.pp_specs_managers.append(self.pp_spec_manager)
            self.message = 'Prepare frame: %s' % frame_name
        elif ok and skip_frame:
            self.message = 'Skip frame %s' % frame_name
        else:
            print('s_plus: ' + str(s_plus) + ' old_s_plus: ' + str(old_s_plus))
            print('old_signal_plus: ' + str(self.old_signal_plus))
            print('signal_plus:     ' + str(data_vector_segment.signal_plus))
            raise Exception('Error for idx ' + str(self.idx))
        if _pp_spec.inactive_start >= 0 and _pp_spec.inactive_stop >= 0:
            # if a time array is available, use it for the cumulative
            # inactive time; otherwise fall back to x_data
            if self.time is not None:
                self.cum_inactive += pl.sum(
                    self.time[
                            _pp_spec.inactive_start:_pp_spec.inactive_stop])
            else:
                self.cum_inactive += pl.sum(
                    self.x_data[
                            _pp_spec.inactive_start:_pp_spec.inactive_stop])

        self.old_signal_plus = data_vector_segment.signal_plus
        self.idx += 1
        self._pp_spec_old = _pp_spec
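
# A stripped-down sketch (independent of the class above; names are
# illustrative) of the overlap test used in add_data_vector_segment:
# level 1 means the previous segment is a prefix of the new one (the window
# grew), level 3 means the window slid forward by idx samples.
def _classify_overlap_sketch(old, new):
    import pylab as pl
    if len(new) >= len(old) and pl.all(old == new[:len(old)]):
        return 1   # old segment is a prefix of the new one
    for idx in xrange(1, len(old)):
        if len(new) >= len(old) - idx \
                and pl.all(old[idx:] == new[:len(old) - idx]):
            return 3   # slid forward by idx samples
    return None        # no overlap found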
    def converged(q):
        # nested helper: the solve has converged when every objective's
        # residual norm falls below the tolerance
        n = pylab.array([pylab.norm(obj.diff(q)) for _, obj in self.objectives])
        return pylab.all(n < 1e-3)
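    # A usage sketch: a predicate like this typically gates a solver loop,
    # e.g. (step is an assumption, not defined in the snippet above):
    #     while not converged(q):
    #         q = step(q)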
Example #56
0
def fit_posterior(dm, region, sex, year, fast_fit=False, 
                  inconsistent_fit=False, params_to_fit=['p', 'r', 'i'], zero_re=True,
                  posteriors_only=False):
    """ Fit posterior of specified region/sex/year for specified model

    Parameters
    ----------
    dm : DiseaseJson
    region : str
      From dismod3.settings.gbd_regions, but clean()-ed
    sex : str, from dismod3.settings.gbd_sexes
    year : str, from dismod3.settings.gbd_years

    fast_fit : sample 101 draws from posterior, don't try for convergence (fast for testing)
    inconsistent_fit : fit parameters separately
    params_to_fit : list of params to fit, if not fitting all consistently

    zero_re : bool, if true, enforce constraint that sibling area REs sum to zero
    posteriors_only : bool, if true, use data from 1997-2007 for 2005 and from 2007 on for 2010

    Example
    -------
    >>> import fit_posterior
    >>> fit_posterior.fit_posterior(2552, 'asia_east', 'male', '2005')
    """
    dir = dismod3.settings.JOB_WORKING_DIR % dm.id

    ## load the model from disk or from web
    import simplejson as json
    import data
    reload(data)

    try:
        model = data.ModelData.load(dir)
        print 'loaded data from new format from %s' % dir
    except (IOError, AssertionError):
        model = data.ModelData.from_gbd_jsons(json.loads(dm.to_json()))
        #model.save(dir)   # saving in the new format is currently disabled
        print 'loaded data from json for %s' % dir

    # TODO: check for missing covariates, and have them fixed, instead of filling them with zeros

    ## next block fills in missing covariates with zero
    for col in model.input_data.columns:
        if col.startswith('x_'):
            model.input_data[col] = model.input_data[col].fillna(0.)
    # also fill all covariates missing in output template with zeros
    model.output_template = model.output_template.fillna(0)
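    # e.g. a row missing a covariate column such as x_smoking (a hypothetical
    # name) gets 0., the reference level, rather than being dropped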

    predict_area = dismod3.utils.clean(region)
    predict_sex = dismod3.utils.clean(sex)
    predict_year = int(year)

    ## load emp_priors dict from dm.params
    param_type = dict(i='incidence', p='prevalence', r='remission', f='excess-mortality', rr='relative-risk', pf='prevalence_x_excess-mortality', m_with='mortality')
    emp_priors = {}
    for t in 'i r p f'.split():

        # uncomment below to not use empirical prior for rate with zero data
        # if pl.all(model.input_data['data_type'] != t):
        #     continue

        #key = dismod3.utils.gbd_key_for(param_type[t], model.hierarchy.predecessors(predict_area)[0], year, sex)
        key = dismod3.utils.gbd_key_for(param_type[t], predict_area, year, sex)
        mu = dm.get_mcmc('emp_prior_mean', key)
        #mu = dm.get_mcmc('emp_prior_median', key)
        sigma = dm.get_mcmc('emp_prior_std', key)
        
        if len(mu) == 101 and len(sigma) == 101:
            emp_priors[t, 'mu'] = mu

            # TODO: determine best way to propagate prior on function
            emp_priors[t, 'sigma'] = sigma
            
            # ALT 1: scale so that the joint probability is not a
            # function of the length of the age function
            # emp_priors[t, 'sigma'] = sigma * pl.sqrt(len(sigma))

        ## update model.parameters['random_effects'] if there is information in the disease model
        expert_priors = model.parameters[t].get('random_effects', {})
        model.parameters[t]['random_effects'] = dm.get_empirical_prior(param_type[t]).get('new_alpha', {})
        model.parameters[t]['random_effects'].update(expert_priors)

        # shift random effects to make REs for observed children of predict area have mean zero
        re_mean = pl.mean([model.parameters[t]['random_effects'][area]['mu'] \
                           for area in model.hierarchy.neighbors(predict_area) \
                           if area in model.parameters[t]['random_effects']])
        for area in model.hierarchy.neighbors(predict_area):
            if area in model.parameters[t]['random_effects']:
                model.parameters[t]['random_effects'][area]['mu'] -= re_mean
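        # worked example: if the observed children carry REs of 0.2 and -0.1,
        # re_mean = 0.05 and the shifted REs become 0.15 and -0.15, which
        # average to zero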
            

        ## update model.parameters['fixed_effects'] if there is information in the disease model
        expert_fe_priors = model.parameters[t].get('fixed_effects', {})
        model.parameters[t]['fixed_effects'].update(dm.get_empirical_prior(param_type[t]).get('new_beta', {}))


    ## create model and priors for region/sex/year
    # select data that is about areas in this region, recent years, and sex of male or total only
    assert predict_area in model.hierarchy, 'region %s not found in area hierarchy' % predict_area
    subtree = nx.traversal.bfs_tree(model.hierarchy, predict_area)
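    # bfs_tree returns the directed tree of predict_area and all of its
    # descendants in the area hierarchy, so `r['area'] in subtree` below
    # checks whether a data row belongs to the region being predicted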

    def is_relevant(r):
        if (r['area'] not in subtree) and r['area'] != 'all':
            return False


        if predict_year == 1990:
            if r['year_start'] > 1997:
                return False
        elif predict_year == 2005:
            if posteriors_only:
                if r['year_end'] < 1997 or r['year_start'] > 2007:
                    return False
            else:
                if r['year_end'] < 1997:
                    return False
        elif predict_year == 2010:
            if posteriors_only:
                if r['data_type'] == 'm_all':
                    # include m_all data from 2005, since 2010 is not loaded
                    if r['year_end'] < 1997:
                        return False
                else:
                    if r['year_end'] < 2007:
                        return False
            else:
                if r['year_end'] < 1997:
                    return False
        else:
            assert 0, 'Predictions for year %d not yet implemented' % predict_year

        if r['sex'] not in [predict_sex, 'total']:
            return False

        return True
    
    old_relevant_rows = [i for i, r in model.input_data.T.iteritems() \
                         if (r['area'] in subtree or r['area'] == 'all')\
                         and ((predict_year >= 1997 and r['year_end'] >= 1997) or
                              (predict_year <= 1997 and r['year_start'] <= 1997)) \
                         and r['sex'] in [predict_sex, 'total']]

    relevant_rows = model.input_data.index[model.input_data.apply(is_relevant, axis=1)]

    if predict_year == 1990:
        assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation for 1990"

    if not posteriors_only:
        assert pl.all(relevant_rows == old_relevant_rows), "relevant rows should be the same in new and old implementation when posteriors_only is False"
    
    model.input_data = model.input_data.ix[relevant_rows]

    # replace area 'all' with predict_area
    model.input_data['area'][model.input_data['area'] == 'all'] = predict_area

    if inconsistent_fit:
        # generate fits for requested parameters inconsistently
        for t in params_to_fit:
            model.vars += ism.age_specific_rate(model, t,
                                            reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year,
                                            mu_age=None,
                                            mu_age_parent=emp_priors.get((t, 'mu')),
                                            sigma_age_parent=emp_priors.get((t, 'sigma')),
                                            rate_type='log_normal' if t == 'rr' else 'neg_binom',
                                            zero_re=zero_re)
            if fast_fit:
                dismod3.fit.fit_asr(model, t, iter=101, burn=0, thin=1, tune_interval=100)
            else:
                # iter, burn, and thin here are presumably module-level
                # settings defined elsewhere in the original source
                dismod3.fit.fit_asr(model, t, iter=iter, burn=burn, thin=thin, tune_interval=100)

    else:
        model.vars += ism.consistent(model,
                                     reference_area=predict_area, reference_sex=predict_sex, reference_year=predict_year,
                                     priors=emp_priors, zero_re=zero_re)

        ## fit model to data
        if fast_fit:
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, 105, 0, 1, 100)
        else:
            # iter, burn, and thin here are presumably module-level settings
            # defined elsewhere in the original source
            dm.map, dm.mcmc = dismod3.fit.fit_consistent(model, iter=iter, burn=burn, thin=thin, tune_interval=100, verbose=True)


    # generate estimates
    posteriors = {}
    for t in 'i r f p rr pf m_with X'.split():
        if t in model.vars:
            if t in model.parameters and 'level_bounds' in model.parameters[t]:
                lower=model.parameters[t]['level_bounds']['lower']
                upper=model.parameters[t]['level_bounds']['upper']
            else:
                lower=0
                upper=pl.inf
            posteriors[t] = covariate_model.predict_for(model,
                                                        model.parameters.get(t, {}),
                                                        predict_area, predict_sex, predict_year,
                                                        predict_area, predict_sex, predict_year,
                                                        True,  # population weighted averages
                                                        model.vars[t], lower, upper)
    try:
        graphics.plot_fit(model, model.vars, emp_priors, {})
        pl.savefig(dir + '/image/posterior-%s+%s+%s.png'%(predict_area, predict_sex, predict_year))
    except Exception, e:
        print 'Error generating output graphics'
        print e
Example #57
0
def assert_almost_equal(x, y):
    log_offset_diff = pl.log(x + 1.e-4) - pl.log(y + 1.e-4)
    assert pl.all(log_offset_diff**2 <= 1.e-4), 'expected approximate equality, found means of:\n  %s\n  %s' % (x.mean(1), y.mean(1))
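
# A usage sketch (arrays are made up): the log-offset transform makes the
# tolerance roughly relative (about 1%) for values well above 1.e-4 and
# absolute near zero.
#
#     x = pl.array([[.1, .2], [.3, .4]])
#     assert_almost_equal(x, x * 1.005)   # passes: log-ratio ~ 0.005
#     assert_almost_equal(x, x * 2.)      # raises: log-ratio ~ 0.69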
Example #58
0
def draw(board):
    # True when the board has no empty '.' cells left; like the other
    # examples, this assumes names from `from pylab import *`
    return all(ravel(board != '.'))
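
# A usage sketch for draw (the tic-tac-toe board layout is an assumption):
#
#     board = array([['X', 'O', 'X'],
#                    ['O', 'X', 'O'],
#                    ['O', 'X', 'O']])
#     draw(board)        # True: no '.' cells remain
#     board[0, 1] = '.'
#     draw(board)        # False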