Example #1
File: plot.py Project: tXiao95/dismod_mr
def plot_one_ppc(model, t):
    """ plot data and posterior predictive check
    
    :Parameters:
      - `model` : data.ModelData
      - `t` : str, one of 'i', 'r', 'f', 'p', 'rr', 'm', 'X', 'pf', 'csmr'
    
    """
    stats = model.vars[t]['p_pred'].stats()
    if stats is None:
        return

    pl.figure()
    pl.title(t)

    x = model.vars[t]['p_obs'].value.__array__()
    y = x - stats['quantiles'][50]
    yerr = [stats['quantiles'][50] - pl.atleast_2d(stats['95% HPD interval'])[:,0],
            pl.atleast_2d(stats['95% HPD interval'])[:,1] - stats['quantiles'][50]]
    pl.errorbar(x, y, yerr=yerr, fmt='ko', mec='w', capsize=0,
                label='Obs vs Residual (Obs - Pred)')

    pl.xlabel('Observation')
    pl.ylabel('Residual (observation-prediction)')

    pl.grid()
    l, r, b, t = pl.axis()  # note: t is rebound to the axis top here, shadowing the data-type argument
    pl.hlines([0], l, r)
    pl.axis([l, r, y.min()*1.1 - y.max()*.1, -y.min()*.1 + y.max()*1.1])
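A minimal usage sketch (hypothetical: assumes `model` is a data.ModelData whose MCMC has already been run, so that `p_pred.stats()` is populated):

    plot_one_ppc(model, 'p')  # residual plot for the prevalence data
    pl.show()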
Example #2
File: graphics.py Project: aflaxman/gbd
def plot_one_ppc(model, t):
    """ plot data and posterior predictive check
    
    :Parameters:
      - `model` : data.ModelData
      - `t` : str, one of 'i', 'r', 'f', 'p', 'rr', 'm', 'X', 'pf', 'csmr'
    
    """
    stats = model.vars[t]['p_pred'].stats()
    if stats is None:
        return

    pl.figure()
    pl.title(t)

    x = model.vars[t]['p_obs'].value.__array__()
    y = x - stats['quantiles'][50]
    yerr = [stats['quantiles'][50] - pl.atleast_2d(stats['95% HPD interval'])[:,0],
            pl.atleast_2d(stats['95% HPD interval'])[:,1] - stats['quantiles'][50]]
    pl.errorbar(x, y, yerr=yerr, fmt='ko', mec='w', capsize=0,
                label='Obs vs Residual (Obs - Pred)')

    pl.xlabel('Observation')
    pl.ylabel('Residual (observation-prediction)')

    pl.grid()
    l, r, b, t = pl.axis()  # note: t is rebound to the axis top here, shadowing the data-type argument
    pl.hlines([0], l, r)
    pl.axis([l, r, y.min()*1.1 - y.max()*.1, -y.min()*.1 + y.max()*1.1])
Example #3
 def invert_component(self, cls, w, z, h):
     """
      Invert a single PLCA component to separated features in the original feature space.
     """
     w = P.atleast_2d(w)
     if cls == plca.PLCA: w = w.T
     h = P.atleast_2d(h)
     return cls.reconstruct(w, z, h)
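The `atleast_2d`-then-transpose step is what lets a 1-D component act as a basis: `atleast_2d` promotes the vector to a single row, and the transpose (applied for plain PLCA) turns it into a column. A quick shape check in plain pylab:

    import pylab as P
    w = P.rand(5)                   # 1-D component vector
    print(P.atleast_2d(w).shape)    # (1, 5) -- a row vector
    print(P.atleast_2d(w).T.shape)  # (5, 1) -- a column basis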
Example #4
 def invert_component(self, cls, w, z, h):
     """
      Invert a single PLCA component to separated features in the original feature space.
     """
     w = P.atleast_2d(w)
      if cls == plca.PLCA: w = w.T
     h = P.atleast_2d(h)
      return cls.reconstruct(w, z, h)
Example #5
def new_bad_model(F):
    """ Results in a matrix with shape matching X, but all rows sum to 1"""
    N, T, J = F.shape
    pi = pl.zeros_like(F)
    for t in range(T):
        u = F[:,t,:].var(axis=0)
        u /= pl.sqrt(pl.dot(u,u))
        F_t_par = pl.dot(pl.atleast_2d(pl.dot(F[:,t,:], u)).T, pl.atleast_2d(u))
        F_t_perp = F[:,t,:] - F_t_par
        for n in range(N):
            alpha = (1 - F_t_perp[n].sum()) / F_t_par[n].sum()
            pi[n,t,:] = F_t_perp[n,:] + alpha*F_t_par[n,:]
    return pi
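A quick sanity check of the advertised row-sum property (hypothetical shapes):

    import pylab as pl
    F = pl.rand(4, 3, 5)
    pi = new_bad_model(F)
    print(pi.sum(axis=2))  # every entry should be 1.0, up to floating-point error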
Example #6
def new_bad_model(F):
    """ Results in a matrix with shape matching X, but all rows sum to 1"""
    N, T, J = F.shape
    pi = pl.zeros_like(F)
    for t in range(T):
        u = F[:, t, :].var(axis=0)
        u /= pl.sqrt(pl.dot(u, u))
        F_t_par = pl.dot(
            pl.atleast_2d(pl.dot(F[:, t, :], u)).T, pl.atleast_2d(u))
        F_t_perp = F[:, t, :] - F_t_par
        for n in range(N):
            alpha = (1 - F_t_perp[n].sum()) / F_t_par[n].sum()
            pi[n, t, :] = F_t_perp[n, :] + alpha * F_t_par[n, :]
    return pi
Example #7
 def _phase_map(self):
      self.dphi = (2*P.pi * self.nhop * P.arange(self.nfft//2+1)) / self.nfft  # expected per-hop phase advance for each bin
     A = P.diff(P.angle(self.STFT),1) # Complete Phase Map
     U = P.c_[P.angle(self.STFT[:,0]), A - P.atleast_2d(self.dphi).T ]
     U = U - P.np.round(U/(2*P.pi))*2*P.pi
     self.dPhi = U
     return U
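The `U - P.np.round(U/(2*P.pi))*2*P.pi` step is a principal-value wrap: it maps any phase difference into [-pi, pi]. For example:

    import pylab as P
    U = P.array([3.5 * P.pi, -2.1 * P.pi])
    print(U - P.np.round(U / (2 * P.pi)) * 2 * P.pi)  # [-0.5*pi, -0.1*pi]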
Example #8
File: graphics.py Project: aflaxman/gbd
def plot_viz_of_stochs(vars, viz_func, figsize=(8,6)):
    """ Plot autocorrelation for all stochs in a dict or dict of dicts
    
    :Parameters:
      - `vars` : dictionary
      - `viz_func` : visualization function such as ``acorr``, ``show_trace``, or ``hist``
      - `figsize` : tuple, size of figure
    
    """
    pl.figure(figsize=figsize)

    cells, stochs = tally_stochs(vars)

    # for each stoch, make an autocorrelation plot for each dimension
    rows = int(pl.floor(pl.sqrt(cells)))
    cols = int(pl.ceil(cells/rows))

    tile = 1
    for s in sorted(stochs, key=lambda s: s.__name__):
        trace = s.trace()
        if len(trace.shape) == 1:
            trace = trace.reshape((len(trace), 1))
        for d in range(len(pl.atleast_1d(s.value))):
            pl.subplot(rows, cols, tile)
            viz_func(pl.atleast_2d(trace)[:, d])
            pl.title('\n\n%s[%d]'%(s.__name__, d), va='top', ha='center', fontsize=8)
            tile += 1
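Hypothetical usage, assuming `vars` comes from a fitted PyMC model; any function that plots a 1-D array works as `viz_func`:

    plot_viz_of_stochs(vars, pl.hist)            # histogram per dimension
    plot_viz_of_stochs(vars, pl.acorr, (12, 8))  # autocorrelation per dimension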
Example #9
 def _pvoc2(self, X_hat, Phi_hat=None, R=None):
     """
     ::
       alternate (batch) implementation of phase vocoder - time-stretch
       inputs:
         X_hat - estimate of signal magnitude
         [Phi_hat] - estimate of signal phase
         [R] - resynthesis hop ratio
       output:
         updates self.X_hat with modified complex spectrum
     """
     N, W, H = self.nfft, self.wfft, self.nhop
     R = 1.0 if R is None else R
      dphi = P.atleast_2d((2*P.pi * H * P.arange(N//2+1)) / N).T
      print("Phase Vocoder Resynthesis...", N, W, H, R)
      A = P.angle(self.STFT) if Phi_hat is None else Phi_hat
      U = P.diff(A,1) - dphi
      U = U - P.np.round(U/(2*P.pi))*2*P.pi
      t = P.arange(0, n_cols, R)  # BUG: n_cols is undefined (presumably the number of STFT frames)
      tf = t - P.floor(t)
      phs = P.c_[A[:,0], U]
      phs += U[:,idx[1]] + dphi  # Problem, what is idx ? (undefined; broken as written)
      Xh = (1-tf)*Xh[:-1] + tf*Xh[1:]  # BUG: Xh is read before assignment; likely meant X_hat
      Xh *= P.exp( 1j * phs)
      self.X_hat = Xh
Example #10
 def _pvoc2(self, X_hat, Phi_hat=None, R=None):
     """
     ::
       alternate (batch) implementation of phase vocoder - time-stretch
       inputs:
         X_hat - estimate of signal magnitude
         [Phi_hat] - estimate of signal phase
         [R] - resynthesis hop ratio
       output:
         updates self.X_hat with modified complex spectrum
     """
     N, W, H = self.nfft, self.wfft, self.nhop
     R = 1.0 if R is None else R
      dphi = P.atleast_2d((2 * P.pi * H * P.arange(N // 2 + 1)) / N).T
     print("Phase Vocoder Resynthesis...", N, W, H, R)
     A = P.angle(self.STFT) if Phi_hat is None else Phi_hat
     U = P.diff(A, 1) - dphi
     U = U - P.np.round(U / (2 * P.pi)) * 2 * P.pi
      t = P.arange(0, n_cols, R)  # BUG: n_cols is undefined (presumably the number of STFT frames)
     tf = t - P.floor(t)
     phs = P.c_[A[:, 0], U]
      phs += U[:, idx[1]] + dphi  # Problem, what is idx ? (undefined; broken as written)
      Xh = (1 - tf) * Xh[:-1] + tf * Xh[1:]  # BUG: Xh is read before assignment; likely meant X_hat
     Xh *= P.exp(1j * phs)
     self.X_hat = Xh
Example #11
def plot_viz_of_stochs(vars, viz_func, figsize=(8, 6)):
    """ Plot autocorrelation for all stochs in a dict or dict of dicts
    
    :Parameters:
      - `vars` : dictionary
      - `viz_func` : visualization function such as ``acorr``, ``show_trace``, or ``hist``
      - `figsize` : tuple, size of figure
    
    """
    pl.figure(figsize=figsize)

    cells, stochs = tally_stochs(vars)

    # for each stoch, make an autocorrelation plot for each dimension
    rows = int(pl.floor(pl.sqrt(cells)))
    cols = int(pl.ceil(cells / rows))

    tile = 1
    for s in sorted(stochs, key=lambda s: s.__name__):
        trace = s.trace()
        if len(trace.shape) == 1:
            trace = trace.reshape((len(trace), 1))
        for d in range(len(pl.atleast_1d(s.value))):
            pl.subplot(rows, cols, tile)
            viz_func(pl.atleast_2d(trace)[:, d])
            pl.title('\n\n%s[%d]' % (s.__name__, d),
                     va='top',
                     ha='center',
                     fontsize=8)
            tile += 1
Example #12
 def stack_vectors(data, win=1, hop=1, zero_pad=True):
     """
     ::
        create an overlapping stacked vector sequence from a series of vectors
         data - row-wise multidimensional data to stack
         win  - number of consecutive vectors to stack [1]
         hop  - number of vectors to advance per stack [1]
         zero_pad - zero pad if incomplete stack at end 
     """
     data = pylab.atleast_2d(data)
     nrows, dim = data.shape
     hop = min(hop, nrows)
      nvecs = nrows // int(hop) if not zero_pad else int(
          pylab.ceil(nrows / float(hop)))
     features = pylab.zeros((nvecs, win * dim))
     i = 0
     while i < nrows - win + 1:
          features[i // hop, :] = data[i:i + win, :].reshape(1, -1)
          i += hop
      if i // hop < nvecs:
          x = data[i::, :].reshape(1, -1)
          features[i // hop, :] = pylab.c_[x,
                                          pylab.zeros(
                                              (1, win * dim - x.shape[1]))]
     return features
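Hypothetical usage, stacking windows of 3 consecutive 2-D frames with a hop of 1:

    import pylab
    X = pylab.arange(10.).reshape(5, 2)
    print(stack_vectors(X, win=3, hop=1).shape)  # (5, 6), zero-padded at the end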
Example #13
 def _phase_map(self):
      self.dphi = (2*P.pi * self.nhop * P.arange(self.nfft//2+1)) / self.nfft  # expected per-hop phase advance for each bin
     A = P.diff(P.angle(self.STFT),1) # Complete Phase Map
     U = P.c_[P.angle(self.STFT[:,0]), A - P.atleast_2d(self.dphi).T ]
     U = U - P.np.round(U/(2*P.pi))*2*P.pi
     self.dPhi = U
     return U
Example #14
    def imagesc2(self,
                 data,
                 newfig=True,
                 str='',  # note: shadows the builtin str
                 ax=1,
                 cbar=1,
                 txt=False,
                 txtprec=2,
                 txtsz=18,
                 txtnz=0,
                 txtrev=0,
                 labels=None,
                 **kwargs):
        kwargs = self.check_kwargs(kwargs, DEFAULT_IMAGESC_KWARGS)
        if newfig:
            fig = P.figure()
        data = P.copy(data)
        if len(data.shape) < 2:
            data = P.atleast_2d(data)

    #    if txtnz:
    #        kwargs['vmin'] = data[where(data>txtnz)].min()

        P.imshow(data, **kwargs)
        if cbar: P.colorbar()
        if labels is not None:
            P.xticks(P.np.arange(len(labels)), labels)
            P.yticks(P.np.arange(len(labels)), labels)
        if txt:
            thr = data.min() + (data.max() - data.min()) / 2.0
            for a in range(data.shape[0]):
                for b in range(data.shape[1]):
                    if data[a, b] < thr:
                        col = P.array([1, 1, 1])
                    else:
                        col = P.array([0, 0, 0])
                    if txtrev:
                        col = 1 - col
                    d = data[a, b].round(txtprec)
                    if txtprec == 0:
                        d = int(d)
                    if not txtnz or txtnz and d > txtnz:  # scale only non-zero values
                        P.text(b - 0.125,
                               a + 0.125,
                               d,
                               color=col,
                               fontsize=txtsz)
        P.title(str, fontsize=12)
        return fig
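Hypothetical usage (assumes `obj` is an instance of the class this method is defined on and that DEFAULT_IMAGESC_KWARGS is in scope):

    fig = obj.imagesc2(P.rand(4, 4), str='demo', txt=True, txtprec=1,
                       labels=['a', 'b', 'c', 'd'])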
Example #15
 def _ichroma(self, V, **kwargs):
     """
     ::
     
         Inverse chromagram transform. Make a signal from a folded constant-Q transform.
     """
     if not (self._have_hcqft or self._have_cqft):
         return None
     a,b = self.HCQFT.shape if self._have_hcqft else self.CQFT.shape
      complete_octaves = a // self.nbpo  # integer division, number of complete octaves
     if P.remainder(a,self.nbpo):
         complete_octaves += 1
     X = P.repeat(V, complete_octaves, 0)[:a,:] # truncate if necessary
     X /= X.max()
     X *= P.atleast_2d(P.linspace(1,0,X.shape[0])).T # weight the spectrum
     self.x_hat = self._icqft(X, **kwargs)
     return self.x_hat
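The repeat-then-truncate idiom in isolation (note that `P.repeat` duplicates each row consecutively, whereas a whole-octave tiling would use `P.tile`):

    import pylab as P
    V = P.rand(12, 4)              # one row per pitch class
    X = P.repeat(V, 3, 0)[:30, :]  # each row repeated 3 times, truncated to 30 rows
    print(X.shape)                 # (30, 4)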
Example #16
 def _ichroma(self, V, **kwargs):
     """
     ::
     
         Inverse chromagram transform. Make a signal from a folded constant-Q transform.
     """
     if not (self._have_hcqft or self._have_cqft):
         return None
     a,b = self.HCQFT.shape if self._have_hcqft else self.CQFT.shape
      complete_octaves = a // self.nbpo  # integer division, number of complete octaves
     if P.remainder(a,self.nbpo):
         complete_octaves += 1
     X = P.repeat(V, complete_octaves, 0)[:a,:] # truncate if necessary
     X /= X.max()
     X *= P.atleast_2d(P.linspace(1,0,X.shape[0])).T # weight the spectrum
     self.x_hat = self._icqft(X, **kwargs)
     return self.x_hat
Example #17
 def stack_vectors(data, win=1, hop=1, zero_pad=True):
     """
     ::
        create an overlapping stacked vector sequence from a series of vectors
         data - row-wise multidimensional data to stack
         win  - number of consecutive vectors to stack [1]
         hop  - number of vectors to advance per stack [1]
         zero_pad - zero pad if incomplete stack at end 
     """
     data = pylab.atleast_2d(data)
     nrows, dim = data.shape
     hop = min(hop, nrows)
      nvecs = nrows // int(hop) if not zero_pad else int(pylab.ceil(nrows / float(hop)))
     features = pylab.zeros((nvecs, win * dim))
     i = 0
     while i < nrows - win + 1:
          features[i // hop, :] = data[i : i + win, :].reshape(1, -1)
         i += hop
      if i // hop < nvecs:
          x = data[i::, :].reshape(1, -1)
          features[i // hop, :] = pylab.c_[x, pylab.zeros((1, win * dim - x.shape[1]))]
     return features
Example #18
def plot_one_effects(model, data_type):
    """ Plot random effects and fixed effects.
    
    :Parameters:
      - `model` : data.ModelData
      - `data_type` : str, one of 'i', 'r', 'f', 'p', 'rr', 'pf'
      
    """
    vars = model.vars[data_type]
    hierarchy = model.hierarchy

    pl.figure(figsize=(22, 17))
    for i, (covariate, effect) in enumerate([['U', 'alpha'], ['X', 'beta']]):
        if covariate not in vars:
            continue

        cov_name = list(vars[covariate].columns)

        if isinstance(vars.get(effect), mc.Stochastic):
            pl.subplot(1, 2, i + 1)
            pl.title('%s_%s' % (effect, data_type))

            stats = vars[effect].stats()
            if stats:
                if effect == 'alpha':
                    index = sorted(
                        pl.arange(len(cov_name)),
                        key=lambda i: str(cov_name[
                            i] in hierarchy and nx.shortest_path(
                                hierarchy, 'all', cov_name[i]) or cov_name[i]))
                elif effect == 'beta':
                    index = pl.arange(len(cov_name))

                x = pl.atleast_1d(stats['mean'])
                y = pl.arange(len(x))

                xerr = pl.array([
                    x - pl.atleast_2d(stats['95% HPD interval'])[:, 0],
                    pl.atleast_2d(stats['95% HPD interval'])[:, 1] - x
                ])
                pl.errorbar(x[index],
                            y[index],
                            xerr=xerr[:, index],
                            fmt='bs',
                            mec='w')

                l, r, b, t = pl.axis()
                pl.vlines([0], b - .5, t + .5)
                pl.hlines(y, l, r, linestyle='dotted')
                pl.xticks([l, 0, r])
                pl.yticks([])
                for i in index:
                    spaces = cov_name[i] in hierarchy and len(
                        nx.shortest_path(hierarchy, 'all', cov_name[i])) or 0
                    pl.text(l,
                            y[i],
                            '%s%s' % (' * ' * spaces, cov_name[i]),
                            va='center',
                            ha='left')
                pl.axis([l, r, -.5, t + .5])

        if isinstance(vars.get(effect), list):
            pl.subplot(1, 2, i + 1)
            pl.title('%s_%s' % (effect, data_type))
            index = sorted(pl.arange(len(cov_name)),
                           key=lambda i:
                           str(cov_name[i] in hierarchy and nx.shortest_path(
                               hierarchy, 'all', cov_name[i]) or cov_name[i]))

            for y, i in enumerate(index):
                n = vars[effect][i]
                if isinstance(n, mc.Stochastic) or isinstance(
                        n, mc.Deterministic):
                    stats = n.stats()
                    if stats:
                        x = pl.atleast_1d(stats['mean'])

                        xerr = pl.array([
                            x - pl.atleast_2d(stats['95% HPD interval'])[:, 0],
                            pl.atleast_2d(stats['95% HPD interval'])[:, 1] - x
                        ])
                        pl.errorbar(x, y, xerr=xerr, fmt='bs', mec='w')

            l, r, b, t = pl.axis()
            pl.vlines([0], b - .5, t + .5)
            pl.hlines(y, l, r, linestyle='dotted')
            pl.xticks([l, 0, r])
            pl.yticks([])

            for y, i in enumerate(index):
                spaces = cov_name[i] in hierarchy and len(
                    nx.shortest_path(hierarchy, 'all', cov_name[i])) or 0
                pl.text(l,
                        y,
                        '%s%s' % (' * ' * spaces, cov_name[i]),
                        va='center',
                        ha='left')

            pl.axis([l, r, -.5, t + .5])

            if effect == 'alpha':
                effect_str = ''
                for sigma in vars['sigma_alpha']:
                    stats = sigma.stats()
                    if stats:
                        effect_str += '%s = %.3f\n' % (sigma.__name__,
                                                       stats['mean'])
                    else:
                        effect_str += '%s = %.3f\n' % (sigma.__name__,
                                                       sigma.value)
                pl.text(r, t, effect_str, va='top', ha='right')
            elif effect == 'beta':
                effect_str = ''
                if 'eta' in vars:
                    eta = vars['eta']
                    stats = eta.stats()
                    if stats:
                        effect_str += '%s = %.3f\n' % (eta.__name__,
                                                       stats['mean'])
                    else:
                        effect_str += '%s = %.3f\n' % (eta.__name__, eta.value)
                pl.text(r, t, effect_str, va='top', ha='right')
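Hypothetical usage, assuming `model` is a fitted data.ModelData with random and/or fixed effects for prevalence:

    plot_one_effects(model, 'p')
    pl.show()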
Example #19
def generate_prior_potentials(rate_vars, prior_str, age_mesh):
    """
    augment the rate_vars dict to include a list of potentials that model priors on rate_vars['rate_stoch']

    prior_str may have entries in the following format:
      smooth <tau> [<age_start> <age_end>]
      zero <age_start> <age_end>
      confidence <mean> <tau>
      increasing <age_start> <age_end>
      decreasing <age_start> <age_end>
      convex_up <age_start> <age_end>
      convex_down <age_start> <age_end>
      unimodal <age_start> <age_end>
      value <mean> <tau> [<age_start> <age_end>]
      at_least <value>
      at_most <value>
      max_at_most <value>
            
    for example: 'smooth .1, zero 0 5, zero 95 100'

    age_mesh[i] indicates what age the value of rate[i] corresponds to
    """
    def derivative_sign_prior(rate, prior, deriv, sign):
        age_start = int(prior[1])
        age_end = int(prior[2])
        age_indices = indices_for_range(age_mesh, age_start, age_end)

        @mc.potential(name='deriv_sign_{%d,%d,%d,%d}^%s' %
                      (deriv, sign, age_start, age_end, str(rate)))
        def deriv_sign_rate(f=rate,
                            age_indices=age_indices,
                            tau=1.e14,
                            deriv=deriv,
                            sign=sign):
            df = pl.diff(f[age_indices], deriv)
            return mc.normal_like(pl.absolute(df) * (sign * df < 0), 0., tau)

        return [deriv_sign_rate]

    priors = []
    rate = rate_vars['rate_stoch']
    rate_vars['bounds_func'] = lambda f, age: f
    for line in prior_str.split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'smooth':
            pass  # handle this after applying all level bounds
        elif prior[0] == 'heterogeneity':
            # prior affects dispersion term of model; handle as a special case
            continue

        elif prior[0] == 'increasing':
            priors += derivative_sign_prior(rate, prior, deriv=1, sign=1)
        elif prior[0] == 'decreasing':
            priors += derivative_sign_prior(rate, prior, deriv=1, sign=-1)
        elif prior[0] == 'convex_down':
            priors += derivative_sign_prior(rate, prior, deriv=2, sign=-1)
        elif prior[0] == 'convex_up':
            priors += derivative_sign_prior(rate, prior, deriv=2, sign=1)

        elif prior[0] == 'unimodal':
            age_start = int(prior[1])
            age_end = int(prior[2])
            age_indices = indices_for_range(age_mesh, age_start, age_end)

            @mc.potential(name='unimodal_{%d,%d}^%s' %
                          (age_start, age_end, str(rate)))
            def unimodal_rate(f=rate, age_indices=age_indices, tau=1.e5):
                df = pl.diff(f[age_indices])
                sign_changes = pl.find((df[:-1] > NEARLY_ZERO)
                                       & (df[1:] < -NEARLY_ZERO))
                sign = pl.ones(len(age_indices) - 2)
                if len(sign_changes) > 0:
                    change_age = sign_changes[len(sign_changes) // 2]
                    sign[change_age:] = -1.
                return -tau * pl.dot(pl.absolute(df[:-1]),
                                     (sign * df[:-1] < 0))

            priors += [unimodal_rate]

        elif prior[0] == 'max_at_least':
            val = float(prior[1])

            @mc.potential(name='max_at_least_{%f}^{%s}' % (val, str(rate)))
            def max_at_least(cur_max=rate, at_least=val, tau=(.001 * val)**-2):
                return -tau * (cur_max - at_least)**2 * (cur_max < at_least)

            priors += [max_at_least]

        elif prior[0] == 'level_value':
            val = float(prior[1]) + 1.e-9

            if len(prior) == 4:
                age_start = int(prior[2])
                age_end = int(prior[3])
            else:
                age_start = 0
                age_end = MAX_AGE
            age_indices = indices_for_range(age_mesh, age_start, age_end)

            def new_bounds_func(f,
                                age,
                                val=val,
                                age_start=age_start,
                                age_end=age_end,
                                prev_bounds_func=rate_vars['bounds_func']):
                age = pl.array(age)
                return pl.where((age >= age_start) & (age <= age_end), val,
                                prev_bounds_func(f, age))

            rate_vars['bounds_func'] = new_bounds_func

        elif prior[0] == 'at_most':
            val = float(prior[1])

            def new_bounds_func(f,
                                age,
                                val=val,
                                prev_bounds_func=rate_vars['bounds_func']):
                return pl.minimum(prev_bounds_func(f, age), val)

            rate_vars['bounds_func'] = new_bounds_func

        elif prior[0] == 'at_least':
            val = float(prior[1])

            def new_bounds_func(f,
                                age,
                                val=val,
                                prev_bounds_func=rate_vars['bounds_func']):
                return pl.maximum(prev_bounds_func(f, age), val)

            rate_vars['bounds_func'] = new_bounds_func

        else:
            raise KeyError('Unrecognized prior: %s' % prior_str)

    # update rate stoch with the bounds func from the priors
    # TODO: create this before smoothing, so that smoothing takes levels into account
    @mc.deterministic(name='%s_w_bounds' % rate_vars['rate_stoch'].__name__)
    def mu_bounded(mu=rate_vars['rate_stoch'],
                   bounds_func=rate_vars['bounds_func']):
        return bounds_func(mu,
                           pl.arange(101))  # FIXME: don't hardcode age range

    rate_vars['unbounded_rate'] = rate_vars['rate_stoch']
    rate_vars['rate_stoch'] = mu_bounded
    rate = rate_vars['rate_stoch']

    # add potential to encourage rate to look like level bounds
    @mc.potential(name='%s_potential' % rate_vars['rate_stoch'].__name__)
    def mu_potential(mu1=rate_vars['unbounded_rate'],
                     mu2=rate_vars['rate_stoch']):
        return mc.normal_like(mu1, mu2, .0001**-2)

    rate_vars['rate_potential'] = mu_potential

    # add smoothing prior to the rate with level bounds
    for line in prior_str.split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'smooth':
            scale = float(prior[1])

            if len(prior) == 4:
                age_start = int(prior[2])
                age_end = int(prior[3])
            else:
                age_start = 0
                age_end = MAX_AGE

            age_indices = indices_for_range(pl.arange(MAX_AGE), age_start,
                                            age_end)

            from pymc.gp.cov_funs import matern
            a = pl.atleast_2d(age_indices).T
            C = matern.euclidean(a, a, diff_degree=2, amp=10., scale=scale)

            @mc.potential(name='smooth_{%d,%d}^%s' %
                          (age_start, age_end, str(rate)))
            def smooth_rate(f=rate, age_indices=age_indices, C=C):
                log_rate = pl.log(pl.maximum(f, NEARLY_ZERO))
                return mc.mv_normal_cov_like(log_rate[age_indices] -
                                             log_rate[age_indices].mean(),
                                             pl.zeros_like(age_indices),
                                             C=C)

            priors += [smooth_rate]
            print('added smoothing potential for %s' % smooth_rate)

    rate_vars['priors'] = priors
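A hedged usage sketch: it assumes `rate_vars['rate_stoch']` is a PyMC node over ages 0-100, and that `indices_for_range`, `MAX_AGE`, `NEARLY_ZERO`, and `PRIOR_SEP_STR` (',' here) are in scope:

    generate_prior_potentials(rate_vars, 'smooth .1, increasing 0 20', pl.arange(101))
    print(len(rate_vars['priors']))  # one potential per applicable prior clause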
Example #20
def setup(dm, key, data_list=[], rate_stoch=None, emp_prior={}, lower_bound_data=[]):
    """ Generate the PyMC variables for a negative-binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the negative binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimation_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = dismod3.utils.type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

    Results
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if pl.any(pl.diff(est_mesh) != 1):
        raise ValueError('ERROR: Gaps in estimation age mesh must all equal 1')

    # calculate effective sample size for all data and lower bound data
    dm.calc_effective_sample_size(data_list)
    dm.calc_effective_sample_size(lower_bound_data)

    # generate regional covariates
    covariate_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X_region, X_study = regional_covariates(key, covariate_dict, derived_covariate)

    # use confidence prior from prior_str  (only for posterior estimate, this is overridden below for empirical prior estimate)
    mu_delta = 1000.
    sigma_delta = 10.
    mu_log_delta = 3.
    sigma_log_delta = .25
    from dismod3.settings import PRIOR_SEP_STR
    for line in dm.get_priors(key).split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'heterogeneity':
            # originally designed for this:
            mu_delta = float(prior[1])
            sigma_delta = float(prior[2])

            # HACK: override design to set sigma_log_delta,
            # .25 = very, .025 = moderately, .0025 = slightly
            if float(prior[2]) > 0:
                sigma_log_delta = .025 / float(prior[2])


    # use the empirical prior mean if it is available
    if len(set(emp_prior.keys()) & set(['alpha', 'beta', 'gamma'])) == 3:
        mu_alpha = pl.array(emp_prior['alpha'])
        sigma_alpha = pl.array(emp_prior['sigma_alpha'])
        alpha = pl.array(emp_prior['alpha']) # TODO: make this stochastic
        vars.update(region_coeffs=alpha)

        beta = pl.array(emp_prior['beta']) # TODO: make this stochastic
        sigma_beta = pl.array(emp_prior['sigma_beta'])
        vars.update(study_coeffs=beta)

        mu_gamma = pl.array(emp_prior['gamma'])
        sigma_gamma = pl.array(emp_prior['sigma_gamma'])

        # Do not inform dispersion parameter from empirical prior stage
        # if 'delta' in emp_prior:
        #    mu_delta = emp_prior['delta']
        #    if 'sigma_delta' in emp_prior:
        #        sigma_delta = emp_prior['sigma_delta']
    else:
        import dismod3.regional_similarity_matrices as similarity_matrices
        n = len(X_region)
        mu_alpha = pl.zeros(n)
        sigma_alpha = .025  # TODO: make this a hyperparameter, with a traditional prior, like inverse gamma
        C_alpha = similarity_matrices.regions_nested_in_superregions(n, sigma_alpha)

        # use alternative region effect covariance structure if requested
        region_prior_key = 'region_effects'
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.uninformative(n, sigma_alpha)

        region_prior_key = 'region_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.regions_nested_in_superregions(n, dm.params[region_prior_key]['std'])

        # add informative prior for sex effect if requested
        sex_prior_key = 'sex_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if sex_prior_key in dm.params:
            print('adjusting prior on sex effect coefficient for %s' % key)
            mu_alpha[n-1] = pl.log(dm.params[sex_prior_key]['mean'])
            sigma_sex = (pl.log(dm.params[sex_prior_key]['upper_ci']) - pl.log(dm.params[sex_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-1, n-1]= sigma_sex**2.

        # add informative prior for time effect if requested
        time_prior_key = 'time_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if time_prior_key in dm.params:
            print('adjusting prior on time effect coefficient for %s' % key)
            mu_alpha[n-2] = pl.log(dm.params[time_prior_key]['mean'])
            sigma_time = (pl.log(dm.params[time_prior_key]['upper_ci']) - pl.log(dm.params[time_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-2, n-2]= sigma_time**2.
        
        #C_alpha = similarity_matrices.all_related_equally(n, sigma_alpha)
        alpha = mc.MvNormalCov('region_coeffs_%s' % key, mu=mu_alpha,
                            C=C_alpha,
                            value=mu_alpha)
        vars.update(region_coeffs=alpha, region_coeffs_step_cov=.005*C_alpha)

        mu_beta = pl.zeros(len(X_study))
        sigma_beta = .1

        # add informative prior for beta effect if requested
        prior_key = 'beta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print('adjusting prior on beta effect coefficients for %s' % key)
            mu_beta = pl.array(dm.params[prior_key]['mean'])
            sigma_beta = pl.array(dm.params[prior_key]['std'])

        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=sigma_beta**-2., value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = 0.*pl.ones(len(est_mesh))
        sigma_gamma = 2.*pl.ones(len(est_mesh))

        # add informative prior for gamma effect if requested
        prior_key = 'gamma_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print('adjusting prior on gamma effect coefficients for %s' % key)
            mu_gamma = pl.array(dm.params[prior_key]['mean'])
            sigma_gamma = pl.array(dm.params[prior_key]['std'])

        # always use dispersed prior on delta for empirical prior phase
        mu_log_delta = 3.
        sigma_log_delta = .25
        # add informative prior for delta effect if requested
        prior_key = 'delta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print('adjusting prior on delta effect coefficients for %s' % key)
            mu_log_delta = dm.params[prior_key]['mean']
            sigma_log_delta = dm.params[prior_key]['std']

    mu_zeta = 0.
    sigma_zeta = .25
    # add informative prior for zeta effect if requested
    prior_key = 'zeta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
    if prior_key in dm.params:
        print('adjusting prior on zeta effect coefficients for %s' % key)
        mu_zeta = dm.params[prior_key]['mean']
        sigma_zeta = dm.params[prior_key]['std']
    
    if mu_delta != 0.:
        if sigma_delta != 0.:
            log_delta = mc.Normal('log_dispersion_%s' % key, mu=mu_log_delta, tau=sigma_log_delta**-2, value=3.)
            zeta = mc.Normal('zeta_%s'%key, mu=mu_zeta, tau=sigma_zeta**-2, value=mu_zeta)
            delta = mc.Lambda('dispersion_%s' % key, lambda x=log_delta: 50. + 10.**x)
            vars.update(dispersion=delta, log_dispersion=log_delta, zeta=zeta, dispersion_step_sd=.1*log_delta.parents['tau']**-.5)
        else:
            delta = mc.Lambda('dispersion_%s' % key, lambda x=mu_delta: mu_delta)  # note: zeta is left undefined on this branch, though obs below expects it
            vars.update(dispersion=delta)
        
    else:
        delta = mc.Lambda('dispersion_%s' % key, lambda mu=mu_delta: 0)
        vars.update(dispersion=delta)

    if len(sigma_gamma) == 1:
        sigma_gamma = sigma_gamma[0]*pl.ones(len(est_mesh))

    # create variable for interpolated rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        mu = rate_stoch

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return pl.log(pl.maximum(dismod3.settings.NEARLY_ZERO, mu)) - pl.dot(alpha, Xa) - pl.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma[param_mesh]**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exist, we make gamma a stoch, and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = pl.log(dismod3.settings.NEARLY_ZERO + dm.get_initial_value(key))

        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key, mu=mu_gamma[param_mesh], tau=sigma_gamma[param_mesh]**-2, value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return dismod3.utils.interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name=key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return predict_rate([Xa, Xb], alpha, beta, gamma, lambda f, age: f, est_mesh)

        # Create a guess at the covariance matrix for MCMC proposals to update gamma_mesh
        from pymc.gp.cov_funs import matern
        a = pl.atleast_2d(param_mesh).T
        C = matern.euclidean(a, a, diff_degree = 2, amp = 1.**2, scale = 10.)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, rate_stoch=mu, age_coeffs_mesh_step_cov=.005*pl.array(C))

        # adjust value of gamma_mesh based on priors, if necessary
        # TODO: implement more adjustments, currently only adjusted based on at_least priors
        for line in dm.get_priors(key).split(PRIOR_SEP_STR):
            prior = line.strip().split()
            if len(prior) == 0:
                continue
            if prior[0] == 'at_least':
                delta_gamma = pl.log(pl.maximum(mu.value, float(prior[1]))) - pl.log(mu.value)
                gamma_mesh.value = gamma_mesh.value + delta_gamma[param_mesh]

    # create potentials for priors
    dismod3.utils.generate_prior_potentials(vars, dm.get_priors(key), est_mesh)

    # create observed stochastics for data
    vars['data'] = []

    if mu_delta != 0.:  
        value = []
        N = []
        Xa = []
        Xb = []
        ai = []
        aw = []
        Xz = []

        for d in data_list:
            try:
                age_indices, age_weights, Y_i, N_i = values_from(dm, d)
            except ValueError:
                debug('WARNING: could not calculate likelihood for data %d' % d['id'])
                continue

            value.append(Y_i*N_i)
            N.append(N_i)
            Xa.append(covariates(d, covariate_dict)[0])
            Xb.append(covariates(d, covariate_dict)[1])
            Xz.append(float(d.get('bias') or 0.))
            ai.append(age_indices)
            aw.append(age_weights)

            vars['data'].append(d)

        N = pl.array(N)
        Xa = pl.array(Xa)
        Xb = pl.array(Xb)
        Xz = pl.array(Xz)
        value = pl.array(value)
        
        vars['effective_sample_size'] = list(N)
        
    if len(vars['data']) > 0:
        # TODO: consider using only a subset of the rates at each step of the fit to speed computation; say 100 of them
        k = 50000
        if len(vars['data']) < k:
            data_sample = range(len(vars['data']))
        else:
            import random
            @mc.deterministic(name='data_sample_%s' % key)
            def data_sample(n=len(vars['data']), k=k):
                return random.sample(range(n), k)

        @mc.deterministic(name='rate_%s' % key)
        def rates(S=data_sample,
                Xa=Xa, Xb=Xb,
                alpha=alpha, beta=beta, gamma=gamma,
                bounds_func=vars['bounds_func'],
                age_indices=ai,
                age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa[S], alpha) + pl.dot(Xb[S], pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu = pl.zeros_like(shifts)
            for i,s in enumerate(S):
                mu[i] = pl.dot(age_weights[s], bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
                # TODO: evaluate speed increase and accuracy decrease of the following:
                #midpoint = age_indices[s][len(age_indices[s])/2]
                #mu[i] = bounds_func(shifts[i] * exp_gamma[midpoint], midpoint)
                # TODO: evaluate speed increase and accuracy decrease of the following: (to see speed increase, need to code this up using difference of running sums
                #mu[i] = pl.dot(pl.ones_like(age_weights[s]) / float(len(age_weights[s])),
                #               bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
            return mu
        vars['expected_rates'] = rates
        
        @mc.observed
        @mc.stochastic(name='data_%s' % key)
        def obs(value=value,
                S=data_sample,
                N=N,
                mu_i=rates,
                Xz=Xz,
                zeta=zeta,
                delta=delta):
            #zeta_i = .001
            #residual = pl.log(value[S] + zeta_i) - pl.log(mu_i*N[S] + zeta_i)
            #return mc.normal_like(residual, 0, 100. + delta)
            logp = mc.negative_binomial_like(value[S], N[S]*mu_i, delta*pl.exp(Xz*zeta))
            return logp

        vars['observed_counts'] = obs

        @mc.deterministic(name='predicted_data_%s' % key)
        def predictions(value=value,
                        N=N,
                        S=data_sample,
                        mu=rates,
                        delta=delta):
            r_S = mc.rnegative_binomial(N[S]*mu, delta)/N[S]
            r = pl.zeros(len(vars['data']))
            r[S] = r_S
            return r

        vars['predicted_rates'] = predictions
        debug('likelihood of %s contains %d rates' % (key, len(vars['data'])))

    # now do the same thing for the lower bound data
    # TODO: refactor to remove duplicated code
    vars['lower_bound_data'] = []
    value = []
    N = []
    Xa = []
    Xb = []
    ai = []
    aw = []
    for d in lower_bound_data:
        try:
            age_indices, age_weights, Y_i, N_i = values_from(dm, d)
        except ValueError:
            debug('WARNING: could not calculate likelihood for data %d' % d['id'])
            continue

        value.append(Y_i*N_i)
        N.append(N_i)
        Xa.append(covariates(d, covariate_dict)[0])
        Xb.append(covariates(d, covariate_dict)[1])
        ai.append(age_indices)
        aw.append(age_weights)

        vars['lower_bound_data'].append(d)

    N = pl.array(N)
    value = pl.array(value)

    if len(vars['lower_bound_data']) > 0:
        @mc.observed
        @mc.stochastic(name='lower_bound_data_%s' % key)
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa, alpha) + pl.dot(Xb, pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu_i = [pl.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)) for s_i, ages, weights in zip(shifts, age_indices, age_weights)]  # TODO: try vectorizing this loop to increase speed
            rate_param = mu_i*N
            violated_bounds = pl.nonzero(rate_param < value)
            logp = mc.negative_binomial_like(value[violated_bounds], rate_param[violated_bounds], delta)
            return logp

        vars['observed_lower_bounds'] = obs_lb
        debug('likelihood of %s contains %d lowerbounds' % (key, len(vars['lower_bound_data'])))

    return vars
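A hypothetical call from dismod3's empirical-prior stage; the key format 'type+region+year+sex' is assumed from the HACK comments above, and `dm.data` stands in for whatever data list the caller selects:

    vars = setup(dm, 'prevalence+asia_east+2005+male',
                 data_list=dm.data,
                 emp_prior=dm.get_empirical_prior('prevalence'))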
Example #21
for i in range(len(plot_list)):
    plot_list_ix.append(cov_name.index(plot_list[i]))

# correctly ordering geographic regions
plot_list_ix_order = []
for i in range(len(index)):
    if index[i] in plot_list_ix:
        plot_list_ix_order.append(index[i])

for y, i in enumerate(plot_list_ix_order):
    x1 = data.ix[cov_name[i], 'moderately']
    x2 = data.ix[cov_name[i], 'slightly']
    x3 = data.ix[cov_name[i], 'very']

    xerr1 = pl.array([
        x1 - pl.atleast_2d(data.ix[cov_name[i], 'moderately_l']),
        pl.atleast_2d(data.ix[cov_name[i], 'moderately_u']) - x1
    ])
    xerr2 = pl.array([
        x2 - pl.atleast_2d(data.ix[cov_name[i], 'slightly_l']),
        pl.atleast_2d(data.ix[cov_name[i], 'slightly_u']) - x2
    ])
    xerr3 = pl.array([
        x3 - pl.atleast_2d(data.ix[cov_name[i], 'very_l']),
        pl.atleast_2d(data.ix[cov_name[i], 'very_u']) - x3
    ])

    pl.errorbar(x2, 2.75 * y + .45, xerr=xerr2, fmt='ko',
                mec='w')  #, label = 'Slightly'
    pl.errorbar(x1, 2.75 * y, xerr=xerr1, fmt='k^',
                mec='w')  #, label = 'Moderately'
Example #22
File: model.py Project: flaxter/gbd
 def sigma_explained(W=W, gamma=gamma):
     """ sigma_explained_i,r,c,t,a = gamma * W_i,r,c,t,a"""
     return pl.dot(pl.atleast_1d(gamma), pl.atleast_2d(W))
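The same broadcasting in plain pylab (hypothetical values): a scalar gamma lifted to 1-D, dotted with a row matrix W, yields one value per column:

    import pylab as pl
    print(pl.dot(pl.atleast_1d(2.0), pl.atleast_2d(pl.arange(3.0))))  # [0. 2. 4.]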
Example #23
    udata_12 = data["VSHS_OP"][:-1].values[e:int_end[k]:int_step]

    udata_13 = data["VSHS_CL"][:-1].values[e:int_end[k]:int_step]

    udata = ca.horzcat([udata_0, udata_1, udata_2, udata_3, udata_4, udata_5, udata_6, udata_7, udata_8, udata_9, \
    udata_10, udata_11, udata_12, udata_13])[:-1,:]



    x0_init = data["TSH0"].values[e:int_end[k]:int_step]
    x1_init = data["TSH2"].values[e:int_end[k]:int_step]
    x2_init = data["TSH3"].values[e:int_end[k]:int_step]
    x3_init = data["TSH1"].values[e:int_end[k]:int_step] 

    xinit = ca.horzcat([pl.atleast_2d(x0_init).T, pl.atleast_2d(x1_init).T, pl.atleast_2d(x2_init).T, pl.atleast_2d(x3_init).T,]) 

    ydata_0 = data["TSH0"].values[e:int_end[k]:int_step]
    ydata_1 = data["TSH2"].values[e:int_end[k]:int_step]
    ydata_2 = data["TSH3"].values[e:int_end[k]:int_step]
    ydata_3 = data["TSH1"].values[e:int_end[k]:int_step]

    ydata = ca.horzcat([pl.atleast_2d(ydata_0).T, pl.atleast_2d(ydata_1).T, pl.atleast_2d(ydata_2).T, pl.atleast_2d(ydata_3).T,]) #ca.repmat(y1_5_init, (1, ydata.shape[0])).T])



    pe_setups.append(cp.pe.LSq(system = system, time_points = time_points, \
        udata = udata, \
        pinit = pinit, \
        ydata = ydata, \
        xinit = xinit)) #, \
Example #24
    udata_13 = data["VSHS_CL"][:-1].values[e:int_end[k]:int_step]

    udata = ca.horzcat([udata_0, udata_1, udata_2, udata_3, udata_4, udata_5, udata_6, udata_7, udata_8, udata_9, \
    udata_10, udata_11, udata_12, udata_13])[:-1,:]



    x0_init = data["TSH0"].values[e:int_end[k]:int_step]
    x1_init = data["TSH2"].values[e:int_end[k]:int_step]
    x2_init = data["TSH3"].values[e:int_end[k]:int_step]
    x3_init = data["TSH1"].values[e:int_end[k]:int_step] 
    x4_init = data["TSH0_5"].values[e:int_end[k]:int_step]
    x5_init = data["TSH2_5"].values[e:int_end[k]:int_step]
    x6_init = data["TSH3_5"].values[e:int_end[k]:int_step] 

    xinit = ca.horzcat([pl.atleast_2d(x0_init).T, pl.atleast_2d(x1_init).T, pl.atleast_2d(x2_init).T, pl.atleast_2d(x3_init).T, \
        pl.atleast_2d(x4_init).T, pl.atleast_2d(x5_init).T, pl.atleast_2d(x6_init).T,]) 

    ydata_0 = data["TSH0"].values[e:int_end[k]:int_step]
    ydata_1 = data["TSH2"].values[e:int_end[k]:int_step]
    ydata_2 = data["TSH3"].values[e:int_end[k]:int_step]
    ydata_3 = data["TSH1"].values[e:int_end[k]:int_step]
    ydata_4 = data["TSH0_5"].values[e:int_end[k]:int_step]
    ydata_5 = data["TSH2_5"].values[e:int_end[k]:int_step]
    ydata_6 = data["TSH3_5"].values[e:int_end[k]:int_step] 


    ydata = ca.horzcat([pl.atleast_2d(ydata_0).T, pl.atleast_2d(ydata_1).T, pl.atleast_2d(ydata_2).T, pl.atleast_2d(ydata_3).T,\
        pl.atleast_2d(ydata_4).T, pl.atleast_2d(ydata_5).T, pl.atleast_2d(ydata_6).T,]) 

    # wv = pl.ones(ydata.shape[0])
Example #25
 def extract(self):
     Features.extract(self)
      mf = (self.X.T * self._logfrqs).sum(1) / self.X.T.sum(1)  # per-frame log-frequency centroid
      self.X = (((self.X / self.X.T.sum(1)).T * ((P.atleast_2d(self._logfrqs).T - mf)).T)**2).sum(1)  # power-weighted squared deviation from the centroid (spectral spread)
Example #26
def predict_for(model, parameters,
                root_area, root_sex, root_year,
                area, sex, year,
                population_weighted,
                vars,
                lower, upper):
    """ Generate draws from posterior predicted distribution for a
    specific (area, sex, year)

    :Parameters:
      - `model` : data.DataModel
      - `root_area` : str, area for which this model was fit consistently
      - `root_sex` : str, sex for which this model was fit consistently
      - `root_year` : str, year for which this model was fit consistently
      - `area` : str, area to predict for
      - `sex` : str, sex to predict for
      - `year` : str, year to predict for
      - `population_weighted` : bool, should the prediction be population weighted when it aggregates units in the area RE hierarchy?
      - `vars` : dict, including entries for alpha, beta, mu_age, U, and X
      - `lower, upper` : float, bounds on predictions from expert priors

    :Results:
      - Returns array of draws from posterior predicted distribution

    """
    area_hierarchy = model.hierarchy
    output_template = model.output_template.copy()

    # find number of samples from posterior
    len_trace = len(vars['mu_age'].trace())

    # compile array of draws from posterior distribution of alpha (random effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each random effect (e.g. countries with data, regions with countries with data, etc)
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['alpha'] is a list of pymc Nodes
    #   vars['alpha'] is a list of floats
    #   vars['alpha'] is a list of some floats and some pymc Nodes
    #   'alpha' is not in vars
    #
    # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    
    if 'alpha' in vars and isinstance(vars['alpha'], mc.Node):
        assert 0, 'No longer used'
        alpha_trace = vars['alpha'].trace()
    elif 'alpha' in vars and isinstance(vars['alpha'], list):
        alpha_trace = []
        for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']):
            if isinstance(n, mc.Node):
                alpha_trace.append(n.trace())
            else:
                # uncertainty of constant alpha incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                alpha_trace.append(mc.rnormal(float(n), sigma**-2, size=len_trace))
        alpha_trace = pl.vstack(alpha_trace).T
    else:
        alpha_trace = pl.array([])


    # compile array of draws from posterior distribution of beta (fixed effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each fixed effect
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['beta'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['beta'] is a list of pymc Nodes
    #   vars['beta'] is a list of floats
    #   vars['beta'] is a list of some floats and some pymc Nodes
    #   'beta' is not in vars
    #
    # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    #
    # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above)

    if 'beta' in vars and isinstance(vars['beta'], mc.Node):
        assert 0, 'No longer used'
        beta_trace = vars['beta'].trace()
    elif 'beta' in vars and isinstance(vars['beta'], list):
        beta_trace = []
        for n, sigma in zip(vars['beta'], vars['const_beta_sigma']):
            if isinstance(n, mc.Node):
                beta_trace.append(n.trace())
            else:
                # uncertainty of constant beta incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                beta_trace.append(mc.rnormal(float(n), sigma**-2., size=len_trace))
        beta_trace = pl.vstack(beta_trace).T
    else:
        beta_trace = pl.array([])

    # the prediction for the requested area is produced by aggregating predictions for all of the children
    # of that area in the area_hierarchy (a networkx.DiGraph)

    leaves = [n for n in nx.traversal.bfs_tree(area_hierarchy, area) if area_hierarchy.successors(n) == []]
    if len(leaves) == 0:
        # networkx returns an empty list when the bfs tree is a single node
        leaves = [area]


    # initialize covariate_shift and total_population
    covariate_shift = pl.zeros(len_trace)
    total_population = 0.

    # group output_template for easy access
    output_template = output_template.groupby(['area', 'sex', 'year']).mean()

    # if there are fixed effects, the effect coefficients are stored as an array in vars['X']
    # use this to put together a covariate matrix for the predictions, according to the output_template
    # covariate values
    #
    # the resulting array is covs
    if 'X' in vars:
        covs = output_template.filter(vars['X'].columns)
        if 'x_sex' in vars['X'].columns:
            covs['x_sex'] = sex_value[sex]
        assert pl.all(covs.columns == vars['X_shift'].index), 'covariate columns and unshift index should match up'
        for x_i in vars['X_shift'].index:
            covs[x_i] -= vars['X_shift'][x_i] # shift covariates so that the root node has X_ar,sr,yr == 0
    else:
        covs = pandas.DataFrame(index=output_template.index)

    # if there are random effects, put together an indicator based on
    # their hierarchical relationships
    #
    if 'U' in vars:
        p_U = area_hierarchy.number_of_nodes()  # random effects for area
        U_l = pandas.DataFrame(pl.zeros((1, p_U)), columns=area_hierarchy.nodes())
        U_l = U_l.filter(vars['U'].columns)
    else:
        U_l = pandas.DataFrame(index=[0])

    # loop through leaves of area_hierarchy subtree rooted at 'area',
    # make prediction for each using appropriate random
    # effects and appropriate fixed effect covariates
    #
    for l in leaves:
        log_shift_l = pl.zeros(len_trace)
        U_l.ix[0,:] = 0.

        root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l)
        for node in root_to_leaf[1:]:
            if node not in U_l.columns:
                ## Add a column U_l[node] = rnormal(0, appropriate_tau)
                level = len(nx.shortest_path(area_hierarchy, 'all', node))-1
                if 'sigma_alpha' in vars:
                    tau_l = vars['sigma_alpha'][level].trace()**-2
                    
                U_l[node] = 0.

                # if this node was not already included in the alpha_trace array, add it
                # there are several cases for adding:
                #  if the random effect has a distribution of Constant
                #    add it, using a sigma as well
                #  otherwise, sample from a normal with mean zero and standard deviation tau_l
                if parameters.get('random_effects', {}).get(node, {}).get('dist') == 'Constant':
                    mu = parameters['random_effects'][node]['mu']
                    sigma = parameters['random_effects'][node]['sigma']
                    sigma = max(sigma, 1.e-9) # make sure sigma is non-zero

                    alpha_node = mc.rnormal(mu,
                                            sigma**-2,
                                            size=len_trace)
                else:
                    if 'sigma_alpha' in vars:
                        alpha_node = mc.rnormal(0., tau_l)
                    else:
                        alpha_node = pl.zeros(len_trace)

                if len(alpha_trace) > 0:
                    alpha_trace = pl.vstack((alpha_trace.T, alpha_node)).T
                else:
                    alpha_trace = pl.atleast_2d(alpha_node).T

            # TODO: implement a more robust way to align alpha_trace and U_l
            U_l.ix[0, node] = 1.

        # 'shift' the random effects matrix to have the intended
        # level of the hierarchy as the reference value
        if 'U_shift' in vars:
            for node in vars['U_shift']:
                U_l -= vars['U_shift'][node]

        # add the random effect intercept shift (len_trace draws)
        log_shift_l += pl.dot(alpha_trace, U_l.T).flatten()
            
        # make X_l
        if len(beta_trace) > 0:
            X_l = covs.ix[l, sex, year]
            log_shift_l += pl.dot(beta_trace, X_l.T).flatten()

        if population_weighted:
            # combine in linear-space with population weights
            shift_l = pl.exp(log_shift_l)
            covariate_shift += shift_l * output_template['pop'][l,sex,year]
            total_population += output_template['pop'][l,sex,year]
        else:
            # combine in log-space without weights
            covariate_shift += log_shift_l
            total_population += 1.

    if population_weighted:
        covariate_shift /= total_population
    else:
        covariate_shift = pl.exp(covariate_shift / total_population)
        
    parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T
        
    # clip predictions to bounds from expert priors
    parameter_prediction = parameter_prediction.clip(lower, upper)
    
    return parameter_prediction
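The branch above combines leaf-level shifts either in linear space with population weights or in log space with equal weights. A minimal standalone sketch of the two modes, with made-up draws and populations (all names below are hypothetical, not part of the model code):

import pylab as pl

# hypothetical log-shift draws for three leaves, 4 posterior draws each
log_shifts = [pl.array([.1, .2, .0, .1]),
              pl.array([.3, .1, .2, .2]),
              pl.array([.0, .0, .1, .3])]
pops = [100., 50., 25.]  # hypothetical leaf populations

# population_weighted=True: exponentiate each leaf, then weight in linear space
weighted = sum(pl.exp(ls) * p for ls, p in zip(log_shifts, pops)) / sum(pops)

# population_weighted=False: average in log space, exponentiate at the end
unweighted = pl.exp(sum(log_shifts) / len(log_shifts))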
udata = ca.horzcat([udata_0, udata_1, udata_2, udata_3, udata_4, udata_5, udata_6, udata_7, udata_8, udata_9, \
    udata_10, udata_11, udata_12, udata_13])

x0_init = data["TSH0"].values[int_start]
x1_init = data["TSH2"].values[int_start]
x2_init = data["TSH3"].values[int_start]
x3_init = data["TSH1"].values[int_start]
x4_init = data["TSH0_1"].values[int_start]
x5_init = data["TSH0_2"].values[int_start]
x6_init = data["TSH2_1"].values[int_start]
x7_init = data["TSH2_2"].values[int_start]
x8_init = data["TSH3_1"].values[int_start]
x9_init = data["TSH3_2"].values[int_start]

xinit = ca.horzcat([pl.atleast_2d(x0_init).T, pl.atleast_2d(x1_init).T, pl.atleast_2d(x2_init).T, pl.atleast_2d(x3_init).T, \
    pl.atleast_2d(x4_init).T, pl.atleast_2d(x5_init).T, pl.atleast_2d(x6_init).T, pl.atleast_2d(x7_init).T, pl.atleast_2d(x8_init).T, \
    pl.atleast_2d(x9_init).T,])
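Each of the series above is 1-D, so pl.atleast_2d(...).T reshapes it into a column before horizontal concatenation; a small sketch of the pattern with illustrative values:

import pylab as pl

a = pl.array([1., 2., 3.])        # shape (3,)
col = pl.atleast_2d(a).T          # shape (3, 1): a proper column vector
two_cols = pl.hstack([col, col])  # shape (3, 2)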

# mpe = cp.pe.MultiLSq(pe_setups)
# # # mpe.run_parameter_estimation({"linear_solver": "ma57"})
# mpe.run_parameter_estimation()

sim_est = cp.sim.Simulation(system=system, pdata=0.0)
# sim_est = cp.sim.Simulation(system = system, pdata = mpe.estimated_parameters)
sim_est.run_system_simulation(time_points = time_points, \
    x0 = xinit[0,:], udata = udata)

pl.close("all")

# # # Plot
Example #28
0
def extract(self):
    Features.extract(self)
    mf = (self.X.T * self._logfrqs).sum(1) / self.X.T.sum(1)
    self.X = (((self.X / self.X.T.sum(1)).T *
               ((P.atleast_2d(self._logfrqs).T - mf)).T)**2).sum(1)
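This appears to compute a spectral-spread-style feature: the power-weighted mean log-frequency per frame, then a squared-deviation statistic around it. A standalone sketch of the conventional weighted-variance formulation, with hypothetical array names (note the original squares the weighted deviations, a slight variant):

import pylab as P

X = P.rand(128, 10)                  # hypothetical spectrogram: bins x frames
logfrqs = P.linspace(5., 10., 128)   # hypothetical log-frequency per bin

w = X / X.sum(0)                           # normalize each frame to a distribution
mean_lf = (w.T * logfrqs).sum(1)           # weighted mean log-frequency per frame
spread = (w.T * (logfrqs - P.atleast_2d(mean_lf).T)**2).sum(1)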
Example #29
0
def predict_for(model, parameters, root_area, root_sex, root_year, area, sex,
                year, population_weighted, vars, lower, upper):
    """ Generate draws from posterior predicted distribution for a
    specific (area, sex, year)

    :Parameters:
      - `model` : data.ModelData
      - `root_area` : str, area for which this model was fit consistently
      - `root_sex` : str, sex for which this model was fit consistently
      - `root_year` : str, year for which this model was fit consistently
      - `area` : str, area to predict for
      - `sex` : str, sex to predict for
      - `year` : str, year to predict for
      - `population_weighted` : bool, whether the prediction should be population weighted when it aggregates units of the area RE hierarchy
      - `vars` : dict, including entries for alpha, beta, mu_age, U, and X
      - `lower, upper` : float, bounds on predictions from expert priors

    :Results:
      - Returns array of draws from posterior predicted distribution

    """
    area_hierarchy = model.hierarchy
    output_template = model.output_template.copy()

    # find number of samples from posterior
    len_trace = len(vars['mu_age'].trace())

    # compile array of draws from posterior distribution of alpha (random effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each random effect (e.g. countries with data, regions with countries with data, etc)
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['alpha'] is a list of pymc Nodes
    #   vars['alpha'] is a list of floats
    #   vars['alpha'] is a list of some floats and some pymc Nodes
    #   'alpha' is not in vars
    #
    # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction

    if 'alpha' in vars and isinstance(vars['alpha'], mc.Node):
        assert 0, 'No longer used'
        alpha_trace = vars['alpha'].trace()
    elif 'alpha' in vars and isinstance(vars['alpha'], list):
        alpha_trace = []
        for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']):
            if isinstance(n, mc.Node):
                alpha_trace.append(n.trace())
            else:
                # uncertainty of constant alpha incorporated here
                sigma = max(sigma, 1.e-9)  # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                alpha_trace.append(
                    mc.rnormal(float(n), sigma**-2, size=len_trace))
        alpha_trace = pl.vstack(alpha_trace).T
    else:
        alpha_trace = pl.array([])

    # compile array of draws from posterior distribution of beta (fixed effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each fixed effect
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['beta'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['beta'] is a list of pymc Nodes
    #   vars['beta'] is a list of floats
    #   vars['beta'] is a list of some floats and some pymc Nodes
    #   'beta' is not in vars
    #
    # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    #
    # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above)

    if 'beta' in vars and isinstance(vars['beta'], mc.Node):
        assert 0, 'No longer used'
        beta_trace = vars['beta'].trace()
    elif 'beta' in vars and isinstance(vars['beta'], list):
        beta_trace = []
        for n, sigma in zip(vars['beta'], vars['const_beta_sigma']):
            if isinstance(n, mc.Node):
                beta_trace.append(n.trace())
            else:
                # uncertainty of constant beta incorporated here
                sigma = max(sigma, 1.e-9)  # make sure sigma is non-zero
                assert not pl.isnan(sigma)
                beta_trace.append(
                    mc.rnormal(float(n), sigma**-2., size=len_trace))
        beta_trace = pl.vstack(beta_trace).T
    else:
        beta_trace = pl.array([])

    # the prediction for the requested area is produced by aggregating predictions for all of the children
    # of that area in the area_hierarchy (a networkx.DiGraph)

    leaves = [
        n for n in nx.traversal.bfs_tree(area_hierarchy, area)
        if area_hierarchy.successors(n) == []
    ]
    if len(leaves) == 0:
        # networkx returns an empty list when the bfs tree is a single node
        leaves = [area]

    # initialize covariate_shift and total_population
    covariate_shift = pl.zeros(len_trace)
    total_population = 0.

    # group output_template for easy access
    output_template = output_template.groupby(['area', 'sex', 'year']).mean()

    # if there are fixed effects, the effect coefficients are stored as an array in vars['X']
    # use this to put together a covariate matrix for the predictions, according to the output_template
    # covariate values
    #
    # the resulting array is covs
    if 'X' in vars:
        covs = output_template.filter(vars['X'].columns)
        if 'x_sex' in vars['X'].columns:
            covs['x_sex'] = sex_value[sex]
        assert pl.all(covs.columns == vars['X_shift'].index
                      ), 'covariate columns and unshift index should match up'
        for x_i in vars['X_shift'].index:
            covs[x_i] -= vars['X_shift'][
                x_i]  # shift covariates so that the root node has X_ar,sr,yr == 0
    else:
        covs = pandas.DataFrame(index=output_template.index)

    # if there are random effects, put together an indicator based on
    # their hierarchical relationships
    #
    if 'U' in vars:
        p_U = area_hierarchy.number_of_nodes()  # random effects for area
        U_l = pandas.DataFrame(pl.zeros((1, p_U)),
                               columns=area_hierarchy.nodes())
        U_l = U_l.filter(vars['U'].columns)
    else:
        U_l = pandas.DataFrame(index=[0])

    # loop through leaves of area_hierarchy subtree rooted at 'area',
    # make prediction for each using appropriate random
    # effects and appropriate fixed effect covariates
    #
    for l in leaves:
        log_shift_l = pl.zeros(len_trace)
        U_l.ix[0, :] = 0.

        root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l)
        for node in root_to_leaf[1:]:
            if node not in U_l.columns:
                ## Add a column U_l[node] = rnormal(0, appropriate_tau)
                level = len(nx.shortest_path(area_hierarchy, 'all', node)) - 1
                if 'sigma_alpha' in vars:
                    tau_l = vars['sigma_alpha'][level].trace()**-2

                U_l[node] = 0.

                # if this node was not already included in the alpha_trace array, add it
                # there are several cases for adding:
                #  if the random effect has a distribution of Constant
                #    add it, using a sigma as well
                #  otherwise, sample from a normal with mean zero and precision tau_l
                if parameters.get('random_effects',
                                  {}).get(node, {}).get('dist') == 'Constant':
                    mu = parameters['random_effects'][node]['mu']
                    sigma = parameters['random_effects'][node]['sigma']
                    sigma = max(sigma, 1.e-9)  # make sure sigma is non-zero

                    alpha_node = mc.rnormal(mu, sigma**-2, size=len_trace)
                else:
                    if 'sigma_alpha' in vars:
                        alpha_node = mc.rnormal(0., tau_l)
                    else:
                        alpha_node = pl.zeros(len_trace)

                if len(alpha_trace) > 0:
                    alpha_trace = pl.vstack((alpha_trace.T, alpha_node)).T
                else:
                    alpha_trace = pl.atleast_2d(alpha_node).T

            # TODO: implement a more robust way to align alpha_trace and U_l
            U_l.ix[0, node] = 1.

        # 'shift' the random effects matrix to have the intended
        # level of the hierarchy as the reference value
        if 'U_shift' in vars:
            for node in vars['U_shift']:
                U_l -= vars['U_shift'][node]

        # add the random effect intercept shift (len_trace draws)
        log_shift_l += pl.dot(alpha_trace, U_l.T).flatten()

        # make X_l
        if len(beta_trace) > 0:
            X_l = covs.ix[l, sex, year]
            log_shift_l += pl.dot(beta_trace, X_l.T).flatten()

        if population_weighted:
            # combine in linear-space with population weights
            shift_l = pl.exp(log_shift_l)
            covariate_shift += shift_l * output_template['pop'][l, sex, year]
            total_population += output_template['pop'][l, sex, year]
        else:
            # combine in log-space without weights
            covariate_shift += log_shift_l
            total_population += 1.

    if population_weighted:
        covariate_shift /= total_population
    else:
        covariate_shift = pl.exp(covariate_shift / total_population)

    parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T

    # clip predictions to bounds from expert priors
    parameter_prediction = parameter_prediction.clip(lower, upper)

    return parameter_prediction
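A hypothetical invocation of predict_for, assuming a fitted model with a prevalence submodel stored under the key 'p' (the attribute names and argument values below are placeholders, not taken from the source):

pred = predict_for(model, model.parameters['p'],
                   'all', 'total', 'all',    # root area/sex/year of the fit
                   'USA', 'male', 2005,      # target area/sex/year
                   True,                     # population-weighted aggregation
                   model.vars['p'], 0., 1.)  # vars, lower, upper
# pred: one row per posterior draw, one column per age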
Example #30
0
# index of area of interest in covariate list
plot_list_ix = []
for i in range(len(plot_list)): 
    plot_list_ix.append(cov_name.index(plot_list[i]))

# correctly ordering geographic regions
plot_list_ix_order = []
for i in range(len(index)):
    if index[i] in plot_list_ix: plot_list_ix_order.append(index[i])
    
for y, i in enumerate(plot_list_ix_order):
    x1 = data.ix[cov_name[i],'moderately']
    x2 = data.ix[cov_name[i],'slightly'] 
    x3 = data.ix[cov_name[i],'very']
    
    xerr1 = pl.array([x1 - pl.atleast_2d(data.ix[cov_name[i],'moderately_l']),
                      pl.atleast_2d(data.ix[cov_name[i],'moderately_u']) - x1])
    xerr2 = pl.array([x2 - pl.atleast_2d(data.ix[cov_name[i],'slightly_l']),
                      pl.atleast_2d(data.ix[cov_name[i],'slightly_u']) - x2])
    xerr3 = pl.array([x3 - pl.atleast_2d(data.ix[cov_name[i],'very_l']),
                      pl.atleast_2d(data.ix[cov_name[i],'very_u']) - x3])
        
    pl.errorbar(x2, 2.75*y+.45, xerr=xerr2, fmt='ko', mec='w')#, label = 'Slightly'
    pl.errorbar(x1, 2.75*y, xerr=xerr1, fmt='k^', mec='w')#, label = 'Moderately'
    pl.errorbar(x3, 2.75*y-.45, xerr=xerr3, fmt='ks', mec='w')#, label = 'Very'
        
pl.plot([-10],[-10], 'ko-', mec='w', label = '$\\delta \\sim \\mathrm{Uniform}(9,$ $81)$')
pl.plot([-10],[-10], 'k^-', mec='w', label = '$\\delta \\sim \\mathrm{Uniform}(3,$ $27)$')
pl.plot([-10],[-10], 'ks-', mec='w', label = '$\\delta \\sim \\mathrm{Uniform}(1,$ $9)$')
l,r,b,t = pl.axis()
r = 3.0
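The xerr arrays above use matplotlib's asymmetric error-bar convention, a 2xN array of distances below and above each point; a minimal sketch with illustrative numbers:

import pylab as pl

x = pl.array([1.0, 2.0, 3.0])
lo = pl.array([0.8, 1.5, 2.9])     # hypothetical lower CI bounds
hi = pl.array([1.3, 2.2, 3.6])     # hypothetical upper CI bounds
xerr = pl.array([x - lo, hi - x])  # shape (2, N): below, above
pl.errorbar(x, pl.arange(3), xerr=xerr, fmt='ko', mec='w')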
Example #31
0
File: utils.py Project: aflaxman/gbd
def generate_prior_potentials(rate_vars, prior_str, age_mesh):
    """
    augment the rate_vars dict to include a list of potentials that model priors on  rate_vars['rate_stoch']

    prior_str may have entries in the following format:
      smooth <scale> [<age_start> <age_end>]
      heterogeneity <...> (affects the dispersion term; handled as a special case)
      increasing <age_start> <age_end>
      decreasing <age_start> <age_end>
      convex_up <age_start> <age_end>
      convex_down <age_start> <age_end>
      unimodal <age_start> <age_end>
      level_value <value> [<age_start> <age_end>]
      at_least <value>
      at_most <value>
      max_at_least <value>

    for example: 'smooth .1, increasing 0 20, at_most .5'

    age_mesh[i] indicates what age the value of rate[i] corresponds to
    """

    def derivative_sign_prior(rate, prior, deriv, sign):
        age_start = int(prior[1])
        age_end = int(prior[2])
        age_indices = indices_for_range(age_mesh, age_start, age_end)
        @mc.potential(name='deriv_sign_{%d,%d,%d,%d}^%s' % (deriv, sign, age_start, age_end, str(rate)))
        def deriv_sign_rate(f=rate,
                            age_indices=age_indices,
                            tau=1.e14,
                            deriv=deriv, sign=sign):
            df = pl.diff(f[age_indices], deriv)
            return mc.normal_like(pl.absolute(df) * (sign * df < 0), 0., tau)
        return [deriv_sign_rate]

    priors = []
    rate = rate_vars['rate_stoch']
    rate_vars['bounds_func'] = lambda f, age: f
    for line in prior_str.split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'smooth':
            pass # handle this after applying all level bounds
        elif prior[0] == 'heterogeneity':
            # prior affects dispersion term of model; handle as a special case
            continue

        elif prior[0] == 'increasing':
            priors += derivative_sign_prior(rate, prior, deriv=1, sign=1)
        elif prior[0] == 'decreasing':
            priors += derivative_sign_prior(rate, prior, deriv=1, sign=-1)
        elif prior[0] == 'convex_down':
            priors += derivative_sign_prior(rate, prior, deriv=2, sign=-1)
        elif prior[0] == 'convex_up':
            priors += derivative_sign_prior(rate, prior, deriv=2, sign=1)

        elif prior[0] == 'unimodal':
            age_start = int(prior[1])
            age_end = int(prior[2])
            age_indices = indices_for_range(age_mesh, age_start, age_end)

            @mc.potential(name='unimodal_{%d,%d}^%s' % (age_start, age_end, str(rate)))
            def unimodal_rate(f=rate, age_indices=age_indices, tau=1.e5):
                df = pl.diff(f[age_indices])
                sign_changes = pl.find((df[:-1] > NEARLY_ZERO) & (df[1:] < -NEARLY_ZERO))
                sign = pl.ones(len(age_indices)-2)
                if len(sign_changes) > 0:
                    change_age = sign_changes[len(sign_changes)/2]
                    sign[change_age:] = -1.
                return -tau*pl.dot(pl.absolute(df[:-1]), (sign * df[:-1] < 0))
            priors += [unimodal_rate]

        elif prior[0] == 'max_at_least':
            val = float(prior[1])

            @mc.potential(name='max_at_least_{%f}^{%s}' % (val, str(rate)))
            def max_at_least(cur_max=rate, at_least=val, tau=(.001*val)**-2):
                return -tau * (cur_max - at_least)**2 * (cur_max < at_least)
            priors += [max_at_least]

        elif prior[0] == 'level_value':
            val = float(prior[1]) + 1.e-9

            if len(prior) == 4:
                age_start = int(prior[2])
                age_end = int(prior[3])
            else:
                age_start = 0
                age_end = MAX_AGE
            age_indices = indices_for_range(age_mesh, age_start, age_end)

            def new_bounds_func(f, age, val=val, age_start=age_start, age_end=age_end, prev_bounds_func=rate_vars['bounds_func']):
                age = pl.array(age)
                return pl.where((age >= age_start) & (age <= age_end), val, prev_bounds_func(f, age))
            rate_vars['bounds_func'] = new_bounds_func

        elif prior[0] == 'at_most':
            val = float(prior[1])

            def new_bounds_func(f, age, val=val, prev_bounds_func=rate_vars['bounds_func']):
                return pl.minimum(prev_bounds_func(f, age), val)
            rate_vars['bounds_func'] = new_bounds_func

        elif prior[0] == 'at_least':
            val = float(prior[1])

            def new_bounds_func(f, age, val=val, prev_bounds_func=rate_vars['bounds_func']):
                return pl.maximum(prev_bounds_func(f, age), val)
            rate_vars['bounds_func'] = new_bounds_func

        else:
            raise KeyError, 'Unrecognized prior: %s' % prior_str

    # update rate stoch with the bounds func from the priors
    # TODO: create this before smoothing, so that smoothing takes levels into account
    @mc.deterministic(name='%s_w_bounds'%rate_vars['rate_stoch'].__name__)
    def mu_bounded(mu=rate_vars['rate_stoch'], bounds_func=rate_vars['bounds_func']):
        return bounds_func(mu, pl.arange(101))  # FIXME: don't hardcode age range
    rate_vars['unbounded_rate'] = rate_vars['rate_stoch']
    rate_vars['rate_stoch'] = mu_bounded
    rate = rate_vars['rate_stoch']

    # add potential to encourage rate to look like level bounds
    @mc.potential(name='%s_potential'%rate_vars['rate_stoch'].__name__)
    def mu_potential(mu1=rate_vars['unbounded_rate'], mu2=rate_vars['rate_stoch']):
        return mc.normal_like(mu1, mu2, .0001**-2)
    rate_vars['rate_potential'] = mu_potential

    # add smoothing prior to the rate with level bounds
    for line in prior_str.split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'smooth':
            scale = float(prior[1])

            if len(prior) == 4:
                age_start = int(prior[2])
                age_end = int(prior[3])
            else:
                age_start = 0
                age_end = MAX_AGE
                
            age_indices = indices_for_range(pl.arange(MAX_AGE), age_start, age_end)
            
            from pymc.gp.cov_funs import matern
            a = pl.atleast_2d(age_indices).T
            C = matern.euclidean(a, a, diff_degree=2, amp=10., scale=scale)
            @mc.potential(name='smooth_{%d,%d}^%s' % (age_start, age_end, str(rate)))
            def smooth_rate(f=rate, age_indices=age_indices, C=C):
                log_rate = pl.log(pl.maximum(f, NEARLY_ZERO))
                return mc.mv_normal_cov_like(log_rate[age_indices] - log_rate[age_indices].mean(),
                                             pl.zeros_like(age_indices),
                                             C=C)
            priors += [smooth_rate]
            print 'added smoothing potential for %s' % smooth_rate


    rate_vars['priors'] = priors
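A hypothetical call, assuming rate_vars already contains a 'rate_stoch' node and that PRIOR_SEP_STR is ',' (both are assumptions, not confirmed by this fragment):

# prior string drawn from the grammar documented above
generate_prior_potentials(rate_vars,
                          'smooth 1.0, increasing 0 20, at_most 0.5',
                          pl.arange(0, 101, 5))
# rate_vars['priors'] now holds the potentials, and rate_vars['rate_stoch']
# has been wrapped with the level-bound function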
Example #32
0
File: graphics.py Project: aflaxman/gbd
def plot_one_effects(model, data_type):
    """ Plot random effects and fixed effects.
    
    :Parameters:
      - `model` : data.ModelData
      - `data_types` : str, one of 'i', 'r', 'f', 'p', 'rr', 'pf'
      
    """
    vars = model.vars[data_type]
    hierarchy = model.hierarchy
    
    pl.figure(figsize=(22, 17))
    for i, (covariate, effect) in enumerate([['U', 'alpha'], ['X', 'beta']]):
        if covariate not in vars:
            continue
        
        cov_name = list(vars[covariate].columns)
        
        if isinstance(vars.get(effect), mc.Stochastic):
            pl.subplot(1, 2, i+1)
            pl.title('%s_%s' % (effect, data_type))

            stats = vars[effect].stats()
            if stats:
                if effect == 'alpha':
                    index = sorted(pl.arange(len(cov_name)),
                                   key=lambda i: str(cov_name[i] in hierarchy and nx.shortest_path(hierarchy, 'all', cov_name[i]) or cov_name[i]))
                elif effect == 'beta':
                    index = pl.arange(len(cov_name))

                x = pl.atleast_1d(stats['mean'])
                y = pl.arange(len(x))

                xerr = pl.array([x - pl.atleast_2d(stats['95% HPD interval'])[:,0],
                                 pl.atleast_2d(stats['95% HPD interval'])[:,1] - x])
                pl.errorbar(x[index], y[index], xerr=xerr[:, index], fmt='bs', mec='w')

                l,r,b,t = pl.axis()
                pl.vlines([0], b-.5, t+.5)
                pl.hlines(y, l, r, linestyle='dotted')
                pl.xticks([l, 0, r])
                pl.yticks([])
                for i in index:
                    spaces = cov_name[i] in hierarchy and len(nx.shortest_path(hierarchy, 'all', cov_name[i])) or 0
                    pl.text(l, y[i], '%s%s' % (' * '*spaces, cov_name[i]), va='center', ha='left')
                pl.axis([l, r, -.5, t+.5])
                
        if isinstance(vars.get(effect), list):
            pl.subplot(1, 2, i+1)
            pl.title('%s_%s' % (effect, data_type))
            index = sorted(pl.arange(len(cov_name)),
                           key=lambda i: str(cov_name[i] in hierarchy and nx.shortest_path(hierarchy, 'all', cov_name[i]) or cov_name[i]))

            for y, i in enumerate(index):
                n = vars[effect][i]
                if isinstance(n, mc.Stochastic) or isinstance(n, mc.Deterministic):
                    stats = n.stats()
                    if stats:
                        x = pl.atleast_1d(stats['mean'])

                        xerr = pl.array([x - pl.atleast_2d(stats['95% HPD interval'])[:,0],
                                         pl.atleast_2d(stats['95% HPD interval'])[:,1] - x])
                        pl.errorbar(x, y, xerr=xerr, fmt='bs', mec='w')

            l,r,b,t = pl.axis()
            pl.vlines([0], b-.5, t+.5)
            pl.hlines(y, l, r, linestyle='dotted')
            pl.xticks([l, 0, r])
            pl.yticks([])

            for y, i in enumerate(index):
                spaces = cov_name[i] in hierarchy and len(nx.shortest_path(hierarchy, 'all', cov_name[i])) or 0
                pl.text(l, y, '%s%s' % (' * '*spaces, cov_name[i]), va='center', ha='left')

            pl.axis([l, r, -.5, t+.5])
                

            if effect == 'alpha':
                effect_str = ''
                for sigma in vars['sigma_alpha']:
                    stats = sigma.stats()
                    if stats:
                        effect_str += '%s = %.3f\n' % (sigma.__name__, stats['mean'])
                    else:
                        effect_str += '%s = %.3f\n' % (sigma.__name__, sigma.value)
                pl.text(r, t, effect_str, va='top', ha='right')
            elif effect == 'beta':
                effect_str = ''
                if 'eta' in vars:
                    eta = vars['eta']
                    stats = eta.stats()
                    if stats:
                        effect_str += '%s = %.3f\n' % (eta.__name__, stats['mean'])
                    else:
                        effect_str += '%s = %.3f\n' % (eta.__name__, eta.value)
                pl.text(r, t, effect_str, va='top', ha='right')
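A hypothetical usage, assuming a fitted data.ModelData instance with a prevalence submodel:

plot_one_effects(model, 'p')  # left panel: random effects; right: fixed effects
pl.show()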