def OC(nelx, nely, x, volfrac, dc):
    # optimality-criteria update: bisection on the Lagrange multiplier of the
    # volume constraint, with move limit `move` and lower density bound 1e-3
    l1 = 0
    l2 = 100000
    move = 0.2
    while (l2 - l1) > 1e-4:
        lmid = 0.5*(l2 + l1)
        xnew = py.maximum(1e-3, py.maximum(x - move,
                py.minimum(1.0, py.minimum(x + move, x*py.sqrt(-dc/lmid)))))
        if py.sum(xnew) - volfrac*nelx*nely > 0:
            l1 = lmid
        else:
            l2 = lmid
    return xnew
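A minimal usage sketch (not from the original source), assuming `py` is numpy or pylab as the `py.` prefix above suggests; the design field and sensitivities are placeholders chosen only for illustration.
import numpy as py  # assumption: the snippet above uses numpy/pylab under the alias `py`

nelx, nely, volfrac = 4, 3, 0.5
x = volfrac * py.ones((nely, nelx))   # uniform initial design
dc = -py.ones((nely, nelx))           # placeholder compliance sensitivities (dc/dx <= 0)
xnew = OC(nelx, nely, x, volfrac, dc)
print(xnew.shape, py.sum(xnew) / (nelx*nely))  # updated densities; mean stays near volfrac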
Example #2
def alpha_psp(height, t1, t2, start, offset, time):
    t1 = p.maximum(t1, 0.)
    t2 = p.maximum(t2, 0.)

    tau_frac = t2 / p.float64(t1)
    t = p.maximum((time - start) / p.float64(t1), 0)
    epsilon = 1E-8
    if 1. - epsilon < tau_frac < 1. + epsilon:
        return parpsp_e(height, t) + offset
    else:
        return parpsp(height, tau_frac, t) + offset
def check(nelx, nely, rmin, x, dc):
    # mesh-independency (sensitivity) filter: weighted average of dc over a
    # neighbourhood of radius rmin around each element
    dcn = py.zeros((nely, nelx))
    r = int(py.floor(rmin))  # range() needs integer bounds
    for i in range(1, nelx + 1):
        for j in range(1, nely + 1):
            sumx = 0.0
            for k in range(max(i - r, 1), min(i + r, nelx) + 1):
                for l in range(max(j - r, 1), min(j + r, nely) + 1):
                    fac = rmin - py.sqrt((i - k)**2 + (j - l)**2)
                    sumx = sumx + py.maximum(0, fac)
                    dcn[j-1, i-1] = dcn[j-1, i-1] + py.maximum(0, fac)*x[l-1, k-1]*dc[l-1, k-1]
            dcn[j-1, i-1] = dcn[j-1, i-1]/(x[j-1, i-1]*sumx)
    return dcn
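A minimal sketch (an assumption, not from the source) exercising the sensitivity filter `check` above with placeholder data; again assumes numpy/pylab is available as `py`.
import numpy as py

nelx, nely, rmin = 6, 4, 1.5
x = 0.5 * py.ones((nely, nelx))       # design densities
dc = -py.ones((nely, nelx))           # placeholder sensitivities
dcn = check(nelx, nely, rmin, x, dc)  # filtered sensitivities, same shape as x
print(dcn.shape)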
def dense_gauss_kernel(sigma, x, y=None):
    xf = pylab.fft2(x)  # x in Fourier domain
    x_flat = x.flatten()
    xx = pylab.dot(x_flat.transpose(), x_flat)  # squared norm of x

    if y is not None:
        yf = pylab.fft2(y)
        y_flat = y.flatten()
        yy = pylab.dot(y_flat.transpose(), y_flat)
    else:
        yf = xf
        yy = xx

    xyf = pylab.multiply(xf, pylab.conj(yf))

    xyf_ifft = pylab.ifft2(xyf)
    row_shift, col_shift = pylab.floor(pylab.array(x.shape) / 2).astype(int)
    xy_complex = pylab.roll(xyf_ifft, row_shift, axis=0)
    xy_complex = pylab.roll(xy_complex, col_shift, axis=1)
    xy = pylab.real(xy_complex)

    scaling = -1 / (sigma**2)
    xx_yy = xx + yy
    xx_yy_2xy = xx_yy - 2 * xy
    k = pylab.exp(scaling * pylab.maximum(0, xx_yy_2xy / x.size))

    return k
Example #5
    def __call__(self, time, height, tau_1, tau_2, start, offset):
        """
        evaluate the psp for the given parameters
        """
        tau_1 = p.maximum(tau_1, 0.)
        tau_2 = p.maximum(tau_2, 0.)

        tau_frac = tau_2 / p.float64(tau_1)
        t = p.maximum((time - start) / p.float64(tau_1), 0)

        self.__shape_switch_limit = 1E-8

        if (1. - self.__shape_switch_limit < tau_frac <
                1. + self.__shape_switch_limit):
            return self.__psp_singular(height, t) + offset
        else:
            return self.__psp_normal(height, tau_frac, t) + offset
Example #6
def set_rate(rate_type, value):
    t = {
        'incidence': 'i',
        'remission': 'r',
        'excess-mortality': 'f'
    }[rate_type]
    for i, k_i in enumerate(model.vars[t]['knots']):
        model.vars[t]['gamma'][i].value = pl.log(pl.maximum(1.e-9, value[i]))
def ctc_align_targets(outputs,
                      targets,
                      threshold=100.0,
                      verbose=0,
                      debug=0,
                      lo=1e-5):
    # clip and renormalize the network outputs into per-frame distributions
    outputs = maximum(lo, outputs)
    outputs = outputs * 1.0 / sum(outputs, axis=1)[:, newaxis]
    # log-probability that each frame emits each target position's label
    match = dot(outputs, targets.T)
    lmatch = log(match)
    assert not isnan(lmatch).any()
    # forward-backward over the match scores gives an (unnormalized) posterior over paths
    both = forwardbackward(lmatch)
    epath = exp(both - amax(both))
    l = sum(epath, axis=0)[newaxis, :]
    epath /= where(l == 0.0, 1e-9, l)
    # project the path posterior back onto the label alphabet and renormalize per frame
    aligned = maximum(lo, dot(epath, targets))
    l = sum(aligned, axis=1)[:, newaxis]
    aligned /= where(l == 0.0, 1e-9, l)
    return aligned
Example #8
def simulated_age_intervals(data_type, n, a, pi_age_true, sigma_true):
    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start+1, pl.minimum(age_start+10,100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i+1)]) / (a_1i - a_0i) 
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n,3))
    beta_true = [-.1, .1, .2]
    beta_true = [0, 0, 0]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true*pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = pl.maximum(0., mc.rnormal(pi_true, 1./sigma_true**2.))

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    data['effective_sample_size'] = pl.maximum(p*(1-p)/sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'
    data['data_type'] = data_type
    
    return data
def plot_nonlinear_model(m, color='green', label='Nonlinear'):
    X = pl.arange(0., 1., .01)
    tfr_trace = []
    for beta, gamma in zip(m.beta.trace(), m.gamma.trace()):
        y = beta[0] + beta[1]*X + pl.maximum(0., beta[2]*(X-gamma))
        pl.plot(X, y,
                color='gray', alpha=.75, zorder=-1)
        tfr_trace.append(y)

    pl.plot(X, pl.mean(tfr_trace, axis=0),
            color=color, linewidth=5,
            label=label)
    decorate_plot()
Example #10
def plot_nonlinear_model(m, color='green', label='Nonlinear'):
    X = pl.arange(0., 1., .01)
    tfr_trace = []
    for beta, gamma in zip(m.beta.trace(), m.gamma.trace()):
        y = beta[0] + beta[1] * X + pl.maximum(0., beta[2] * (X - gamma))
        pl.plot(X, y, color='gray', alpha=.75, zorder=-1)
        tfr_trace.append(y)

    pl.plot(X,
            pl.mean(tfr_trace, axis=0),
            color=color,
            linewidth=5,
            label=label)
    decorate_plot()
Example #11
def dense_gauss_kernel(sigma, x, y=None):
    """
    Gaussian Kernel with dense sampling.
    Evaluates a gaussian kernel with bandwidth SIGMA for all displacements
    between input images X and Y, which must both be MxN. They must also
    be periodic (ie., pre-processed with a cosine window). The result is
    an MxN map of responses.

    If X and Y are the same, omit the third parameter to re-use some
    values, which is faster.
    """

    xf = pylab.fft2(x)  # x in Fourier domain
    x_flat = x.flatten()
    xx = pylab.dot(x_flat.transpose(), x_flat)  # squared norm of x

    if y is not None:
        # general case, x and y are different
        yf = pylab.fft2(y)
        y_flat = y.flatten()
        yy = pylab.dot(y_flat.transpose(), y_flat)
    else:
        # auto-correlation of x, avoid repeating a few operations
        yf = xf
        yy = xx

    # cross-correlation term in Fourier domain
    xyf = pylab.multiply(xf, pylab.conj(yf))

    # to spatial domain
    xyf_ifft = pylab.ifft2(xyf)
    #xy_complex = circshift(xyf_ifft, floor(x.shape/2))
    row_shift, col_shift = pylab.floor(pylab.array(x.shape) / 2).astype(int)
    xy_complex = pylab.roll(xyf_ifft, row_shift, axis=0)
    xy_complex = pylab.roll(xy_complex, col_shift, axis=1)
    xy = pylab.real(xy_complex)

    # calculate gaussian response for all positions
    scaling = -1 / (sigma**2)
    xx_yy = xx + yy
    xx_yy_2xy = xx_yy - 2 * xy
    k = pylab.exp(scaling * pylab.maximum(0, xx_yy_2xy / x.size))

    #print("dense_gauss_kernel x.shape ==", x.shape)
    #print("dense_gauss_kernel k.shape ==", k.shape)

    return k
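A hypothetical usage sketch (not part of the original source) for `dense_gauss_kernel`: build a small periodic patch, shift it circularly, and inspect the resulting response map.
import pylab

m, n = 32, 32
u = pylab.arange(m).reshape(-1, 1)
v = pylab.arange(n).reshape(1, -1)
x = pylab.sin(2*pylab.pi*u/m) * pylab.sin(2*pylab.pi*v/n)  # smooth, periodic test patch
y = pylab.roll(x, 3, axis=0)                               # circularly shifted copy

k = dense_gauss_kernel(0.5, x, y)  # MxN response map; its peak reflects the shift between x and y
print(k.shape, k.argmax())         # flat index of the peak response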
def dense_gauss_kernel(sigma, x, y=None):
    """
    Gaussian Kernel with dense sampling.
    Evaluates a gaussian kernel with bandwidth SIGMA for all displacements
    between input images X and Y, which must both be MxN. They must also
    be periodic (ie., pre-processed with a cosine window). The result is
    an MxN map of responses.

    If X and Y are the same, omit the third parameter to re-use some
    values, which is faster.
    """

    xf = pylab.fft2(x)  # x in Fourier domain
    x_flat = x.flatten()
    xx = pylab.dot(x_flat.transpose(), x_flat)  # squared norm of x

    if y is not None:
        # general case, x and y are different
        yf = pylab.fft2(y)
        y_flat = y.flatten()
        yy = pylab.dot(y_flat.transpose(), y_flat)
    else:
        # auto-correlation of x, avoid repeating a few operations
        yf = xf
        yy = xx

    # cross-correlation term in Fourier domain
    xyf = pylab.multiply(xf, pylab.conj(yf))

    # to spatial domain
    xyf_ifft = pylab.ifft2(xyf)
    #xy_complex = circshift(xyf_ifft, floor(x.shape/2))
    row_shift, col_shift = pylab.floor(pylab.array(x.shape)/2).astype(int)
    xy_complex = pylab.roll(xyf_ifft, row_shift, axis=0)
    xy_complex = pylab.roll(xy_complex, col_shift, axis=1)
    xy = pylab.real(xy_complex)

    # calculate gaussian response for all positions
    scaling = -1 / (sigma**2)
    xx_yy = xx + yy
    xx_yy_2xy = xx_yy - 2 * xy
    k = pylab.exp(scaling * pylab.maximum(0, xx_yy_2xy / x.size))

    #print("dense_gauss_kernel x.shape ==", x.shape)
    #print("dense_gauss_kernel k.shape ==", k.shape)

    return k
Example #13
def dense_gauss_kernel(sigma, x, y=None):
    """
    通过高斯核计算余弦子窗口图像块的响应图
    利用带宽是 sigma 的高斯核估计两个图像块 X (MxN) 和 Y (MxN) 的关系。X, Y 是循环的、经余弦窗处理的。输出结果是
    响应图矩阵 MxN. 如果 X = Y, 则函数调用时取消 y,则加快计算。
    该函数对应原文中的公式 (16),以及算法1中的 function k = dgk(x1, x2, sigma)
    :param sigma: 高斯核带宽
    :param x: 余弦子窗口图像块
    :param y: 空或者模板图像块
    :return: 响应图
    """
    # 计算图像块 x 的傅里叶变换
    xf = pylab.fft2(x)  # x in Fourier domain
    # 把图像块 x 拉平
    x_flat = x.flatten()
    # 计算 x 的2范数平方
    xx = pylab.dot(x_flat.transpose(), x_flat)  # squared norm of x

    if y is not None:
        # 一半情况, x 和 y 是不同的,计算 y 的傅里叶变化和2范数平方
        yf = pylab.fft2(y)
        y_flat = y.flatten()
        yy = pylab.dot(y_flat.transpose(), y_flat)
    else:
        # x 的自相关,避免重复计算
        yf = xf
        yy = xx

    # 傅里叶域的互相关计算,逐元素相乘
    xyf = pylab.multiply(xf, pylab.conj(yf))

    # 转化为频率域
    xyf_ifft = pylab.ifft2(xyf)
    # 对频率域里的矩阵块进行滚动平移,分别沿 row 和 col 轴
    row_shift, col_shift = pylab.floor(pylab.array(x.shape) / 2).astype(int)
    xy_complex = pylab.roll(xyf_ifft, row_shift, axis=0)
    xy_complex = pylab.roll(xy_complex, col_shift, axis=1)
    xy = pylab.real(xy_complex)

    # 计算高斯核响应图
    scaling = -1 / (sigma**2)
    xx_yy = xx + yy
    xx_yy_2xy = xx_yy - 2 * xy

    return pylab.exp(scaling * pylab.maximum(0, xx_yy_2xy / x.size))
Example #14
	def plot_slice( self, value, logplot=True, colorbar=False, box=[0,0], nx=200, ny=200, center=False, axes=[0,1], minimum=1e-8, newfig=True ):
		if type( center ) == list:
			center = pylab.array( center )
		elif type( center ) != np.ndarray:
			center = self.center
		
		dim0 = axes[0]
		dim1 = axes[1]
		
		if (box[0] == 0 and box[1] == 0):
			box[0] = max( abs( self.data[ "pos" ][:,dim0] ) ) * 2
			box[1] = max( abs( self.data[ "pos" ][:,dim1] ) ) * 2

		slice = self.get_slice( value, box, nx, ny, center, axes )
		x = (pylab.array( range( nx+1 ) ) - nx/2.) / nx * box[0]
		y = (pylab.array( range( ny+1 ) ) - ny/2.) / ny * box[1]

		if (newfig):
			fig = pylab.figure( figsize = ( 13, int(12*box[1]/box[0] + 0.5) ) )
			pylab.spectral()
		
		if logplot:
			pc = pylab.pcolor( x, y, pylab.transpose( pylab.log10( pylab.maximum( slice, minimum ) ) ), shading='flat' )
		else:
			pc = pylab.pcolor( x, y, pylab.transpose( slice ), shading='flat' )
		if colorbar:
			cb = pylab.colorbar()
		pylab.axis( "image" )

		xticklabels = []
		for tick in pc.axes.get_xticks():
			if (tick == 0):
				xticklabels += [ r'$0.0$' ]
			else:
				xticklabels += [ r'$%.2f \cdot 10^{%d}$' % (tick/10**(ceil(log10(abs(tick)))), ceil(log10(abs(tick)))) ]
		pc.axes.set_xticklabels( xticklabels, size=16, y=-0.1, va='baseline' )

		yticklabels = []
		for tick in pc.axes.get_yticks():
			if (tick == 0):
				yticklabels += [ r'$0.0$' ]
			else:
				yticklabels += [ r'$%.2f \cdot 10^{%d}$' % (tick/10**(ceil(log10(abs(tick)))), ceil(log10(abs(tick)))) ]
		pc.axes.set_yticklabels( yticklabels, size=16, ha='right' )
		return pc
Example #15
def test_age_pattern_model_sim():
    # simulate normal data
    a = pl.arange(0, 100, 5)
    pi_true = .0001 * (a * (100. - a) + 100.)
    sigma_true = .025*pl.ones_like(pi_true)

    p = pl.maximum(0., mc.rnormal(pi_true, 1./sigma_true**2.))

    # create model and priors
    vars = {}

    vars.update(age_pattern.age_pattern('test', ages=pl.arange(101), knots=pl.arange(0,101,5), smoothing=.1))

    vars['pi'] = mc.Lambda('pi', lambda mu=vars['mu_age'], a=a: mu[a])
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Example #16
	def plot_cylav( self, value, logplot=True, box=[0,0], nx=512, ny=512, center=False, minimum=1e-8 ):
		if type( center ) == list:
			center = pylab.array( center )
		elif type( center ) != np.ndarray:
			center = self.center
		
		if (box[0] == 0 and box[1] == 0):
			box[0] = max( abs( self.data[ "pos" ][:,0] ) ) * 2
			box[1] = max( abs( self.data[ "pos" ][:,1:] ) ) * 2

		grid = calcGrid.calcGrid( self.pos.astype('float64'), self.data["hsml"].astype('float64'), self.data["mass"].astype('float64'), self.data["rho"].astype('float64'), self.data[value].astype('float64'), nx, ny, ny, box[0], box[1], box[1], 0, 0, 0 )
		cylav = calcGrid.calcCylinderAverage( grid )
		x = (pylab.array( range( nx+1 ) ) - nx/2.) / nx * box[0]
		y = (pylab.array( range( ny+1 ) ) - ny/2.) / ny * box[1]

		fig = pylab.figure( figsize = ( 13, int(12*box[1]/box[0] + 0.5) ) )
		pylab.spectral()
		
		if logplot:
			pc = pylab.pcolor( x, y, pylab.transpose( pylab.log10( pylab.maximum( cylav, minimum ) ) ), shading='flat' )
		else:
			pc = pylab.pcolor( x, y, pylab.transpose( cylav ), shading='flat' )

		pylab.axis( "image" )
		xticklabels = []
		for tick in pc.axes.get_xticks():
			if (tick == 0):
				xticklabels += [ r'$0.0$' ]
			else:
				xticklabels += [ r'$%.2f \cdot 10^{%d}$' % (tick/10**(ceil(log10(abs(tick)))), ceil(log10(abs(tick)))) ]
		pc.axes.set_xticklabels( xticklabels, size=16, y=-0.1, va='baseline' )

		yticklabels = []
		for tick in pc.axes.get_yticks():
			if (tick == 0):
				yticklabels += [ r'$0.0$' ]
			else:
				yticklabels += [ r'$%.2f \cdot 10^{%d}$' % (tick/10**(ceil(log10(abs(tick)))), ceil(log10(abs(tick)))) ]
		pc.axes.set_yticklabels( yticklabels, size=16, ha='right' )
		return pc
Example #17
def test_age_pattern_model_sim():
    # simulate normal data
    a = pl.arange(0, 100, 5)
    pi_true = .0001 * (a * (100. - a) + 100.)
    sigma_true = .025 * pl.ones_like(pi_true)

    p = pl.maximum(0., mc.rnormal(pi_true, 1. / sigma_true**2.))

    # create model and priors
    vars = {}

    vars.update(
        age_pattern.age_pattern('test',
                                ages=pl.arange(101),
                                knots=pl.arange(0, 101, 5),
                                smoothing=.1))

    vars['pi'] = mc.Lambda('pi', lambda mu=vars['mu_age'], a=a: mu[a])
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Example #18
def validate_rate_model(rate_type='neg_binom', data_type='epilepsy', replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)
    
    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]
    
    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')
    
    elif data_type == 'binom':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N*mu, size=len(data.index)) / N

    elif data_type == 'normal':
        mu = data['value'].mean()
        sigma = .125*mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma*mu
        data['value'] = pl.exp(mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        raise TypeError, 'Unknown data type "%s"' % data_type

    # sample prevalence data
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])
    
    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    data['standard_error'] = pl.sqrt(data['value']*(1-data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data


    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0,100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p', model, 'p',
        'all', 'total', 'all',
        None, None, None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)
    
    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)


    return model
Example #19
def ideal_psp(x, time):
    start, A, tau_1, tau_2, offset = x
    t = p.maximum(time - start, 0)
    return A * (p.exp(-t / tau_2) - p.exp(-t / tau_1)) + offset
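A small, made-up evaluation of `ideal_psp` over a time grid (assumes `p` is pylab, as in the snippets above; the parameter values are arbitrary):
import pylab as p

time = p.linspace(0., 100., 1001)   # arbitrary time grid
x = (10., 1.0, 5., 20., 0.)         # start, A, tau_1, tau_2, offset
trace = ideal_psp(x, time)
print(trace.max())                  # peak of the difference-of-exponentials PSP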
Example #20
def set_birth_prev(value):
    model.vars['logit_C0'].value = mc.logit(pl.maximum(1.e-9, value))
 def y_mean(beta=beta, gamma=gamma, X=data.hdi2005):
     return beta[0] + beta[1]*X \
         + beta[2]*pl.maximum(0., X-gamma)
Example #22
 def smooth_rate(f=rate, age_indices=age_indices, C=C):
     log_rate = pl.log(pl.maximum(f, NEARLY_ZERO))
     return mc.mv_normal_cov_like(log_rate[age_indices] -
                                  log_rate[age_indices].mean(),
                                  pl.zeros_like(age_indices),
                                  C=C)
Example #23
def trim(x, a, b):
    return pl.maximum(a, pl.minimum(b, x))
Example #24
def wide_angle_function(angles):
    return pylab.maximum(pylab.minimum(1.05-2*pylab.absolute(angles),1), 0)
# Modify MEC values
modify_icer = False
modify_mec = False
if modify_mec:
    sigma = 1.0
    for r in range(P.intervsets[0].data.nrows):
        row = P.intervsets[0].data['parsedbc', r]
        for i, val in enumerate(row):
            mec = val[1]
            #        newmec = min(1.0, 0.5*mec)
            newmec = pl.median([0, (1 + sigma * pl.randn()) * mec, 1])
            val[1] = newmec
if modify_icer:
    sigma = 0.5
    rands = 1 + sigma * pl.randn(P.intervsets[0].data.nrows)
    rands = pl.maximum(rands, 0.1)
    P.intervsets[0].data['ICER'] *= rands

bod_data = sc.loadobj('gbd-data.dat')
country_data = sc.loadspreadsheet('country-data.xlsx')
baseline_factor = country_data.findrow(
    'Zambia', asdict=True)['icer_multiplier']  # Zambia was used for this

# Replace with actual burden data
for k, key in enumerate(['DALYs', 'Deaths', 'Prevalence']):
    for b, burden in enumerate(P.burden().data['Cause'].tolist()):
        P.burden().data[key, b] = bod_data[country][k][burden]

# Adjust interventions
c = country_data['name'].tolist().index(country)
for key in ['Unit cost', 'ICER']:
Example #26
### @export 'data'
n = pl.array(pl.exp(mc.rnormal(11, 1**-2, size=32)), dtype=int)
k = pl.array(mc.rnegative_binomial(n*pi_true, delta_true), dtype=float)
k[:4] = 0. # zero-inflated model
r = k/n
s = pl.sqrt(r * (1-r) / n)

n_min = min(n)
n_max = max(n)


### @export 'zibb-model'
alpha = mc.Uninformative('alpha', value=4.)
beta = mc.Uninformative('beta', value=1000.)
pi_mean = mc.Lambda('pi_mean', lambda alpha=alpha, beta=beta: alpha/(alpha+beta))
pi = mc.Beta('pi', alpha, beta, value=pl.maximum(1.e-12, pl.minimum(1-1.e-12, r)))
phi = mc.Uniform('phi', lower=0., upper=1., value=.01)

nonzeros = r != 0.
num_nonzeros = nonzeros.sum()
@mc.potential
def obs(pi=pi, phi=phi):
    logp = pl.log(1-phi)*num_nonzeros + mc.binomial_like(r[nonzeros]*n[nonzeros], n[nonzeros], pi[nonzeros])
    for n_i in n[~nonzeros]:
        logp += pl.log(phi + (1-phi) * pl.exp(pl.log(1-pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp

@mc.deterministic
def pred(alpha=alpha, beta=beta, phi=phi):
    if pl.rand() < phi:
        return 0
Example #27
def wide_angle_function(angles):
    return pylab.maximum(pylab.minimum(1.05 - 2 * pylab.absolute(angles), 1),
                         0)
Example #28
def test_data_model_sim():
    # generate simulated data
    n = 50
    sigma_true = .025

    # start with truth
    a = pl.arange(0, 100, 1)
    pi_age_true = .0001 * (a * (100. - a) + 100.)

    # choose age intervals to measure
    age_start = pl.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = pl.array(mc.runiform(age_start+1, pl.minimum(age_start+10,100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i+1)]) / (a_1i - a_0i) 
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n,3))
    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true*pl.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    data['effective_sample_size'] = pl.maximum(p*(1-p)/sigma_true**2, 1.)

    data['standard_error'] = pl.nan
    data['upper_ci'] = pl.nan
    data['lower_ci'] = pl.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'

    # generate a moderately complicated hierarchy graph for the model
    hierarchy = nx.DiGraph()
    hierarchy.add_node('all')
    hierarchy.add_edge('all', 'super-region-1', weight=.1)
    hierarchy.add_edge('super-region-1', 'NAHI', weight=.1)
    hierarchy.add_edge('NAHI', 'CAN', weight=.1)
    hierarchy.add_edge('NAHI', 'USA', weight=.1)
    output_template=pandas.DataFrame(dict(year=[1990, 1990, 2005, 2005, 2010, 2010]*2,
                                          sex=['male', 'female']*3*2,
                                          x_0=[.5]*6*2,
                                          x_1=[0.]*6*2,
                                          x_2=[.5]*6*2,
                                          pop=[50.]*6*2,
                                          area=['CAN']*6 + ['USA']*6))
    

    # create model and priors
    vars = data_model.data_model('test', data, hierarchy, 'all')


    # fit model
    mc.MAP(vars).fit(method='fmin_powell', verbose=1)
    m = mc.MCMC(vars)
    m.use_step_method(mc.AdaptiveMetropolis, [m.gamma_bar, m.gamma, m.beta])
    m.sample(30000, 15000, 15)

    # check estimates
    pi_usa = data_model.predict_for(output_template, hierarchy, 'all', 'USA', 'male', 1990, vars)
    assert pl.allclose(pi_usa.mean(), (m.mu_age.trace()*pl.exp(.05)).mean(), rtol=.1)

    # check convergence
    print 'gamma mc error:', m.gamma_bar.stats()['mc error'].round(2), m.gamma.stats()['mc error'].round(2)


    # plot results
    for a_0i, a_1i, p_i in zip(age_start, age_end, p):
        pl.plot([a_0i, a_1i], [p_i,p_i], 'rs-', mew=1, mec='w', ms=4)
    pl.plot(a, pi_age_true, 'g-', linewidth=2)
    pl.plot(pl.arange(101), m.mu_age.stats()['mean'], 'k-', drawstyle='steps-post', linewidth=3)
    pl.plot(pl.arange(101), m.mu_age.stats()['95% HPD interval'], 'k', linestyle='steps-post:')
    pl.plot(pl.arange(101), pi_usa.mean(0), 'r-', linewidth=2, drawstyle='steps-post')
    pl.savefig('age_integrating_sim.png')

    # compare estimate to ground truth (skip endpoints, because they are extra hard to get right)
    assert pl.allclose(m.pi.stats()['mean'][10:-10], pi_true[10:-10], rtol=.2)
    lb, ub = m.pi.stats()['95% HPD interval'].T
    assert pl.mean((lb <= pi_true)[10:-10] & (pi_true <= ub)[10:-10]) > .75
Example #29
def f(x):
    return (pl.cos(x)**2) / pl.sqrt(pl.maximum(1, 2 * x - 1))
Example #30
def mean_covariate_model(name, mu, input_data, parameters, model, root_area, root_sex, root_year, zero_re=True):
    """ Generate PyMC objects covariate adjusted version of mu

    :Parameters:
      - `name` : str
      - `mu` : the unadjusted mean parameter for this node
      - `model` : ModelData to use for covariates
      - `root_area, root_sex, root_year` : str, str, int
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns dict of PyMC objects, including 'pi', the covariate adjusted predicted values for the mu and X provided

    """
    n = len(input_data.index)

    # make U and alpha
    p_U = model.hierarchy.number_of_nodes()  # random effects for area
    U = pandas.DataFrame(pl.zeros((n, p_U)), columns=model.hierarchy.nodes(), index=input_data.index)
    for i, row in input_data.T.iteritems():
        if row['area'] not in model.hierarchy:
            print 'WARNING: "%s" not in model hierarchy, skipping random effects for this observation' % row['area']
            continue
        
        for level, node in enumerate(nx.shortest_path(model.hierarchy, 'all', input_data.ix[i, 'area'])):
            model.hierarchy.node[node]['level'] = level
            U.ix[i, node] = 1.
            
    for n2 in model.hierarchy.nodes():
        for level, node in enumerate(nx.shortest_path(model.hierarchy, 'all', n2)):
                        model.hierarchy.node[node]['level'] = level
                        
    #U = U.select(lambda col: U[col].std() > 1.e-5, axis=1)  # drop constant columns
    if len(U.index) == 0:
        U = pandas.DataFrame()
    else:
        U = U.select(lambda col: (U[col].max() > 0) and (model.hierarchy.node[col].get('level') > model.hierarchy.node[root_area]['level']), axis=1)  # drop columns with only zeros and which are for higher levels in hierarchy
        #U = U.select(lambda col: model.hierarchy.node[col].get('level') <= 2, axis=1)  # drop country-level REs
        #U = U.drop(['super-region_0', 'north_america_high_income', 'USA'], 1)

        #U = U.drop(['super-region_0', 'north_america_high_income'], 1)
        #U = U.drop(U.columns, 1)


        ## drop random effects with less than 1 observation or with all observations set to 1, unless they have an informative prior
        keep = []
        if 'random_effects' in parameters:
            for re in parameters['random_effects']:
                if parameters['random_effects'][re].get('dist') == 'Constant':
                    keep.append(re)
        U = U.select(lambda col: 1 <= U[col].sum() < len(U[col]) or col in keep, axis=1)


    U_shift = pandas.Series(0., index=U.columns)
    for level, node in enumerate(nx.shortest_path(model.hierarchy, 'all', root_area)):
        if node in U_shift:
            U_shift[node] = 1.
    U = U - U_shift

    sigma_alpha = []
    for i in range(5):  # max depth of hierarchy is 5
        effect = 'sigma_alpha_%s_%d'%(name,i)
        if 'random_effects' in parameters and effect in parameters['random_effects']:
            prior = parameters['random_effects'][effect]
            print 'using stored RE hyperprior for', effect, prior 
            sigma_alpha.append(MyTruncatedNormal(effect, prior['mu'], pl.maximum(prior['sigma'], .001)**-2,
                                                  min(prior['mu'], prior['lower']),
                                                  max(prior['mu'], prior['upper']),
                                                  value=prior['mu']))
        else:
            sigma_alpha.append(MyTruncatedNormal(effect, .05, .03**-2, .05, .5, value=.1))
    
    alpha = pl.array([])
    const_alpha_sigma = pl.array([])
    alpha_potentials = []
    if len(U.columns) > 0:
        tau_alpha_index = []
        for alpha_name in U.columns:
            tau_alpha_index.append(model.hierarchy.node[alpha_name]['level'])
        tau_alpha_index=pl.array(tau_alpha_index, dtype=int)

        tau_alpha_for_alpha = [sigma_alpha[i]**-2 for i in tau_alpha_index]

        alpha = []
        for i, tau_alpha_i in enumerate(tau_alpha_for_alpha):
            effect = 'alpha_%s_%s'%(name, U.columns[i])
            if 'random_effects' in parameters and U.columns[i] in parameters['random_effects']:
                prior = parameters['random_effects'][U.columns[i]]
                print 'using stored RE for', effect, prior
                if prior['dist'] == 'Normal':
                    alpha.append(mc.Normal(effect, prior['mu'], pl.maximum(prior['sigma'], .001)**-2,
                                           value=0.))
                elif prior['dist'] == 'TruncatedNormal':
                    alpha.append(MyTruncatedNormal(effect, prior['mu'], pl.maximum(prior['sigma'], .001)**-2,
                                                   prior['lower'], prior['upper'], value=0.))
                elif prior['dist'] == 'Constant':
                    alpha.append(float(prior['mu']))
                else:
                    assert 0, 'ERROR: prior distribution "%s" is not implemented' % prior['dist']
            else:
                alpha.append(mc.Normal(effect, 0, tau=tau_alpha_i, value=0))

        # sigma for "constant" alpha
        const_alpha_sigma = []
        for i, tau_alpha_i in enumerate(tau_alpha_for_alpha):
            effect = 'alpha_%s_%s'%(name, U.columns[i])
            if 'random_effects' in parameters and U.columns[i] in parameters['random_effects']:
                prior = parameters['random_effects'][U.columns[i]]
                if prior['dist'] == 'Constant':
                    const_alpha_sigma.append(float(prior['sigma']))
                else:
                    const_alpha_sigma.append(pl.nan)
            else:
                const_alpha_sigma.append(pl.nan)
                
        if zero_re:
            column_map = dict([(n,i) for i,n in enumerate(U.columns)])
            # change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic
            for parent in model.hierarchy:
                node_names = model.hierarchy.successors(parent)
                nodes = [column_map[n] for n in node_names if n in U]
                if len(nodes) > 0:
                    i = nodes[0]
                    old_alpha_i = alpha[i]

                    # do not change if prior for this node has dist='constant'
                    if parameters.get('random_effects', {}).get(U.columns[i], {}).get('dist') == 'Constant':
                        continue

                    alpha[i] = mc.Lambda('alpha_det_%s_%d'%(name, i),
                                                lambda other_alphas_at_this_level=[alpha[n] for n in nodes[1:]]: -sum(other_alphas_at_this_level))

                    if isinstance(old_alpha_i, mc.Stochastic):
                        @mc.potential(name='alpha_pot_%s_%s'%(name, U.columns[i]))
                        def alpha_potential(alpha=alpha[i], mu=old_alpha_i.parents['mu'], tau=old_alpha_i.parents['tau']):
                            return mc.normal_like(alpha, mu, tau)
                        alpha_potentials.append(alpha_potential)

    # make X and beta
    X = input_data.select(lambda col: col.startswith('x_'), axis=1)

    # add sex as a fixed effect (TODO: decide if this should be in data.py, when loading gbd model)
    X['x_sex'] = [sex_value[row['sex']] for i, row in input_data.T.iteritems()]

    beta = pl.array([])
    const_beta_sigma = pl.array([])
    X_shift = pandas.Series(0., index=X.columns)
    if len(X.columns) > 0:
        # shift columns to have zero for root covariate
        try:
            output_template = model.output_template.groupby(['area', 'sex', 'year']).mean()  # TODO: change to .first(), but that doesn't work with old pandas
        except pandas.core.groupby.DataError:
            output_template = model.output_template.groupby(['area', 'sex', 'year']).first()
        covs = output_template.filter(list(X.columns) + ['pop'])
        if len(covs.columns) > 1:
            leaves = [n for n in nx.traversal.bfs_tree(model.hierarchy, root_area) if model.hierarchy.successors(n) == []]
            if len(leaves) == 0:
                # networkx returns an empty list when the bfs tree is a single node
                leaves = [root_area]

            if root_sex == 'total' and root_year == 'all':  # special case for all years and sexes
                covs = covs.delevel().drop(['year', 'sex'], axis=1).groupby('area').mean()  # TODO: change to .reset_index(), but that doesn't work with old pandas
                leaf_covs = covs.ix[leaves]
            elif root_sex == 'total':
                raise Exception, 'root_sex == total, root_year != all is Not Yet Implemented'
            elif root_year == 'all':
                raise Exception, 'root_year == all, root_sex != total is Not Yet Implemented'
            else:
                leaf_covs = covs.ix[[(l, root_sex, root_year) for l in leaves]]

            for cov in covs:
                if cov != 'pop':
                    X_shift[cov] = (leaf_covs[cov] * leaf_covs['pop']).sum() / leaf_covs['pop'].sum()

        if 'x_sex' in X.columns:
            X_shift['x_sex'] = sex_value[root_sex]

        X = X - X_shift

        assert not pl.any(pl.isnan(X.__array__())), 'Covariate matrix should have no missing values'

        beta = []
        for i, effect in enumerate(X.columns):
            name_i = 'beta_%s_%s'%(name, effect)
            if 'fixed_effects' in parameters and effect in parameters['fixed_effects']:
                prior = parameters['fixed_effects'][effect]
                print 'using stored FE for', name_i, effect, prior
                if prior['dist'] == 'TruncatedNormal':
                    beta.append(MyTruncatedNormal(name_i, mu=float(prior['mu']), tau=pl.maximum(prior['sigma'], .001)**-2, a=prior['lower'], b=prior['upper'], value=.5*(prior['lower']+prior['upper'])))
                elif prior['dist'] == 'Normal':
                    beta.append(mc.Normal(name_i, mu=float(prior['mu']), tau=pl.maximum(prior['sigma'], .001)**-2, value=float(prior['mu'])))
                elif prior['dist'] == 'Constant':
                    beta.append(float(prior['mu']))
                else:
                    assert 0, 'ERROR: prior distribution "%s" is not implemented' % prior['dist']
            else:
                beta.append(mc.Normal(name_i, mu=0., tau=1.**-2, value=0))

        # sigma for "constant" beta
        const_beta_sigma = []
        for i, effect in enumerate(X.columns):
            name_i = 'beta_%s_%s'%(name, effect)
            if 'fixed_effects' in parameters and effect in parameters['fixed_effects']:
                prior = parameters['fixed_effects'][effect]
                if prior['dist'] == 'Constant':
                    const_beta_sigma.append(float(prior.get('sigma', 1.e-6)))
                else:
                    const_beta_sigma.append(pl.nan)
            else:
                const_beta_sigma.append(pl.nan)
                
    @mc.deterministic(name='pi_%s'%name)
    def pi(mu=mu, U=pl.array(U, dtype=float), alpha=alpha, X=pl.array(X, dtype=float), beta=beta):
        return mu * pl.exp(pl.dot(U, [float(x) for x in alpha]) + pl.dot(X, [float(x) for x in beta]))

    return dict(pi=pi, U=U, U_shift=U_shift, sigma_alpha=sigma_alpha, alpha=alpha, alpha_potentials=alpha_potentials, X=X, X_shift=X_shift, beta=beta, hierarchy=model.hierarchy, const_alpha_sigma=const_alpha_sigma, const_beta_sigma=const_beta_sigma)
Example #31
 def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
     return pl.log(pl.maximum(dismod3.settings.NEARLY_ZERO, mu)) - pl.dot(alpha, Xa) - pl.dot(beta, Xb)
Example #32
def set_rate(rate_type, value):
    t = {'incidence':'i', 'remission': 'r', 'excess-mortality': 'f'}[rate_type]
    for i, k_i in enumerate(model.vars[t]['knots']):
        model.vars[t]['gamma'][i].value = pl.log(pl.maximum(1.e-9, value[i]))
Example #33
 def new_bounds_func(f,
                     age,
                     val=val,
                     prev_bounds_func=rate_vars['bounds_func']):
     return pl.maximum(prev_bounds_func(f, age), val)
Example #34
def validate_rate_model(rate_type='neg_binom',
                        data_type='epilepsy',
                        replicate=0):
    # set random seed for reproducibility
    mc.np.random.seed(1234567 + replicate)

    # load data
    model = dismod3.data.load('/home/j/Project/dismod/output/dm-32377/')

    data = model.get_data('p')

    #data = data.ix[:20, :]

    # replace data with synthetic data if requested
    if data_type == 'epilepsy':
        # no replacement needed
        pass

    elif data_type == 'schiz':
        import pandas as pd
        data = pd.read_csv('/homes/abie/gbd_dev/gbd/tests/schiz.csv')

    elif data_type == 'binom':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rbinomial(N, mu, size=len(data.index)) / N

    elif data_type == 'poisson':
        N = 1.e6
        data['effective_sample_size'] = N
        mu = data['value'].mean()
        data['value'] = mc.rpoisson(N * mu, size=len(data.index)) / N

    elif data_type == 'normal':
        mu = data['value'].mean()
        sigma = .125 * mu
        data['standard_error'] = sigma
        data['value'] = mc.rnormal(mu, sigma**-2, size=len(data.index))

    elif data_type == 'log_normal':
        mu = data['value'].mean()
        sigma = .25
        data['standard_error'] = sigma * mu
        data['value'] = pl.exp(
            mc.rnormal(pl.log(mu), sigma**-2, size=len(data.index)))

    else:
        raise TypeError, 'Unknown data type "%s"' % data_type

    # sample prevalence data
    i_test = mc.rbernoulli(.25, size=len(data.index))
    i_nan = pl.isnan(data['effective_sample_size'])

    data['lower_ci'] = pl.nan
    data['upper_ci'] = pl.nan
    data.ix[i_nan, 'effective_sample_size'] = 0.
    data['standard_error'] = pl.sqrt(
        data['value'] * (1 - data['value'])) / data['effective_sample_size']
    data.ix[pl.isnan(data['standard_error']), 'standard_error'] = pl.inf

    data['standard_error'][i_test] = pl.inf
    data['effective_sample_size'][i_test] = 0.

    data['value'] = pl.maximum(data['value'], 1.e-12)

    model.input_data = data

    # create model
    # TODO: set parameters in model.parameters['p'] dict
    # then have simple method to create age specific rate model
    #model.parameters['p'] = ...
    #model.vars += dismod3.ism.age_specific_rate(model, 'p')

    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Very'
    model.vars['p'] = dismod3.data_model.data_model(
        'p',
        model,
        'p',
        'all',
        'total',
        'all',
        None,
        None,
        None,
        rate_type=rate_type,
        interpolation_method='zero',
        include_covariates=False)

    # add upper bound on sigma in log normal model to help convergence
    #if rate_type == 'log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = 1.5

    # add upper bound on sigma, zeta in offset log normal
    #if rate_type == 'offset_log_normal':
    #    model.vars['p']['sigma'].parents['upper'] = .1
    #    model.vars['p']['p_zeta'].value = 5.e-9
    #    model.vars['p']['p_zeta'].parents['upper'] = 1.e-8

    # fit model
    dismod3.fit.fit_asr(model, 'p', iter=20000, thin=10, burn=10000)
    #dismod3.fit.fit_asr(model, 'p', iter=100, thin=1, burn=0)

    # compare estimate to hold-out
    data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    data['lb_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            0]
    data['ub_pred'] = model.vars['p']['p_pred'].stats()['95% HPD interval'][:,
                                                                            1]

    import data_simulation
    model.test = data[i_test]
    data = model.test
    data['true'] = data['value']
    data_simulation.add_quality_metrics(data)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'test')
    data_simulation.finalize_results(model)

    return model
Example #35
def f(x):
    return (pylab.cos(x)**2) / pylab.sqrt(pylab.maximum(1, 2 * x - 1))
Example #36
 def new_bounds_func(f, age, val=val, prev_bounds_func=rate_vars['bounds_func']):
     return pl.maximum(prev_bounds_func(f, age), val)
Example #37
def log_add(x, y):
    return where(
        abs(x - y) > 10, maximum(x, y),
        log(exp(clip(x - y, -20, 20)) + 1) + y)
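A quick numerical check of `log_add` (an illustrative assumption, not from the source), which approximates log(exp(x) + exp(y)) while avoiding overflow; it assumes the surrounding module does `from pylab import *` so the bare names inside `log_add` resolve.
from pylab import array, log, exp

x = array([0.0, 100.0, -100.0])
y = array([0.0,   0.0,    0.0])
print(log_add(x, y))             # approx [log(2), 100, 0]
print(log(exp(0.0) + exp(0.0)))  # 0.6931..., matches the first entry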
Example #38
def create_fig4_5():
    def create_data1():
        def f(x):
            return np.random.normal(0, 0.4) + x + 4.
        x1 = 4. * np.random.random_sample(DATA_SIZE,) - 4.
        x2 = np.array(map(f, x1))
        t = np.array([[1, 0, 0] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    def create_data2():
        def f(x):
            return np.random.normal(0, 0.4) + x
        x1 = 4. * np.random.random_sample(DATA_SIZE,) - 2.
        x2 = np.array(map(f, x1))
        t = np.array([[0, 1, 0] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    def create_data3():
        def f(x):
            return np.random.normal(0, 0.4) + x - 4.
        x1 = 4. * np.random.random_sample(DATA_SIZE,)
        x2 = np.array(map(f, x1))
        t = np.array([[0, 0, 1] for i in xrange(DATA_SIZE)])
        return np.array(zip(x1, x2)), t

    X1, T1 = create_data1()
    X2, T2 = create_data2()
    X3, T3 = create_data3()

    W1 = calc_weight(np.r_[X1, X2, X3], np.r_[T1, T2, T3])

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize = (14, 6))
    ax1.grid(True)
    ax2.grid(True)
    plt.subplots_adjust(wspace = 0.4)
    ax1.set_xlim(-6, 6)
    ax1.set_ylim(-6, 6)
    ax2.set_xlim(-6, 6)
    ax2.set_ylim(-6, 6)

    x = np.arange(-10, 10, 0.1)
    x_lower = np.arange(-10, 0, 0.1)
    x_higher = np.arange(0, 10, 0.1)
    border_func1 = get_border(W1[:,:2])
    border1 = np.array(map(border_func1, x))
    ax1.plot(x_lower, map(border_func1, x_lower), 'k')

    border_func2 = get_border(W1[:, 1:])
    border2 = np.array(map(border_func2, x))
    ax1.plot(x_lower, map(border_func2, x_lower), 'k')

    border_func3 = get_border(W1[:, 0::2])
    border3 = np.array(map(border_func3, x))
    ax1.plot(x_higher, map(border_func3, x_higher), 'k')

    ax1.fill_between(x, border1, border2, where=border2>border1, facecolor = 'g', alpha = 0.2)
    ax1.fill_between(x, maximum(border2, border3), 10, facecolor = 'r', alpha = 0.2)
    ax1.fill_between(x, minimum(border1, border3), -10, facecolor = 'b', alpha = 0.2)

    #border_func2 = get_border(W2)
    #ax2.plot(x, map(border_func2, x), 'm')

    ax1.scatter(X1[:,0], X1[:,1], s = 50, c = 'r', marker = "x")
    ax1.scatter(X2[:,0], X2[:,1], s = 50, c = 'g', marker = "x")
    ax1.scatter(X3[:,0], X3[:,1], s = 50, edgecolors = 'b', marker = "o", facecolors= 'none')

    ax2.scatter(X1[:,0], X1[:,1], s = 50, c = 'r', marker = "x")
    ax2.scatter(X2[:,0], X2[:,1], s = 50, c = 'g', marker = "x")
    ax2.scatter(X3[:,0], X3[:,1], s = 50, edgecolors = 'b', marker = "o", facecolors= 'none')

    plt.show()
Example #39
def mean_covariate_model(name,
                         mu,
                         input_data,
                         parameters,
                         model,
                         root_area,
                         root_sex,
                         root_year,
                         zero_re=True):
    """ Generate PyMC objects covariate adjusted version of mu

    :Parameters:
      - `name` : str
      - `mu` : the unadjusted mean parameter for this node
      - `model` : ModelData to use for covariates
      - `root_area, root_sex, root_year` : str, str, int
      - `zero_re` : boolean, change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic

    :Results:
      - Returns dict of PyMC objects, including 'pi', the covariate adjusted predicted values for the mu and X provided

    """
    n = len(input_data.index)

    # make U and alpha
    p_U = model.hierarchy.number_of_nodes()  # random effects for area
    U = pandas.DataFrame(pl.zeros((n, p_U)),
                         columns=model.hierarchy.nodes(),
                         index=input_data.index)
    for i, row in input_data.T.iteritems():
        if row['area'] not in model.hierarchy:
            print 'WARNING: "%s" not in model hierarchy, skipping random effects for this observation' % row[
                'area']
            continue

        for level, node in enumerate(
                nx.shortest_path(model.hierarchy, 'all',
                                 input_data.ix[i, 'area'])):
            model.hierarchy.node[node]['level'] = level
            U.ix[i, node] = 1.

    for n2 in model.hierarchy.nodes():
        for level, node in enumerate(
                nx.shortest_path(model.hierarchy, 'all', n2)):
            model.hierarchy.node[node]['level'] = level

    #U = U.select(lambda col: U[col].std() > 1.e-5, axis=1)  # drop constant columns
    if len(U.index) == 0:
        U = pandas.DataFrame()
    else:
        U = U.select(
            lambda col: (U[col].max() > 0) and (model.hierarchy.node[col].get(
                'level') > model.hierarchy.node[root_area]['level']),
            axis=1
        )  # drop columns with only zeros and which are for higher levels in hierarchy
        #U = U.select(lambda col: model.hierarchy.node[col].get('level') <= 2, axis=1)  # drop country-level REs
        #U = U.drop(['super-region_0', 'north_america_high_income', 'USA'], 1)

        #U = U.drop(['super-region_0', 'north_america_high_income'], 1)
        #U = U.drop(U.columns, 1)

        ## drop random effects with less than 1 observation or with all observations set to 1, unless they have an informative prior
        keep = []
        if 'random_effects' in parameters:
            for re in parameters['random_effects']:
                if parameters['random_effects'][re].get('dist') == 'Constant':
                    keep.append(re)
        U = U.select(
            lambda col: 1 <= U[col].sum() < len(U[col]) or col in keep, axis=1)

    U_shift = pandas.Series(0., index=U.columns)
    for level, node in enumerate(
            nx.shortest_path(model.hierarchy, 'all', root_area)):
        if node in U_shift:
            U_shift[node] = 1.
    U = U - U_shift

    sigma_alpha = []
    for i in range(5):  # max depth of hierarchy is 5
        effect = 'sigma_alpha_%s_%d' % (name, i)
        if 'random_effects' in parameters and effect in parameters[
                'random_effects']:
            prior = parameters['random_effects'][effect]
            print 'using stored RE hyperprior for', effect, prior
            sigma_alpha.append(
                MyTruncatedNormal(effect,
                                  prior['mu'],
                                  pl.maximum(prior['sigma'], .001)**-2,
                                  min(prior['mu'], prior['lower']),
                                  max(prior['mu'], prior['upper']),
                                  value=prior['mu']))
        else:
            sigma_alpha.append(
                MyTruncatedNormal(effect, .05, .03**-2, .05, .5, value=.1))

    alpha = pl.array([])
    const_alpha_sigma = pl.array([])
    alpha_potentials = []
    if len(U.columns) > 0:
        tau_alpha_index = []
        for alpha_name in U.columns:
            tau_alpha_index.append(model.hierarchy.node[alpha_name]['level'])
        tau_alpha_index = pl.array(tau_alpha_index, dtype=int)

        tau_alpha_for_alpha = [sigma_alpha[i]**-2 for i in tau_alpha_index]

        alpha = []
        for i, tau_alpha_i in enumerate(tau_alpha_for_alpha):
            effect = 'alpha_%s_%s' % (name, U.columns[i])
            if 'random_effects' in parameters and U.columns[i] in parameters[
                    'random_effects']:
                prior = parameters['random_effects'][U.columns[i]]
                print 'using stored RE for', effect, prior
                if prior['dist'] == 'Normal':
                    alpha.append(
                        mc.Normal(effect,
                                  prior['mu'],
                                  pl.maximum(prior['sigma'], .001)**-2,
                                  value=0.))
                elif prior['dist'] == 'TruncatedNormal':
                    alpha.append(
                        MyTruncatedNormal(effect,
                                          prior['mu'],
                                          pl.maximum(prior['sigma'], .001)**-2,
                                          prior['lower'],
                                          prior['upper'],
                                          value=0.))
                elif prior['dist'] == 'Constant':
                    alpha.append(float(prior['mu']))
                else:
                    assert 0, 'ERROR: prior distribution "%s" is not implemented' % prior[
                        'dist']
            else:
                alpha.append(mc.Normal(effect, 0, tau=tau_alpha_i, value=0))

        # sigma for "constant" alpha
        const_alpha_sigma = []
        for i, tau_alpha_i in enumerate(tau_alpha_for_alpha):
            effect = 'alpha_%s_%s' % (name, U.columns[i])
            if 'random_effects' in parameters and U.columns[i] in parameters[
                    'random_effects']:
                prior = parameters['random_effects'][U.columns[i]]
                if prior['dist'] == 'Constant':
                    const_alpha_sigma.append(float(prior['sigma']))
                else:
                    const_alpha_sigma.append(pl.nan)
            else:
                const_alpha_sigma.append(pl.nan)

        if zero_re:
            column_map = dict([(n, i) for i, n in enumerate(U.columns)])
            # change one stoch from each set of siblings in area hierarchy to a 'sum to zero' deterministic
            for parent in model.hierarchy:
                node_names = model.hierarchy.successors(parent)
                nodes = [column_map[n] for n in node_names if n in U]
                if len(nodes) > 0:
                    i = nodes[0]
                    old_alpha_i = alpha[i]

                    # do not change if prior for this node has dist='constant'
                    if parameters.get('random_effects',
                                      {}).get(U.columns[i],
                                              {}).get('dist') == 'Constant':
                        continue

                    alpha[i] = mc.Lambda(
                        'alpha_det_%s_%d' % (name, i),
                        lambda other_alphas_at_this_level=
                        [alpha[n]
                         for n in nodes[1:]]: -sum(other_alphas_at_this_level))

                    if isinstance(old_alpha_i, mc.Stochastic):

                        @mc.potential(name='alpha_pot_%s_%s' %
                                      (name, U.columns[i]))
                        def alpha_potential(alpha=alpha[i],
                                            mu=old_alpha_i.parents['mu'],
                                            tau=old_alpha_i.parents['tau']):
                            return mc.normal_like(alpha, mu, tau)

                        alpha_potentials.append(alpha_potential)

    # make X and beta
    X = input_data.select(lambda col: col.startswith('x_'), axis=1)

    # add sex as a fixed effect (TODO: decide if this should be in data.py, when loading gbd model)
    X['x_sex'] = [sex_value[row['sex']] for i, row in input_data.T.iteritems()]

    beta = pl.array([])
    const_beta_sigma = pl.array([])
    X_shift = pandas.Series(0., index=X.columns)
    if len(X.columns) > 0:
        # shift columns to have zero for root covariate
        try:
            output_template = model.output_template.groupby([
                'area', 'sex', 'year'
            ]).mean(
            )  # TODO: change to .first(), but that doesn't work with old pandas
        except pandas.core.groupby.DataError:
            output_template = model.output_template.groupby(
                ['area', 'sex', 'year']).first()
        covs = output_template.filter(list(X.columns) + ['pop'])
        if len(covs.columns) > 1:
            leaves = [
                n for n in nx.traversal.bfs_tree(model.hierarchy, root_area)
                if model.hierarchy.successors(n) == []
            ]
            if len(leaves) == 0:
                # networkx returns an empty list when the bfs tree is a single node
                leaves = [root_area]

            if root_sex == 'total' and root_year == 'all':  # special case for all years and sexes
                covs = covs.delevel().drop([
                    'year', 'sex'
                ], axis=1).groupby('area').mean(
                )  # TODO: change to .reset_index(), but that doesn't work with old pandas
                leaf_covs = covs.ix[leaves]
            elif root_sex == 'total':
                raise Exception, 'root_sex == total, root_year != all is Not Yet Implemented'
            elif root_year == 'all':
                raise Exception, 'root_year == all, root_sex != total is Not Yet Implemented'
            else:
                leaf_covs = covs.ix[[(l, root_sex, root_year) for l in leaves]]

            for cov in covs:
                if cov != 'pop':
                    X_shift[cov] = (leaf_covs[cov] * leaf_covs['pop']
                                    ).sum() / leaf_covs['pop'].sum()

        if 'x_sex' in X.columns:
            X_shift['x_sex'] = sex_value[root_sex]

        X = X - X_shift

        assert not pl.any(pl.isnan(
            X.__array__())), 'Covariate matrix should have no missing values'

        beta = []
        for i, effect in enumerate(X.columns):
            name_i = 'beta_%s_%s' % (name, effect)
            if 'fixed_effects' in parameters and effect in parameters[
                    'fixed_effects']:
                prior = parameters['fixed_effects'][effect]
                print 'using stored FE for', name_i, effect, prior
                if prior['dist'] == 'TruncatedNormal':
                    beta.append(
                        MyTruncatedNormal(
                            name_i,
                            mu=float(prior['mu']),
                            tau=pl.maximum(prior['sigma'], .001)**-2,
                            a=prior['lower'],
                            b=prior['upper'],
                            value=.5 * (prior['lower'] + prior['upper'])))
                elif prior['dist'] == 'Normal':
                    beta.append(
                        mc.Normal(name_i,
                                  mu=float(prior['mu']),
                                  tau=pl.maximum(prior['sigma'], .001)**-2,
                                  value=float(prior['mu'])))
                elif prior['dist'] == 'Constant':
                    beta.append(float(prior['mu']))
                else:
                    assert 0, 'ERROR: prior distribution "%s" is not implemented' % prior[
                        'dist']
            else:
                beta.append(mc.Normal(name_i, mu=0., tau=1.**-2, value=0))

        # sigma for "constant" beta
        const_beta_sigma = []
        for i, effect in enumerate(X.columns):
            name_i = 'beta_%s_%s' % (name, effect)
            if 'fixed_effects' in parameters and effect in parameters[
                    'fixed_effects']:
                prior = parameters['fixed_effects'][effect]
                if prior['dist'] == 'Constant':
                    const_beta_sigma.append(float(prior.get('sigma', 1.e-6)))
                else:
                    const_beta_sigma.append(pl.nan)
            else:
                const_beta_sigma.append(pl.nan)

    @mc.deterministic(name='pi_%s' % name)
    def pi(mu=mu,
           U=pl.array(U, dtype=float),
           alpha=alpha,
           X=pl.array(X, dtype=float),
           beta=beta):
        return mu * pl.exp(pl.dot(U, [float(x) for x in alpha]) +
                           pl.dot(X, [float(x) for x in beta]))

    return dict(pi=pi,
                U=U,
                U_shift=U_shift,
                sigma_alpha=sigma_alpha,
                alpha=alpha,
                alpha_potentials=alpha_potentials,
                X=X,
                X_shift=X_shift,
                beta=beta,
                hierarchy=model.hierarchy,
                const_alpha_sigma=const_alpha_sigma,
                const_beta_sigma=const_beta_sigma)
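
# A minimal numerical sketch (toy values, not part of the model code above) of
# the log-linear predictor that the `pi` deterministic evaluates:
#     pi = mu * exp(dot(U, alpha) + dot(X, beta))
# where U holds the area random-effect indicators and X the shifted
# fixed-effect covariates.
import pylab as pl

mu_toy = .01                              # baseline rate
U_toy = pl.array([[1., 0.], [0., 1.]])    # two rows, two area indicators
alpha_toy = pl.array([.1, -.1])           # area random effects (log scale)
X_toy = pl.array([[0.], [1.]])            # one centered covariate
beta_toy = pl.array([.5])                 # fixed effect (log scale)

pi_toy = mu_toy * pl.exp(pl.dot(U_toy, alpha_toy) + pl.dot(X_toy, beta_toy))
# pi_toy is approximately [0.0111, 0.0149], one rate per row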
Beispiel #40
0
def f(x):
    return pylab.maximum(pylab.absolute(x * pylab.sin(x)),
                         pylab.absolute(x * pylab.cos(x)))
Beispiel #41
0
def ideal_psp_fixmem(x, tau_m, time):
    start, A, tau, offset = x
    t = p.maximum(time - start, 0)
    return A * (p.exp(-t / tau) - p.exp(-t / tau_m)) + offset
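
# Usage sketch for ideal_psp_fixmem (illustrative numbers only): x packs
# (start, A, tau, offset) and tau_m is the fixed membrane time constant.
import pylab as p

t_demo = p.linspace(0., 100., 1000)                       # time grid
trace_demo = ideal_psp_fixmem((10., 1., 5., 0.), 20., t_demo)
# trace_demo equals offset before start=10 and then follows
# A * (exp(-t/tau) - exp(-t/tau_m)) + offset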
Beispiel #42
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     return mc.negative_binomial_like(pl.maximum(value * n, pi * n),
                                      pi * n + 1.e-9, delta)
Beispiel #43
0
def psp(time, start, A, tau_1, tau_2, offset, noise):
    t = p.maximum(time - start, 0)
    return A * (p.exp(-t / tau_2) - p.exp(-t / tau_1)) \
        + p.random(t.shape) * noise + offset
Beispiel #44
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     return mc.negative_binomial_like(pl.maximum(value * n, pi * n), pi * n + 1.0e-9, delta)
Beispiel #45
0
def f(x):
    return pylab.maximum(abs(x * pylab.sin(x)), abs(x * pylab.cos(x)))
Beispiel #46
0
def setup(dm, key, data_list=[], rate_stoch=None, emp_prior={}, lower_bound_data=[]):
    """ Generate the PyMC variables for a negative-binomial model of
    a single rate function

    Parameters
    ----------
    dm : dismod3.DiseaseModel
      the object containing all the data, priors, and additional
      information (like input and output age-mesh)
      
    key : str
      the name of the key for everything about this model (priors,
      initial values, estimations)

    data_list : list of data dicts
      the observed data to use in the negative binomial likelihood function

    rate_stoch : pymc.Stochastic, optional
      a PyMC stochastic (or deterministic) object, with
      len(rate_stoch.value) == len(dm.get_estimation_age_mesh()).
      This is used to link rate stochs into a larger model,
      for example.

    emp_prior : dict, optional
      the empirical prior dictionary, retrieved from the disease model
      if appropriate by::

          >>> t, r, y, s = dismod3.utils.type_region_year_sex_from_key(key)
          >>> emp_prior = dm.get_empirical_prior(t)

    Returns
    -------
    vars : dict
      Return a dictionary of all the relevant PyMC objects for the
      rate model.  vars['rate_stoch'] is of particular
      relevance; this is what is used to link the rate model
      into more complicated models, like the generic disease model.
    """
    vars = {}
    est_mesh = dm.get_estimate_age_mesh()
    param_mesh = dm.get_param_age_mesh()

    if pl.any(pl.diff(est_mesh) != 1):
        raise ValueError, 'ERROR: Gaps in estimation age mesh must all equal 1'

    # calculate effective sample size for all data and lower bound data
    dm.calc_effective_sample_size(data_list)
    dm.calc_effective_sample_size(lower_bound_data)

    # generate regional covariates
    covariate_dict = dm.get_covariates()
    derived_covariate = dm.get_derived_covariate_values()
    X_region, X_study = regional_covariates(key, covariate_dict, derived_covariate)

    # use confidence prior from prior_str  (only for posterior estimate, this is overridden below for empirical prior estimate)
    mu_delta = 1000.
    sigma_delta = 10.
    mu_log_delta = 3.
    sigma_log_delta = .25
    from dismod3.settings import PRIOR_SEP_STR
    for line in dm.get_priors(key).split(PRIOR_SEP_STR):
        prior = line.strip().split()
        if len(prior) == 0:
            continue
        if prior[0] == 'heterogeneity':
            # originally designed for this:
            mu_delta = float(prior[1])
            sigma_delta = float(prior[2])

            # HACK: override design to set sigma_log_delta,
            # .25 = very, .025 = moderately, .0025 = slightly
            if float(prior[2]) > 0:
                sigma_log_delta = .025 / float(prior[2])


    # use the empirical prior mean if it is available
    if len(set(emp_prior.keys()) & set(['alpha', 'beta', 'gamma'])) == 3:
        mu_alpha = pl.array(emp_prior['alpha'])
        sigma_alpha = pl.array(emp_prior['sigma_alpha'])
        alpha = pl.array(emp_prior['alpha']) # TODO: make this stochastic
        vars.update(region_coeffs=alpha)

        beta = pl.array(emp_prior['beta']) # TODO: make this stochastic
        sigma_beta = pl.array(emp_prior['sigma_beta'])
        vars.update(study_coeffs=beta)

        mu_gamma = pl.array(emp_prior['gamma'])
        sigma_gamma = pl.array(emp_prior['sigma_gamma'])

        # Do not inform dispersion parameter from empirical prior stage
        # if 'delta' in emp_prior:
        #    mu_delta = emp_prior['delta']
        #    if 'sigma_delta' in emp_prior:
        #        sigma_delta = emp_prior['sigma_delta']
    else:
        import dismod3.regional_similarity_matrices as similarity_matrices
        n = len(X_region)
        mu_alpha = pl.zeros(n)
        sigma_alpha = .025  # TODO: make this a hyperparameter, with a traditional prior, like inverse gamma
        C_alpha = similarity_matrices.regions_nested_in_superregions(n, sigma_alpha)

        # use alternative region effect covariance structure if requested
        region_prior_key = 'region_effects'
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.uninformative(n, sigma_alpha)

        region_prior_key = 'region_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if region_prior_key in dm.params:
            if dm.params[region_prior_key] == 'uninformative':
                C_alpha = similarity_matrices.regions_nested_in_superregions(n, dm.params[region_prior_key]['std'])

        # add informative prior for sex effect if requested
        sex_prior_key = 'sex_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]
        if sex_prior_key in dm.params:
            print 'adjusting prior on sex effect coefficient for %s' % key
            mu_alpha[n-1] = pl.log(dm.params[sex_prior_key]['mean'])
            sigma_sex = (pl.log(dm.params[sex_prior_key]['upper_ci']) - pl.log(dm.params[sex_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-1, n-1]= sigma_sex**2.

        # add informative prior for time effect if requested
        time_prior_key = 'time_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if time_prior_key in dm.params:
            print 'adjusting prior on time effect coefficient for %s' % key
            mu_alpha[n-2] = pl.log(dm.params[time_prior_key]['mean'])
            sigma_time = (pl.log(dm.params[time_prior_key]['upper_ci']) - pl.log(dm.params[time_prior_key]['lower_ci'])) / (2*1.96)
            C_alpha[n-2, n-2]= sigma_time**2.
        
        #C_alpha = similarity_matrices.all_related_equally(n, sigma_alpha)
        alpha = mc.MvNormalCov('region_coeffs_%s' % key, mu=mu_alpha,
                            C=C_alpha,
                            value=mu_alpha)
        vars.update(region_coeffs=alpha, region_coeffs_step_cov=.005*C_alpha)

        mu_beta = pl.zeros(len(X_study))
        sigma_beta = .1

        # add informative prior for beta effect if requested
        prior_key = 'beta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on beta effect coefficients for %s' % key
            mu_beta = pl.array(dm.params[prior_key]['mean'])
            sigma_beta = pl.array(dm.params[prior_key]['std'])

        beta = mc.Normal('study_coeffs_%s' % key, mu=mu_beta, tau=sigma_beta**-2., value=mu_beta)
        vars.update(study_coeffs=beta)

        mu_gamma = 0.*pl.ones(len(est_mesh))
        sigma_gamma = 2.*pl.ones(len(est_mesh))

        # add informative prior for gamma effect if requested
        prior_key = 'gamma_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on gamma effect coefficients for %s' % key
            mu_gamma = pl.array(dm.params[prior_key]['mean'])
            sigma_gamma = pl.array(dm.params[prior_key]['std'])

        # always use dispersed prior on delta for empirical prior phase
        mu_log_delta = 3.
        sigma_log_delta = .25
        # add informative prior for delta effect if requested
        prior_key = 'delta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
        if prior_key in dm.params:
            print 'adjusting prior on delta effect coefficients for %s' % key
            mu_log_delta = dm.params[prior_key]['mean']
            sigma_log_delta = dm.params[prior_key]['std']

    mu_zeta = 0.
    sigma_zeta = .25
    # add informative prior for zeta effect if requested
    prior_key = 'zeta_effect_%s'%key.split(dismod3.settings.KEY_DELIM_CHAR)[0]  # HACK: sometimes key is just parameter type, sometimes it is type+region+year+sex
    if prior_key in dm.params:
        print 'adjusting prior on zeta effect coefficients for %s' % key
        mu_zeta = dm.params[prior_key]['mean']
        sigma_zeta = dm.params[prior_key]['std']
    
    if mu_delta != 0.:
        if sigma_delta != 0.:
            log_delta = mc.Normal('log_dispersion_%s' % key, mu=mu_log_delta, tau=sigma_log_delta**-2, value=3.)
            zeta = mc.Normal('zeta_%s'%key, mu=mu_zeta, tau=sigma_zeta**-2, value=mu_zeta)
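            # dispersion is parameterized on the log10 scale:
            # delta = 50 + 10**log_delta, so the dispersion is always at least 50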
            delta = mc.Lambda('dispersion_%s' % key, lambda x=log_delta: 50. + 10.**x)
            vars.update(dispersion=delta, log_dispersion=log_delta, zeta=zeta, dispersion_step_sd=.1*log_delta.parents['tau']**-.5)
        else:
            delta = mc.Lambda('dispersion_%s' % key, lambda x=mu_delta: mu_delta)
            vars.update(dispersion=delta)
        
    else:
        delta = mc.Lambda('dispersion_%s' % key, lambda mu=mu_delta: 0)
        vars.update(dispersion=delta)

    if len(sigma_gamma) == 1:
        sigma_gamma = sigma_gamma[0]*pl.ones(len(est_mesh))

    # create variable for interpolated rate;
    # also create variable for age-specific rate function, if it does not yet exist
    if rate_stoch:
        # if the rate_stoch already exists, for example prevalence in the generic model,
        # we use it to back-calculate mu and eventually gamma
        mu = rate_stoch

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(mu=mu, Xa=X_region, Xb=X_study, alpha=alpha, beta=beta):
            return pl.log(pl.maximum(dismod3.settings.NEARLY_ZERO, mu)) - pl.dot(alpha, Xa) - pl.dot(beta, Xb)

        @mc.potential(name='age_coeffs_potential_%s' % key)
        def gamma_potential(gamma=gamma, mu_gamma=mu_gamma, tau_gamma=1./sigma_gamma[param_mesh]**2, param_mesh=param_mesh):
            return mc.normal_like(gamma[param_mesh], mu_gamma[param_mesh], tau_gamma)

        vars.update(rate_stoch=mu, age_coeffs=gamma, age_coeffs_potential=gamma_potential)
    else:
        # if the rate_stoch does not yet exist, make gamma a stochastic and use it to calculate mu
        # for computational efficiency, gamma is a linearly interpolated version of gamma_mesh
        initial_gamma = pl.log(dismod3.settings.NEARLY_ZERO + dm.get_initial_value(key))

        gamma_mesh = mc.Normal('age_coeffs_mesh_%s' % key, mu=mu_gamma[param_mesh], tau=sigma_gamma[param_mesh]**-2, value=initial_gamma[param_mesh])

        @mc.deterministic(name='age_coeffs_%s' % key)
        def gamma(gamma_mesh=gamma_mesh, param_mesh=param_mesh, est_mesh=est_mesh):
            return dismod3.utils.interpolate(param_mesh, gamma_mesh, est_mesh)

        @mc.deterministic(name=key)
        def mu(Xa=X_region, Xb=X_study, alpha=alpha, beta=beta, gamma=gamma):
            return predict_rate([Xa, Xb], alpha, beta, gamma, lambda f, age: f, est_mesh)

        # Create a guess at the covariance matrix for MCMC proposals to update gamma_mesh
        from pymc.gp.cov_funs import matern
        a = pl.atleast_2d(param_mesh).T
        C = matern.euclidean(a, a, diff_degree = 2, amp = 1.**2, scale = 10.)

        vars.update(age_coeffs_mesh=gamma_mesh, age_coeffs=gamma, rate_stoch=mu, age_coeffs_mesh_step_cov=.005*pl.array(C))

        # adjust value of gamma_mesh based on priors, if necessary
        # TODO: implement more adjustments, currently only adjusted based on at_least priors
        for line in dm.get_priors(key).split(PRIOR_SEP_STR):
            prior = line.strip().split()
            if len(prior) == 0:
                continue
            if prior[0] == 'at_least':
                delta_gamma = pl.log(pl.maximum(mu.value, float(prior[1]))) - pl.log(mu.value)
                gamma_mesh.value = gamma_mesh.value + delta_gamma[param_mesh]

    # create potentials for priors
    dismod3.utils.generate_prior_potentials(vars, dm.get_priors(key), est_mesh)

    # create observed stochastics for data
    vars['data'] = []

    if mu_delta != 0.:  
        value = []
        N = []
        Xa = []
        Xb = []
        ai = []
        aw = []
        Xz = []

        for d in data_list:
            try:
                age_indices, age_weights, Y_i, N_i = values_from(dm, d)
            except ValueError:
                debug('WARNING: could not calculate likelihood for data %d' % d['id'])
                continue

            value.append(Y_i*N_i)
            N.append(N_i)
            Xa.append(covariates(d, covariate_dict)[0])
            Xb.append(covariates(d, covariate_dict)[1])
            Xz.append(float(d.get('bias') or 0.))
            ai.append(age_indices)
            aw.append(age_weights)

            vars['data'].append(d)

        N = pl.array(N)
        Xa = pl.array(Xa)
        Xb = pl.array(Xb)
        Xz = pl.array(Xz)
        value = pl.array(value)
        
        vars['effective_sample_size'] = list(N)
        
    if len(vars['data']) > 0:
        # TODO: consider using only a subset of the rates at each step of the fit to speed computation; say 100 of them
        k = 50000
        if len(vars['data']) < k:
            data_sample = range(len(vars['data']))
        else:
            import random
            @mc.deterministic(name='data_sample_%s' % key)
            def data_sample(n=len(vars['data']), k=k):
                return random.sample(range(n), k)

        @mc.deterministic(name='rate_%s' % key)
        def rates(S=data_sample,
                Xa=Xa, Xb=Xb,
                alpha=alpha, beta=beta, gamma=gamma,
                bounds_func=vars['bounds_func'],
                age_indices=ai,
                age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa[S], alpha) + pl.dot(Xb[S], pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu = pl.zeros_like(shifts)
            for i,s in enumerate(S):
                mu[i] = pl.dot(age_weights[s], bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
                # TODO: evaluate speed increase and accuracy decrease of the following:
                #midpoint = age_indices[s][len(age_indices[s])/2]
                #mu[i] = bounds_func(shifts[i] * exp_gamma[midpoint], midpoint)
                # TODO: evaluate speed increase and accuracy decrease of the following (to see the speed increase, this needs a difference-of-running-sums implementation):
                #mu[i] = pl.dot(pl.ones_like(age_weights[s]) / float(len(age_weights[s])),
                #               bounds_func(shifts[i] * exp_gamma[age_indices[s]], age_indices[s]))
            return mu
        vars['expected_rates'] = rates
        
        @mc.observed
        @mc.stochastic(name='data_%s' % key)
        def obs(value=value,
                S=data_sample,
                N=N,
                mu_i=rates,
                Xz=Xz,
                zeta=zeta,
                delta=delta):
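            # negative binomial likelihood on expected event counts N[S]*mu_i;
            # the dispersion delta is scaled by exp(Xz*zeta), letting the
            # study-level covariate Xz modulate the amount of overdispersion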
            #zeta_i = .001
            #residual = pl.log(value[S] + zeta_i) - pl.log(mu_i*N[S] + zeta_i)
            #return mc.normal_like(residual, 0, 100. + delta)
            logp = mc.negative_binomial_like(value[S], N[S]*mu_i, delta*pl.exp(Xz*zeta))
            return logp

        vars['observed_counts'] = obs

        @mc.deterministic(name='predicted_data_%s' % key)
        def predictions(value=value,
                        N=N,
                        S=data_sample,
                        mu=rates,
                        delta=delta):
            r_S = mc.rnegative_binomial(N[S]*mu, delta)/N[S]
            r = pl.zeros(len(vars['data']))
            r[S] = r_S
            return r

        vars['predicted_rates'] = predictions
        debug('likelihood of %s contains %d rates' % (key, len(vars['data'])))

    # now do the same thing for the lower bound data
    # TODO: refactor to remove duplicated code
    vars['lower_bound_data'] = []
    value = []
    N = []
    Xa = []
    Xb = []
    ai = []
    aw = []
    for d in lower_bound_data:
        try:
            age_indices, age_weights, Y_i, N_i = values_from(dm, d)
        except ValueError:
            debug('WARNING: could not calculate likelihood for data %d' % d['id'])
            continue

        value.append(Y_i*N_i)
        N.append(N_i)
        Xa.append(covariates(d, covariate_dict)[0])
        Xb.append(covariates(d, covariate_dict)[1])
        ai.append(age_indices)
        aw.append(age_weights)

        vars['lower_bound_data'].append(d)

    N = pl.array(N)
    value = pl.array(value)

    if len(vars['lower_bound_data']) > 0:
        @mc.observed
        @mc.stochastic(name='lower_bound_data_%s' % key)
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):

            # calculate study-specific rate function
            shifts = pl.exp(pl.dot(Xa, alpha) + pl.dot(Xb, pl.atleast_1d(beta)))
            exp_gamma = pl.exp(gamma)
            mu_i = [pl.dot(weights, bounds_func(s_i * exp_gamma[ages], ages)) for s_i, ages, weights in zip(shifts, age_indices, age_weights)]  # TODO: try vectorizing this loop to increase speed
            rate_param = mu_i*N
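            # only lower-bound observations whose predicted count falls below
            # the reported value contribute; bounds that are already satisfied
            # add nothing to the log-likelihood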
            violated_bounds = pl.nonzero(rate_param < value)
            logp = mc.negative_binomial_like(value[violated_bounds], rate_param[violated_bounds], delta)
            return logp

        vars['observed_lower_bounds'] = obs_lb
        debug('likelihood of %s contains %d lower bounds' % (key, len(vars['lower_bound_data'])))

    return vars
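
# A self-contained sketch (assumed toy names and values, not part of dismod3)
# of the negative-binomial rate likelihood pattern used by `obs` above:
# observed event counts are compared against expected counts N*mu with an
# overdispersion parameter delta, using the same PyMC 2 calls as this file.
import pymc as mc
import pylab as pl

def neg_binom_rate_sketch():
    N = pl.array([100., 250., 80.])     # effective sample sizes (toy values)
    counts = pl.array([12., 20., 9.])   # observed event counts (toy values)

    mu = mc.Uniform('mu_sketch', lower=1.e-9, upper=1., value=.1)
    delta = mc.Uniform('delta_sketch', lower=1., upper=1.e3, value=100.)

    @mc.observed
    @mc.stochastic(name='obs_sketch')
    def obs(value=counts, N=N, mu=mu, delta=delta):
        return mc.negative_binomial_like(value, N * mu, delta)

    m = mc.MCMC([mu, delta, obs])
    m.sample(iter=2000, burn=1000)
    return m.trace('mu_sketch')[:]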
Beispiel #47
0
def trim(x, a, b):
    return pl.maximum(a, pl.minimum(b, x))
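# e.g. trim(pl.array([-1., .5, 2.]), 0., 1.) gives array([0., .5, 1.]):
# each element is clipped into the closed interval [a, b]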
Beispiel #48
0
def set_birth_prev(value):
    model.vars['logit_C0'].value = mc.logit(pl.maximum(1.e-9, value))
Beispiel #49
0
 def smooth_rate(f=rate, age_indices=age_indices, C=C):
     log_rate = pl.log(pl.maximum(f, NEARLY_ZERO))
     return mc.mv_normal_cov_like(log_rate[age_indices] - log_rate[age_indices].mean(),
                                  pl.zeros_like(age_indices),
                                  C=C)
Beispiel #50
0
def f(x):
    return pylab.cos(x)**2 / pylab.sqrt(pylab.maximum(1, 2 * x - 1))
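
# Usage sketch (illustrative values): because the denominator uses
# pylab.maximum(1, 2*x - 1), the square-root argument is at least 1 and
# f(x) is bounded above by cos(x)**2.
import pylab

x_demo = pylab.linspace(0., 10., 101)
y_demo = f(x_demo)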