Beispiel #1
0
def new_gpr_draws(df2, response, amplitude, prior, scale, has_data, draws):
    '''
    Run Gaussian process smoothing for one location-age data frame and
    return random draws of the smoothed series, so that years without data
    are filled in from the prior.

    Parameters
    ----------
    df2 : data frame
        Must contain a "year" column, the columns named response,
        response + "_sd" and response + "_nsv", and, in the fourth-from-last
        column position, a boolean flag marking the rows that hold data.
    response : str
        Name of the observed-value column.
    amplitude : float
        Passed to the Matern covariance as amp.
    prior : array-like
        Prior values placed next to the years of df2 (one per row); they are
        linearly interpolated over year to form the GP mean function.
    scale : float
        Passed to the Matern covariance as scale.
    has_data : bool
        When true, the flagged rows are observed on the GP before drawing.
    draws : int
        Number of realizations to draw.

    Returns
    -------
    array
        The realizations evaluated at df2's years, transposed so that each
        draw is one column.
    '''
    all_indices = df2.index
    # Positional lookup of the data-flag column. The original used .ix, which
    # was removed in pandas 1.0; .iloc is the positional equivalent.
    has_obs = df2.iloc[:, -4]
    data_indices = df2[has_obs].index
    years = df2.loc[all_indices]["year"].values

    # De-duplicated (year, prior) table backing the interpolated mean.
    data = pd.DataFrame({"year": years, "prior": prior})
    data.sort_values("year", inplace=True)
    data.drop_duplicates(inplace=True)

    def mean_function(x):
        return np.interp(x, data.year.values, data.prior.values)

    M = gp.Mean(mean_function)
    C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2,
                      amp=amplitude, scale=scale)

    if has_data:
        df4 = df2.loc[data_indices]
        # Observation variance = squared standard deviation plus the
        # "_nsv" column.
        gp.observe(M=M, C=C, obs_mesh=df4.year.values,
                   obs_vals=df4[response].values,
                   obs_V=(df4[response + "_sd"].values)**2 +
                   df4[response + "_nsv"].values)

    # One fresh realization per draw, each evaluated at every year.
    ca_draws = np.array([gp.Realization(M, C)(years) for _ in range(draws)]).T
    return ca_draws
        def _setup(self):
            '''Build the GP mean/covariance from the current parameters and
            condition them on the regularized data.

            Unset hyper-parameters get defaults: diff_degree 3, amp the
            standard deviation of the residuals about the mean function, and
            scale 30.'''
            mesh, vals, errs = self._regularize()

            # Fill in whichever hyper-parameters the caller left as None.
            if self.diff_degree is None:
                self.diff_degree = 3
            if self.amp is None:
                residuals = vals - self.mean(mesh)
                self.amp = num.std(residuals)
            if self.scale is None:
                # Fixed default (an earlier version used half the x-range).
                self.scale = 30

            self.M = GP.Mean(self.mean)
            cov_params = dict(diff_degree=self.diff_degree,
                              amp=self.amp,
                              scale=self.scale)
            self.C = GP.Covariance(GP.matern.euclidean, **cov_params)

            # The observation variance is the squared error of each point.
            obs_variance = num.power(errs, 2)
            GP.observe(self.M, self.C,
                       obs_mesh=mesh, obs_vals=vals, obs_V=obs_variance)

            self.setup = True
            self.realization = None
Beispiel #3
0
 def collect_data(self):
     """Draw ten random 2-D points in [-10, 10), evaluate self.sinxfun on
     them and observe the results on the GP held in self.M / self.C.
     """
     # Ten input locations, two coordinates each.
     sample_points = np.random.uniform(-10, 10, (10, 2))
     # Observation variance; per the original author's note, smaller values
     # pull the fit closer to the data.
     noise_variance = np.array([.002, .002])
     # Function values at the sampled locations.
     observed = self.sinxfun(sample_points)
     gp.observe(self.M, self.C,
                obs_mesh=sample_points,
                obs_vals=observed,
                obs_V=noise_variance)
Beispiel #4
0
 def gaussian_process(self):
     """ Build a PyMC Gaussian Process (mean, covariance) pair that
     interpolates the population-by-age mesh/value data, with an extra
     observation of 0 appended at MAX_AGE.
     """
     # TODO: make this evaluate the function on arange(MAX_AGE) and store the results in the db for better performance
     mean, cov = uninformative_prior_gp(c=0.,  diff_degree=2., amp=10., scale=200.)

     # Append the (MAX_AGE, 0) point to the stored observations.
     ages = self.params['mesh'] + [ MAX_AGE ]
     values = self.params['vals'] + [ 0. ]

     # The final 0.0 is the observation variance.
     gp.observe(mean, cov, ages, values, 0.0)
     return mean, cov
Beispiel #5
0
    def gaussian_process(self):
        """
        Build a PyMC Gaussian Process (mean, covariance) pair that
        interpolates the population-by-age mesh/value data in self.data.
        """
        from pymc import gp
        from dismod3.bayesian_models import probabilistic_utils

        prior_gp = probabilistic_utils.uninformative_prior_gp(
            c=0.,  diff_degree=2., amp=10., scale=200.)
        mean, cov = prior_gp

        # Observe the stored mesh/values with an observation variance of 0.0.
        ages = self.data['mesh']
        values = self.data['vals']
        gp.observe(mean, cov, ages, values, 0.0)

        return mean, cov
Beispiel #6
0
def gp_puzzle_nub(diff_degree=2., amp=1., scale=1.5, steps=100):
    """ Generate a puzzle nub connecting point a to point b

    The x and y coordinates are each modelled by their own GP, conditioned
    on the data.puzzle_* observations, and evaluated on a grid running from
    0 to 1 in increments of 1/steps. Returns the pair (X, Y).
    """
    increment = 1. / steps

    def _sample_coordinate(observed_vals):
        # Fresh prior for every coordinate, conditioned on that coordinate's
        # observations at data.puzzle_t.
        mean, cov = uninformative_prior_gp(0., diff_degree, amp, scale)
        gp.observe(mean, cov, data.puzzle_t, observed_vals, data.puzzle_V)
        submodel = gp.GPSubmodel('GP', mean, cov, pl.arange(1))
        return submodel.value.f(pl.arange(0., 1.0001, increment))

    X = _sample_coordinate(data.puzzle_x)
    Y = _sample_coordinate(data.puzzle_y)
    return X, Y
Beispiel #7
0
def gp_puzzle_nub(diff_degree=2., amp=1., scale=1.5, steps=100):
    """ Generate a puzzle nub connecting point a to point b

    Returns (X, Y): the x and y coordinates of the nub, each sampled from a
    separate GP that was conditioned on the data.puzzle_* observations and
    evaluated on a grid from 0 to 1 with spacing 1/steps.
    """
    curves = []
    # Same recipe for both coordinates: x first, then y.
    for observed_vals in (data.puzzle_x, data.puzzle_y):
        mean, cov = uninformative_prior_gp(0., diff_degree, amp, scale)
        gp.observe(mean, cov, data.puzzle_t, observed_vals, data.puzzle_V)
        submodel = gp.GPSubmodel('GP', mean, cov, pl.arange(1))
        grid = pl.arange(0., 1.0001, 1. / steps)
        curves.append(submodel.value.f(grid))

    X, Y = curves
    return X, Y
Beispiel #8
0
    def setK(self, k):
        """Set the covariance parameter ``k`` and rebuild the state derived
        from it.

        Nothing happens when ``k`` equals the stored ``self._k``. Otherwise
        the adjusted bounds are moved outwards by the change in ``k`` (lower
        bound decreased, upper bound increased), the covariance, its matrix
        over ``self._fiber``, the inverse matrix and ``self._alpha`` are
        recomputed, the GP is re-observed with value one at every fiber
        point, and any previously precomputed variance/mean fields are
        recomputed.
        """
        if k != self._k:
            deltaK = (k - self._k)
            self._k = k
            self._adjustedBounds[0] -= deltaK
            self._adjustedBounds[1] += deltaK

            self._cov = _GP.Covariance(self._c, amp=1, r=self._k)
            self._cov_matrix = self._cov(self._fiber, self._fiber)
            self._cov_inv_matrix = linalg.inv(self._cov_matrix)
            # Column vector: ones(n) . inverse-covariance, transposed.
            self._alpha = numpy.asmatrix(
                numpy.dot(numpy.ones(len(self._fiber)),
                          self._cov_inv_matrix)).T

            self._mean = _GP.Mean(_constant, val=0)
            # Observe a value of one at every fiber point with a uniform
            # observation variance of self._epsilon.
            self._gp = _GP.observe(self._mean,
                                   self._cov,
                                   obs_mesh=self._fiber,
                                   obs_vals=numpy.ones(len(self._fiber)),
                                   obs_V=numpy.zeros(len(self._fiber)) +
                                   self._epsilon)

            # Identity test rather than `!= None`: if these fields hold
            # arrays, `!=` compares elementwise and cannot be used as a
            # plain truth value.
            if self._precomputedVariance is not None:
                self.precomputeVarianceField()
            if self._precomputedMean is not None:
                self.precomputeMeanField()
Beispiel #9
0
    def create_gps(self):
        """ Create the smoothness GP gps and the diffusion-associated
        blurring GP gpd.

        ToDo: compute gpd
        """
        # Zero-valued constant mean function.
        self._means = gp.Mean(constant_mean, val=0)

        # Covariance function. It is multiplied by amp**2, which effectively
        # scales realizations by amp: a larger amp lets realizations deviate
        # further from their mean. Here amp is fixed at 1.
        self._covs = gp.Covariance(self._cs_pymc, amp=1, R=self._r)

        fiber = self._fiber
        n_points = len(fiber)
        self.covs_matrix = self._covs(fiber, fiber)

        # Quantities needed to compute the inner product: the inverse
        # covariance matrix and the column vector ones(n) . inverse.
        self._invcovs_matrix = numpy.linalg.inv(self.covs_matrix)
        weighted_ones = numpy.dot(numpy.ones(n_points), self._invcovs_matrix)
        self.alphas = numpy.asmatrix(weighted_ones).T

        # Normally-distributed observations of value one at every fiber
        # point, all with variance self._observed_variance.
        unit_values = numpy.ones(n_points)
        variances = numpy.zeros(n_points) + self._observed_variance
        self.gps = gp.observe(self._means,
                              self._covs,
                              obs_mesh=fiber,
                              obs_vals=unit_values,
                              obs_V=variances)
Beispiel #10
0
    def mortality(self, key="all-cause_mortality", data=None):
        """ Calculate the all-cause mortality rate for the
        region and sex of disease_model, and return it
        in an array corresponding to age_mesh

        A previously stored initial value for ``key`` is returned as-is.
        Otherwise each usable data row becomes one observation (at the
        midpoint of its age range, on the logit scale) of a Gaussian
        process; the inverse-logit of the GP mean on the estimate age mesh
        is stored as the initial value for ``key`` and returned.

        Parameters
        ----------
        key : str, optional
          of the form 'all-cause_mortality+gbd_region+year+sex'
        data: list, optional
          the data list to extract all-cause mortality from; when empty or
          omitted, self.filter_data("all-cause_mortality data") is used
        """
        # `in` replaces dict.has_key, which exists only on Python 2.
        if key in self.params.get("initial_value", {}):
            return self.get_initial_value(key)

        if not data:
            data = self.filter_data("all-cause_mortality data")

        if len(data) == 0:
            return NEARLY_ZERO * np.ones(len(self.get_estimate_age_mesh()))

        M, C = uninformative_prior_gp(c=-1.0, scale=300.0)
        age = []
        val = []
        V = []
        for d in data:
            # NOTE(review): the result of extract_units was never used; the
            # call is kept in case it validates the row - confirm and drop.
            self.extract_units(d)
            a0 = d.get("age_start", MISSING)
            a1 = d.get("age_end", MISSING)
            y = self.value_per_1(d)
            se = self.se_per_1(d)

            # Rows without a standard error get a default of 0.01.
            if se == MISSING:
                se = 0.01
            # Rows missing an age bound or a value cannot be observed.
            if MISSING in [a0, a1, y]:
                continue

            age.append(0.5 * (a0 + a1))
            # Small offset, presumably to keep logit finite for zero rates.
            val.append(y + 0.00001)
            V.append(se ** 2.0)

        # Only observe when at least one row survived the filtering above;
        # the original re-tested len(data), which is always > 0 here.
        if age:
            gp.observe(M, C, age, mc.logit(val), V)

        normal_approx_vals = mc.invlogit(M(self.get_estimate_age_mesh()))
        self.set_initial_value(key, normal_approx_vals)
        return self.get_initial_value(key)
Beispiel #11
0
def fit_gpr(df, amp, obs_variable='observed_data', obs_var_variable='obs_data_variance', mean_variable='st_prediction', year_variable='year', scale=40, diff_degree=2, draws=0):
    """Fit a Gaussian process regression over time and merge the results
    back onto ``df``.

    The GP mean is a linear interpolation of ``mean_variable`` over
    ``year_variable``; the covariance is Matern with the given ``amp``,
    ``scale`` and ``diff_degree``. Rows where both ``obs_variable`` and
    ``obs_var_variable`` are non-null are observed on the GP.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain the four named columns.
    amp : float
        Amplitude of the covariance function.
    obs_variable, obs_var_variable : str
        Columns holding the observations and their variances.
    mean_variable : str
        Column holding the prior mean.
    year_variable : str
        Column holding the time axis; also the merge key.
    scale, diff_degree : numeric
        Remaining covariance parameters.
    draws : int
        Number of realizations to add as columns 'draw0', 'draw1', ...

    Returns
    -------
    pandas.DataFrame
        ``df`` left-merged with 'gpr_mean', 'gpr_var', 'gpr_lower' and
        'gpr_upper' (mean -/+ 1.96 standard deviations). When ``draws`` > 0
        the draw columns are included and the original columns come first.
    """
    initial_columns = list(df.columns)

    # Boolean-mask selection via .loc (.ix was removed in pandas 1.0).
    has_obs = pd.notnull(df[obs_variable]) & pd.notnull(df[obs_var_variable])
    data = df.loc[has_obs]
    mean_prior = df[[year_variable, mean_variable]].drop_duplicates()

    def mean_function(x):
        return np.interp(x, mean_prior[year_variable], mean_prior[mean_variable])

    M = gp.Mean(mean_function)
    C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=diff_degree, amp=amp, scale=scale)

    if len(data) > 0:
        gp.observe(M=M, C=C, obs_mesh=data[year_variable], obs_V=data[obs_var_variable], obs_vals=data[obs_variable])

    model_mean = M(mean_prior[year_variable]).T
    model_variance = C(mean_prior[year_variable])
    # The original wrapped this line in a bare `except:` that dropped into
    # pdb; errors now propagate to the caller.
    model_lower = model_mean - np.sqrt(model_variance)*1.96
    model_upper = model_mean + np.sqrt(model_variance)*1.96

    if draws > 0:
        # Use year_variable rather than a hard-coded 'year' so that callers
        # with a differently named time column work.
        # NOTE(review): the draws are evaluated on every integer year from
        # min to max, so this assumes the years in mean_prior are
        # consecutive and sorted - confirm against callers.
        first_year = min(mean_prior[year_variable])
        last_year = max(mean_prior[year_variable])
        realizations = [gp.Realization(M, C)(range(first_year, last_year + 1)) for i in range(draws)]

        real_draws = pd.DataFrame({year_variable:mean_prior[year_variable],'gpr_mean':model_mean,'gpr_var':model_variance,'gpr_lower':model_lower,'gpr_upper':model_upper})

        for i,r in enumerate(realizations):
            real_draws["draw"+str(i)] = r

        real_draws = pd.merge(df,real_draws,on=year_variable,how='left')
        # Original columns first, then the new GPR columns.
        gpr_columns = list(set(real_draws.columns) - set(initial_columns))
        initial_columns.extend(gpr_columns)

        return real_draws[initial_columns]

    results = pd.DataFrame({year_variable:mean_prior[year_variable],'gpr_mean':model_mean,'gpr_var':model_variance,'gpr_lower':model_lower,'gpr_upper':model_upper})
    return pd.merge(df,results,on=year_variable,how='left')
Beispiel #12
0
def normal_approx(asrf):
    """
    This 'normal approximation' of the age-specific rate function is
    formed by using each rate to produce an estimate of the
    age-specific rate, and then saying that that logit of the true
    rate function is a gaussian process and these age-specific rates
    are observations of this gaussian process.

    This is less valid and less accurate than using mcmc or map on the
    vars produced by the model_rate_list method below, but maybe it
    will be faster.

    The approximation, evaluated on asrf.fit['out_age_mesh'], is stored in
    asrf.fit['normal_approx'] and asrf is saved. Returns the (mean,
    covariance) pair M, C.
    """
    M,C = uninformative_prior_gp()

    # use prior to set rate near zero as requested: each line of the form
    # 'zero <age_start> <age_end>' pins the logit-rate to -10 on that range
    for prior_str in asrf.fit.get('priors', '').split('\n'):
        prior = prior_str.split()
        if len(prior) > 0 and prior[0] == 'zero':
            age_start = int(prior[1])
            age_end = int(prior[2])

            gp.observe(M, C, range(age_start, age_end+1), [-10.], [0.])

    for r in asrf.rates.all():
        mesh, obs, V = logit_rate_from_range(r)

        # make sure that there is something to observe
        if mesh == []:
            continue

        # Observe only the middle point of each rate's mesh. This makes
        # fewer inferences than possible (observing the whole mesh would
        # make more than are valid from the data): it may be better to
        # waste information than have false confidence.
        # Floor division keeps the index an int on Python 3 as well.
        ii = len(mesh) // 2
        gp.observe(M, C, [mesh[ii]], [obs[ii]], [V[ii]])

    x = asrf.fit['out_age_mesh']
    na_rate = mc.invlogit(M(x))
    asrf.fit['normal_approx'] = list(na_rate)
    asrf.save()

    return M, C
Beispiel #13
0
    def _setup(self):
        '''Set up the interpolator from the current set of params.

        Hyper-parameters still set to None are defaulted (diff_degree 3,
        amp = std of the data about the mean function, scale 30); then the
        GP mean and covariance are created and conditioned on the
        regularized data.'''
        xs, ys, sigmas = self._regularize()

        if self.diff_degree is None:
            self.diff_degree = 3

        if self.amp is None:
            # Spread of the data around the mean function.
            self.amp = num.std(ys - self.mean(xs))

        if self.scale is None:
            # Fixed default; half the x-range was an earlier alternative.
            self.scale = 30

        mean_fn = GP.Mean(self.mean)
        cov_fn = GP.Covariance(GP.matern.euclidean,
                               diff_degree=self.diff_degree,
                               amp=self.amp,
                               scale=self.scale)
        self.M = mean_fn
        self.C = cov_fn

        # Observation variance is the squared per-point error.
        GP.observe(mean_fn, cov_fn,
                   obs_mesh=xs,
                   obs_vals=ys,
                   obs_V=num.power(sigmas, 2))
        self.setup = True
        self.realization = None
  def setK( self, k ):
    """Set the covariance parameter ``k`` and rebuild the state derived
    from it.

    Nothing happens when ``k`` equals the stored ``self._k``. Otherwise the
    adjusted bounds are moved outwards by the change in ``k``, the
    covariance, its matrix over ``self._fiber``, the inverse matrix and
    ``self._alpha`` are recomputed, the GP is re-observed with value one at
    every fiber point, and any previously precomputed variance/mean fields
    are recomputed.
    """
    if k != self._k:
      deltaK = (k - self._k)
      self._k = k
      self._adjustedBounds[0] -= deltaK
      self._adjustedBounds[1] += deltaK

      n_points = len(self._fiber)

      self._cov = _GP.Covariance(self._c, amp=1, r=self._k)
      self._cov_matrix = self._cov(self._fiber, self._fiber)
      self._cov_inv_matrix = linalg.inv(self._cov_matrix)
      # Column vector: ones(n) . inverse-covariance, transposed.
      self._alpha = numpy.asmatrix(
          numpy.dot(numpy.ones(n_points), self._cov_inv_matrix)).T

      self._mean = _GP.Mean(_constant, val=0)
      # Observe a value of one at every fiber point with a uniform
      # observation variance of self._epsilon.
      self._gp = _GP.observe(self._mean, self._cov,
                             obs_mesh=self._fiber,
                             obs_vals=numpy.ones(n_points),
                             obs_V=numpy.zeros(n_points) + self._epsilon)

      # Identity test rather than `!=None`: if these fields hold arrays,
      # `!=` compares elementwise and cannot be used as a plain truth value.
      if self._precomputedVariance is not None:
        self.precomputeVarianceField()
      if self._precomputedMean is not None:
        self.precomputeMeanField()
Beispiel #15
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, iters):
    """Run Gaussian process regression per country and write the draws to CSV.

    For every country (``iso3``) and every submodel, the spacetime
    predictions are linearly interpolated over year to form the GP mean,
    the observed ``lt_prev`` values are observed on the GP (only when the
    country has more than one observation), and ``iters`` realizations are
    drawn for every year present in the input.

    Parameters
    ----------
    infile : str
        CSV read with csv2rec. Columns used: 'iso3', 'year', 'lt_prev' and,
        for each submodel m (1-based), 'spacetime_<m>',
        'spacetime_amplitude_<m>' and 'spacetime_data_variance_<m>'.
    outfile : str
        Destination CSV, written with rec2csv.
    dv_list : sequence
        Accepted for interface compatibility; not used by this function
        (the observed values always come from 'lt_prev').
    scale : numeric
        Scale of the Matern covariance.
    number_submodels : int
        Number of submodels to process.
    iters : int
        Number of realizations per submodel.

    The output holds 'iso3' and 'year' plus, per submodel,
    'gpr_<m>_spacetime_d<i>' for each draw and 'gpr_<m>_spacetime_mean'.
    """
    # load in the data, dropping rows without a spacetime prediction for
    # any of the submodels
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        all_data = np.delete(
            all_data,
            np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0],
            axis=0)

    # HKG, MAC and SGP are excluded: they have no data and triggered an
    # error further down that was never investigated.
    for excluded_iso3 in ("HKG", "MAC", "SGP"):
        all_data = all_data[all_data['iso3'] != excluded_iso3]

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of countries; each is repeated once per year so that
    # the output has one row per country-year
    country_age = np.array(list(all_data.iso3))
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results
    draws = [
        np.empty(len(country_age_list), 'float')
        for i in range(iters * number_submodels * 2)
    ]
    iso3 = np.empty(len(country_age_list), '|S3')
    year = np.empty(len(country_age_list), 'int')

    # loop through countries
    for ca in np.unique(country_age_list):

        print('GPRing ' + ca)

        # subset the data for this particular country
        ca_data = all_data[country_age == ca]

        # subset just the observed data; an object-typed column is treated
        # as holding no observations at all
        if ca_data['lt_prev'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_prev']) == 0)]
            has_data = len(ca_observed) > 1
        else:
            has_data = False

        # loop through each submodel
        for m in range(number_submodels):

            # loop through spacetime/linear (only spacetime is used)
            for x, t in enumerate(['spacetime']):

                # make a list of the spacetime predictions, one per year
                ca_prior = np.array([
                    np.mean(ca_data[t + '_' + str(m + 1)][ca_data.year == y])
                    for y in year_list
                ])

                # find the amplitude for this country
                amplitude = np.mean(ca_data[t + '_amplitude_' + str(m + 1)])

                # make a linear interpolation of the spatio-temporal
                # predictions to use as the mean function for GPR
                def mean_function(x):
                    return np.interp(x, year_list, ca_prior)

                # setup the mean and covariance functions
                M = gp.Mean(mean_function)
                C = gp.Covariance(eval_fun=gp.matern.euclidean,
                                  diff_degree=2,
                                  amp=amplitude,
                                  scale=scale)

                # observe the data if there is any
                if has_data:
                    gp.observe(M=M,
                               C=C,
                               obs_mesh=ca_observed.year,
                               obs_V=ca_observed[t + '_data_variance_' +
                                                 str(m + 1)],
                               obs_vals=ca_observed['lt_prev'])

                # draw realizations from the data
                realizations = [gp.Realization(M, C) for i in range(iters)]

                # save the data for this country into the results arrays
                rows = country_age_list == ca
                iso3[rows] = ca[0:3]
                year[rows] = year_list.T
                for i in range(iters):
                    draws[((2 * m + x) * iters) + i][rows] = \
                        realizations[i](year_list)

    # save the results
    print('Saving GPR results')
    # Exactly one name per array. The original listed three names
    # ('iso3', 'age_group', 'year') for two arrays, which labelled the year
    # column 'age_group'.
    names = ['iso3', 'year']
    results = np.core.records.fromarrays([iso3, year], names=names)
    for m in range(number_submodels):
        for x, t in enumerate(['spacetime']):
            first = (2 * m + x) * iters
            for i in range(iters):
                results = recfunctions.append_fields(
                    results, 'gpr_' + str(m + 1) + '_' + t + '_d' + str(i + 1),
                    draws[first + i])
            results = recfunctions.append_fields(
                results, 'gpr_' + str(m + 1) + '_' + t + '_mean',
                np.mean(draws[first:first + iters], axis=0))

    # Write once, after every submodel has been appended (the original
    # rewrote the file on each pass of the submodel loop).
    rec2csv(results, outfile)
Beispiel #16
0
def fit_gpr(
        df, amp, obs_variable='observed_data',
        obs_var_variable='obs_data_variance', mean_variable='st_prediction',
        year_variable='year_id', scale=10, diff_degree=2, draws=0):
    """Fit a Gaussian process regression over time and merge it onto ``df``.

    The GP mean linearly interpolates ``mean_variable`` over
    ``year_variable``; the covariance is Matern with the supplied ``amp``,
    ``scale`` and ``diff_degree``. Rows with both ``obs_variable`` and
    ``obs_var_variable`` present are observed on the GP.

    Returns ``df`` left-merged on ``year_variable`` with 'gpr_mean',
    'gpr_var', 'gpr_lower' and 'gpr_upper' (mean -/+ 1.96 standard
    deviations). When ``draws`` > 0, columns 'draw_0' ... are added as well,
    sampled directly from the multivariate normal given by the GP mean and
    covariance, and the result is ordered as original columns, summary
    columns, draw columns.
    """
    base_columns = list(df.columns)

    # Rows with both an observation and its variance.
    observed_mask = (df[obs_variable].notnull()) & (
        df[obs_var_variable].notnull())
    observed = df[observed_mask]
    prior = df[[year_variable, mean_variable]].drop_duplicates()

    def mean_function(x):
        return np.interp(x, prior[year_variable], prior[mean_variable])

    M = gp.Mean(mean_function)
    C = gp.Covariance(
        eval_fun=gp.matern.euclidean, diff_degree=diff_degree,
        amp=amp, scale=scale)

    if len(observed) > 0:
        gp.observe(
            M=M, C=C,
            obs_mesh=observed[year_variable],
            obs_vals=observed[obs_variable],
            obs_V=observed[obs_var_variable])

    # Posterior mean, pointwise variance and bounds at the prior's years.
    model_mean = M(prior[year_variable]).T
    model_variance = C(prior[year_variable])
    model_lower = model_mean - np.sqrt(model_variance)*1.96
    model_upper = model_mean + np.sqrt(model_variance)*1.96

    summary = pd.DataFrame({
        year_variable: prior[year_variable],
        'gpr_mean': model_mean,
        'gpr_var': model_variance,
        'gpr_lower': model_lower,
        'gpr_upper': model_upper})

    if not draws > 0:
        return pd.merge(df, summary, on=year_variable, how='left')

    # Sample straight from the multivariate normal; pymc's
    # gp.Realization(M, C) should give the same result but is slower.
    full_cov = C(prior[year_variable], prior[year_variable])
    samples = np.random.multivariate_normal(model_mean, full_cov, draws)

    for idx, sample in enumerate(samples):
        summary["draw_"+str(idx)] = sample

    merged = pd.merge(df, summary, on=year_variable, how='left')
    ordered = base_columns
    ordered.extend(['gpr_mean', 'gpr_var', 'gpr_lower', 'gpr_upper'])
    ordered.extend(['draw_'+str(i) for i in range(draws)])

    return merged[ordered]
Beispiel #17
0
def fit_gpr(df,
            amp,
            obs_variable='observed_data',
            obs_var_variable='obs_data_variance',
            mean_variable='st_prediction',
            year_variable='year',
            scale=40,
            diff_degree=2,
            draws=0):
    """Fit a Gaussian process regression over time and merge the results
    back onto ``df``.

    The GP mean is a linear interpolation of ``mean_variable`` over
    ``year_variable``; the covariance is Matern with the given ``amp``,
    ``scale`` and ``diff_degree``. Rows where both ``obs_variable`` and
    ``obs_var_variable`` are non-null are observed on the GP.

    Parameters
    ----------
    df : pandas.DataFrame
        Input rows; must contain the four named columns.
    amp : float
        Amplitude of the covariance function.
    obs_variable, obs_var_variable : str
        Columns holding the observations and their variances.
    mean_variable : str
        Column holding the prior mean.
    year_variable : str
        Column holding the time axis; also the merge key.
    scale, diff_degree : numeric
        Remaining covariance parameters.
    draws : int
        Number of realizations to add as columns 'draw0', 'draw1', ...

    Returns
    -------
    pandas.DataFrame
        ``df`` left-merged with 'gpr_mean', 'gpr_var', 'gpr_lower' and
        'gpr_upper' (mean -/+ 1.96 standard deviations). When ``draws`` > 0
        the draw columns are included and the original columns come first.
    """
    initial_columns = list(df.columns)

    # Boolean-mask selection via .loc (.ix was removed in pandas 1.0).
    has_obs = (pd.notnull(df[obs_variable])
               & pd.notnull(df[obs_var_variable]))
    data = df.loc[has_obs]
    mean_prior = df[[year_variable, mean_variable]].drop_duplicates()

    def mean_function(x):
        return np.interp(x, mean_prior[year_variable],
                         mean_prior[mean_variable])

    M = gp.Mean(mean_function)
    C = gp.Covariance(eval_fun=gp.matern.euclidean,
                      diff_degree=diff_degree,
                      amp=amp,
                      scale=scale)

    if len(data) > 0:
        gp.observe(M=M,
                   C=C,
                   obs_mesh=data[year_variable],
                   obs_V=data[obs_var_variable],
                   obs_vals=data[obs_variable])

    model_mean = M(mean_prior[year_variable]).T
    model_variance = C(mean_prior[year_variable])
    model_lower = model_mean - np.sqrt(model_variance) * 1.96
    model_upper = model_mean + np.sqrt(model_variance) * 1.96

    results = pd.DataFrame({
        year_variable: mean_prior[year_variable],
        'gpr_mean': model_mean,
        'gpr_var': model_variance,
        'gpr_lower': model_lower,
        'gpr_upper': model_upper
    })

    if draws > 0:
        # Use year_variable rather than a hard-coded 'year' so that callers
        # with a differently named time column work.
        # NOTE(review): the draws are evaluated on every integer year from
        # min to max, so this assumes the years in mean_prior are
        # consecutive and sorted - confirm against callers.
        first_year = min(mean_prior[year_variable])
        last_year = max(mean_prior[year_variable])
        realizations = [
            gp.Realization(M, C)(range(first_year, last_year + 1))
            for i in range(draws)
        ]

        for i, r in enumerate(realizations):
            results["draw" + str(i)] = r

        real_draws = pd.merge(df, results, on=year_variable, how='left')
        # Original columns first, then the new GPR columns.
        gpr_columns = list(set(real_draws.columns) - set(initial_columns))
        initial_columns.extend(gpr_columns)

        return real_draws[initial_columns]

    return pd.merge(df, results, on=year_variable, how='left')
Beispiel #18
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test,
            spacetime_iters, top_submodel):
    # load in the data
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        if all_data['spacetime_' + str(m + 1)].dtype == 'float64':
            all_data = np.delete(
                all_data,
                np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0],
                axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups
    country_age = np.array([
        str(all_data.iso3[i]) + '_' + str(all_data.age_group[i])
        for i in range(len(all_data))
    ])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results
    total_iters = np.sum(spacetime_iters)
    draws = [
        np.empty(len(country_age_list), 'float') for i in range(total_iters)
    ]
    if (top_submodel > 0):
        top_submodel_draws = [
            np.empty(len(country_age_list), 'float') for i in range(100)
        ]
    iso3 = np.empty(len(country_age_list), '|S3')
    age_group = np.empty(len(country_age_list), 'int')
    year = np.empty(len(country_age_list), 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0)
                                  & (ca_data['test_' + test] == 0)]
            if len(ca_observed) > 1:
                has_data = True
            else:
                has_data = False
        else:
            has_data = False

        # keep track of how many iterations have been added for this model
        iter_counter = 0

        # loop through each submodel
        for m in range(number_submodels):

            # identify the dependent variable for this model
            dv = dv_list[m]

            # continue making predictions if we actually need draws for this model
            if (spacetime_iters[m] > 0) or (m + 1 == top_submodel):

                # skip models with no spacetime results
                if all_data['spacetime_' + str(m + 1)].dtype != 'float64':
                    for i in range(spacetime_iters[m]):
                        draws[iter_counter][country_age_list == ca] = np.NaN
                        iter_counter += 1
                    if (m + 1 == top_submodel):
                        for i in range(100):
                            top_submodel_draws[i][country_age_list ==
                                                  ca] = np.NaN
                    continue

                # make a list of the spacetime predictions
                ca_prior = np.array([
                    np.mean(ca_data['spacetime_' +
                                    str(m + 1)][ca_data.year == y])
                    for y in year_list
                ])

                # find the amplitude for this country/age
                amplitude = np.mean(ca_data['spacetime_amplitude_' +
                                            str(m + 1)])

                # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
                def mean_function(x):
                    return np.interp(x, year_list, ca_prior)

                # setup the covariance function
                M = gp.Mean(mean_function)
                C = gp.Covariance(eval_fun=gp.matern.euclidean,
                                  diff_degree=2,
                                  amp=amplitude,
                                  scale=scale)

                # observe the data if there is any
                if has_data:
                    gp.observe(M=M,
                               C=C,
                               obs_mesh=ca_observed.year,
                               obs_V=ca_observed['spacetime_data_variance_' +
                                                 str(m + 1)],
                               obs_vals=ca_observed[dv])

                # draw realizations from the data
                realizations = [
                    gp.Realization(M, C) for i in range(spacetime_iters[m])
                ]

                # save the data for this country/age into the results array
                iso3[country_age_list == ca] = ca[0:3]
                age_group[country_age_list == ca] = ca[4:]
                year[country_age_list == ca] = year_list.T
                for i in range(spacetime_iters[m]):
                    try:
                        draws[iter_counter][country_age_list ==
                                            ca] = realizations[i](year_list)
                    except:
                        print('Failure in ' + ca)
                    iter_counter += 1

                # if it's the top submodel, do 100 additional draws
                if (m + 1 == top_submodel):
                    realizations = [gp.Realization(M, C) for i in range(100)]
                    for i in range(100):
                        try:
                            top_submodel_draws[i][country_age_list ==
                                                  ca] = realizations[i](
                                                      year_list)
                        except:
                            print('Failure in ' + ca)

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for i in range(total_iters):
        results = recfunctions.append_fields(results,
                                             'ensemble_d' + str(i + 1),
                                             draws[i])
    if (top_submodel > 0):
        for i in range(100):
            results = recfunctions.append_fields(results,
                                                 'top_submodel_d' + str(i + 1),
                                                 top_submodel_draws[i])
    rec2csv(results, outfile)
Beispiel #19
0
    def plot2dsurf(self,
                   param1,
                   param2,
                   ax=None,
                   xrange=None,
                   yrange=None,
                   bins=30,
                   smooth=False,
                   bfac=2,
                   sfac=1.,
                   dd=3,
                   cmap=cm.gray_r,
                   levels=[],
                   ccolor='red',
                   fill=False,
                   ccmap=None,
                   falpha=1.0,
                   outfile=None,
                   zorder=None):
        '''Plot a 2D binned parameter surface for [param1] versus [param2].

        If [ax] is supplied it is used for plotting, otherwise a new figure
        and axes are created.  [xrange] and [yrange] restrict the binned
        region; [bins] is passed to histogram2d.  If [smooth], the binned
        surface is smoothed with a Gaussian Process when pymc.gp is
        available (covariance scale multiplied by [sfac], diff_degree [dd])
        and with a bivariate spline otherwise (knots at every [bfac]-th bin
        centre).  If [cmap] is None, no image is drawn.  If [levels] is
        given as fractions (0.68, 0.95, etc), contours enclosing those
        fractions of the data are drawn in [ccolor]; with [fill] they are
        also filled using [ccmap] and transparency [falpha].  [zorder] is
        passed on to the contour calls.

        When the axes were created here, they are labelled, limited to
        [xrange]/[yrange], the figure is saved to [outfile] if given, and
        the figure is returned.  With a caller-supplied [ax] nothing is
        returned.

        Raises RuntimeError if either trace is not one-dimensional.'''
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            own_ax = True
        else:
            own_ax = False

        tr1 = self.get_trace0(param1)
        tr2 = self.get_trace0(param2)
        if len(tr1.shape) != 1 or len(tr2.shape) != 1:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise RuntimeError(
                "Error, variables must be scalars, try using ':' notation")

        # Named hist_range so the builtin `range` is not shadowed.
        hist_range = [[tr1.min(), tr1.max()], [tr2.min(), tr2.max()]]
        if xrange is not None:
            hist_range[0] = list(xrange)
        if yrange is not None:
            hist_range[1] = list(yrange)

        # first, bin up the data (all of it).  The original passed tr1 for
        # both axes, which histogrammed param1 against itself.
        grid, xs, ys = histogram2d(tr1, tr2, bins=bins, range=hist_range)
        grid = grid.T * 1.0
        xplot = linspace(xs[0], xs[-1], 101)
        yplot = linspace(ys[0], ys[-1], 101)
        extent = [xs[0], xs[-1], ys[0], ys[-1]]

        # Convert bin edges to bin centres.
        xs = (xs[1:] + xs[:-1]) / 2
        ys = (ys[1:] + ys[:-1]) / 2

        x, y = meshgrid(xs, ys)
        tx = xs[::bfac]
        ty = ys[::bfac]
        if smooth and not gp:
            # Spline fallback: least-squares spline with the given knots,
            # evaluated on a 501 x 501 grid.
            tck = bisplrep(ravel(x),
                           ravel(y),
                           ravel(grid),
                           task=-1,
                           tx=tx,
                           ty=ty)
            x = linspace(xs[0], xs[-1], 501)
            y = linspace(ys[0], ys[-1], 501)
            grid = bisplev(x, y, tck).T
        elif smooth and gp:
            # GP smoothing: constant mean at the median bin count and an
            # anisotropic Matern covariance scaled to the trace ranges.
            M = gp.Mean(
                lambda x: zeros(x.shape[:-1], dtype=float) + median(grid))
            scalerat = (tr2.max() - tr2.min()) / (tr1.max() - tr1.min())
            C = gp.Covariance(gp.matern.aniso_euclidean,
                              diff_degree=dd,
                              scale=(tr1.max() - tr1.min()) * sfac,
                              amp=std(grid),
                              scalerat=scalerat)
            x, y = meshgrid(xs, ys)
            mesh = vstack((ravel(x), ravel(y))).T
            # The bin counts serve as both observed values and variances.
            gp.observe(M,
                       C,
                       obs_mesh=mesh,
                       obs_vals=ravel(grid),
                       obs_V=ravel(grid))
            dplot = dstack(meshgrid(xplot, yplot))
            grid, Vsurf = gp.point_eval(M, C, dplot)

        # Smoothing can undershoot; clip negative densities to zero.
        grid = where(grid < 0, 0, grid)

        if cmap:
            ax.imshow(grid,
                      extent=extent,
                      origin='lower',
                      aspect='auto',
                      interpolation='nearest',
                      cmap=cmap)
        if levels:
            # Convert each requested enclosed fraction into a density
            # threshold using the cumulative sum of the sorted cell
            # probabilities.
            prob = ravel(grid) / sum(grid)
            sprob = sort(prob)
            cprob = 1.0 - cumsum(sprob)
            clevels = []
            for level in levels:
                idx = nonzero(greater(cprob - level, 0))[0][-1]
                clevels.append(sprob[idx])
            prob.shape = grid.shape

            clevels.sort()
            norm = Normalize(clevels[0] * 0.5, clevels[-1] * 1.3)
            if fill:
                ax.contourf(prob,
                            levels=clevels + [1],
                            extent=extent,
                            origin='lower',
                            alpha=falpha,
                            cmap=ccmap,
                            norm=norm,
                            zorder=zorder)
            ax.contour(prob,
                       levels=clevels,
                       colors=ccolor,
                       extent=extent,
                       origin='lower',
                       linewidths=2,
                       zorder=zorder)

        if own_ax:
            ax.set_xlabel("$%s$" % param1)
            ax.set_ylabel("$%s$" % param2)
            if xrange is not None:
                ax.set_xlim(xrange[0], xrange[1])
            if yrange is not None:
                ax.set_ylim(yrange[0], yrange[1])
            plt.draw()
            if outfile is not None:
                fig.savefig(outfile)
            return fig
Beispiel #20
0
def plus_minus(arr,
               bins=30,
               conf=0.68,
               xrange=None,
               func='poly',
               fit_log=True,
               order=7,
               debug=False,
               zero_pad=False,
               end_tol=(None, None)):
    '''
    Estimate the mode of the samples in arr together with the lower and
    upper distances that enclose a fraction conf of the probability.

    The samples are histogrammed and the histogram is then either modelled
    by a smooth curve (func='poly' or func='gp') or used directly (any other
    value of func).

    Parameters
    ----------
    arr : samples, handed to histogram().
    bins : bins argument for histogram().
    conf : enclosed probability.  The excluded probability (1 - conf) is
        split evenly between the two tails, or put entirely in one tail
        when the mode sits on the first/last bin (or the curve is
        monotonic).
    xrange : (low, high) handed to histogram() as its range.  When None the
        outer bin edges are used for the plotting/integration grid.
    func : 'poly' fits the histogram with fit_poly.fitpoly, 'gp' conditions
        a pymc Gaussian process on it; anything else selects the plain
        bin-counting estimate.
    fit_log : fit log(counts) of the non-empty bins (variance 1/counts)
        instead of the raw counts.
    order : passed to fit_poly.fitpoly as k ('poly' only).
    debug : draw the histogram, the fitted curve and the limits in a new
        figure.
    zero_pad : 'gp' only, and only when fit_log is False: extend the
        observation mesh on both sides with zero-valued observations.
    end_tol : (low, high) pair; an entry that is not None is compared with
        (count in first/last bin) / (largest count).  If the ratio exceeds
        the tolerance that end is treated as a limit rather than an
        interval edge ('poly'/'gp' only).

    Returns
    -------
    (mod, minus, plus) : the mode and the distances from the mode to the
        lower and upper limits; nan where a limit could not be set.

    NOTE: when an end_tol test fires, the function returns early with
    (mod, nan, upper) or (mod, lower, nan), where upper/lower are the
    limit positions themselves, not offsets from the mode; with both ends
    flagged it returns (mod, nan, nan).

    Raises
    ------
    RuntimeError : func == 'gp' but the module-level gp is unavailable.
    '''
    hist0, bins = histogram(arr, bins=bins, range=xrange)
    xb = (bins[1:] + bins[:-1]) / 2
    if fit_log:
        # log() is undefined for empty bins, so only non-empty ones are kept
        gids = greater(hist0, 0)
        xb = xb[gids]
        var = 1. / hist0[gids]
        hist = log(hist0[gids])
    else:
        # "* 1" makes copies so hist0 itself is never modified
        var = hist0 * 1
        hist = hist0 * 1
    if xrange is None:
        xrange = (bins[0], bins[-1])
    xplot = linspace(xrange[0] * 0.9, xrange[1] * 1.1, 101)
    if debug:
        fig = plt.figure()
        if fit_log:
            y1 = hist
            y2 = exp(hist)
        else:
            y1 = log(hist)
            y2 = hist
        ax1 = fig.add_subplot(211)
        ax1.plot(xb, y1, 'o')
        ax2 = fig.add_subplot(212)
        ax2.plot(xb, y2, 'o')

    if func in ('gp', 'poly'):
        if func == 'gp':
            if not gp:
                raise RuntimeError(
                    "To use GP interpolation, you need to install pymc")
            scale = xb.max() - xb.min()
            M = gp.Mean(
                lambda x: zeros(x.shape[0], dtype=float32) + median(hist))
            C = gp.Covariance(gp.matern.euclidean,
                              diff_degree=3,
                              scale=scale * 0.5,
                              amp=std(hist))

            # Pad with zeros
            if zero_pad and not fit_log:
                obs_mesh = concatenate([
                    xb.min() + (xb - xb.max())[:-1], xb,
                    xb.max() + (xb - xb.min())[1:]
                ])
                obs = concatenate([hist[1:] * 0, hist, hist[1:] * 0])
                var = concatenate([hist[1:] * 0, var, hist[1:] * 0])
            else:
                obs_mesh = xb
                obs = hist
            gp.observe(M, C, obs_mesh=obs_mesh, obs_vals=obs, obs_V=var)

            # The fitted curve gets its own name instead of overwriting the
            # 'func' string argument.
            model = lambda x: wrap_M(x, M, xb[0], xb[-1], log=fit_log)

        else:
            x0 = xb[argmax(hist)]
            pars, epars = fit_poly.fitpoly(xb,
                                           hist,
                                           w=1. / var,
                                           x0=x0,
                                           k=order)
            model = lambda x: wrap_poly(x, x0, pars, xb[0], xb[-1],
                                        log=fit_log)

        if debug:
            ax1.plot(xplot, log(model(xplot)), '-')
            ax2.plot(xplot, model(xplot), '-')
        oneside = False
        peak = argmax(hist)
        if peak == 0:
            mod = xb[0]
            oneside = True
        elif peak == len(xb) - 1:
            mod = xb[-1]
            oneside = True
        else:
            mod0 = xb[peak]
            try:
                mod = brent(lambda x: -model(x),
                            brack=(xb.min(), mod0, xb.max()))
            except Exception:
                # monotonic.  Take extremum
                oneside = True
                if model(xb[0]) > model(xb[-1]):
                    mod = xb[0]
                else:
                    mod = xb[-1]

        # normalise the fitted curve over the span of the bin centres
        fac = integrate.quad(model, xb[0], xb[-1])[0]
        prob = lambda x: model(x) / fac

        # end tolerance, if requested
        lower_limit = False
        upper_limit = False
        if end_tol[0] is not None and float(
                hist0[0]) / hist0.max() > end_tol[0]:
            lower_limit = True
        if end_tol[1] is not None and float(
                hist0[-1]) / hist0.max() > end_tol[1]:
            upper_limit = True
        if lower_limit and upper_limit:
            #  too flat, return mode, but no limits
            return mod, nan, nan
        elif lower_limit and not upper_limit:
            # one-sided: all of (1 - conf) goes in the upper tail
            tail = (1 - conf)
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            return mod, nan, upper
        elif upper_limit and not lower_limit:
            # one-sided: all of (1 - conf) goes in the lower tail
            tail = (1 - conf)
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], xplot[-1])
            return mod, lower, nan

        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')

        if oneside:
            tail = (1 - conf)
        else:
            tail = (1 - conf) / 2
        if integrate.quad(prob, xplot[0], mod)[0] < tail:
            # No lower bound
            minus = nan
        else:
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], mod)
            minus = mod - lower
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        # test for upper bound
        if integrate.quad(prob, mod, xplot[-1])[0] < tail:
            # No upper bound
            plus = nan
        else:
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            plus = upper - mod
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')

    else:
        # Plain bin counting: walk inwards from both ends of the normalised
        # histogram until enough probability has been excluded.
        hist = hist * 1.0 / sum(hist)
        mid = argmax(hist)
        mod = xb[mid]
        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')
        i0 = 0
        i1 = len(hist) - 1
        prob = 0
        while (prob < (1 - conf) / 2):
            if i0 < mid:
                i0 += 1
            else:
                break
            prob = sum(hist[0:i0])
        if i0 == 0:
            lower = None
        else:
            lower = xb[i0]
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        while (prob < 1 - conf):
            if i1 > mid:
                i1 -= 1
            else:
                break
            prob = sum(hist[0:i0]) + sum(hist[i1:])
        if i1 == len(xb) - 1:
            upper = None
        else:
            upper = xb[i1]
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')
        if upper is not None:
            plus = upper - mod
        else:
            plus = nan
        if lower is not None:
            minus = mod - lower
        else:
            minus = nan
    return mod, minus, plus
        print 'median over all replicates of median absolute relative error'
        print results.unstack()['mare', '50%'].unstack()

    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        replicate = int(options.replicate)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'replicate', replicate

        M = gp.Mean(validate_consistent_model.constant)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 100], [-5, -5])

        true = {}
        li = gp.Realization(M, C)
        true['i'] = lambda x: pl.exp(li(x))
        lr = gp.Realization(M, C)
        true['r'] = lambda x: pl.exp(lr(x))
        lf = gp.Realization(M, C)
        true['f'] = lambda x: pl.exp(lf(x))

        model = validate_consistent_model.validate_consistent_model_sim(
            N, delta_true, true)
        model.results.to_csv(
            '%s/%s/%s-%s-%s-%s.csv' %
            (output_dir, validation_name, options.numberofrows, options.delta,
             '', options.replicate))
Beispiel #22
0
def smooth(x):
    '''
    Smooth the sequence x with a Gaussian process.

    A zero-mean process with a Matern covariance (amp=1, scale=15,
    diff_degree=2) is observed at the integer positions 0 .. len(x)-1 with
    the values of x and observation variance 0.5.  The observed mean is then
    evaluated at those same positions and returned.
    '''
    from pymc import gp

    def zero_mean(points):
        return zeros(len(points))

    positions = range(len(x))
    mean = gp.Mean(zero_mean)
    cov = gp.Covariance(gp.matern.euclidean, amp=1, scale=15, diff_degree=2)
    gp.observe(mean, cov, positions, x, .5)
    return mean(positions)
        print "\ninspect with:\nresults.unstack()['mare', '50%'].unstack() # for example"
        print "or: results.unstack()['mare', '50%'].unstack(2).reindex(columns='Very Moderately Slightly'.split())"

    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        replicate = int(options.replicate)
        bias = float(options.bias)
        sigma_prior = float(options.sigma)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'bias', bias
        print 'sigma_prior', sigma_prior
        print 'replicate', replicate

        M = gp.Mean(validate_similarity.quadratic)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 30, 100], [-5, -3, -5])

        true = {}
        lp = gp.Realization(M, C)
        true_p = lambda x: pl.exp(lp(x))

        model = validate_similarity.generate_data(N, delta_true, true_p, 'Unusable', bias, sigma_prior)
        for het in 'Very Moderately Slightly'.split():
            model.parameters['p']['heterogeneity'] = het
            validate_similarity.fit(model)
            model.results.to_csv('%s/%s/%s-%s-%s-%s-%s-%s.csv' % (output_dir, validation_name, options.numberofrows, options.delta, het, bias, sigma_prior, options.replicate))
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        sigma_true = float(options.sigma) * pl.ones(5)
        replicate = int(options.replicate)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'sigma_true', sigma_true
        print 'replicate', replicate

        mc.np.random.seed(1234567 + replicate)

        M = gp.Mean(validate_consistent_re_model.quadratic)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 25, 100], [-5, -3, -5])

        true = {}
        li = gp.Realization(M, C)
        true['i'] = lambda x: pl.exp(li(x))
        lr = gp.Realization(M, C)
        true['r'] = lambda x: pl.exp(lr(x))
        lf = gp.Realization(M, C)
        true['f'] = lambda x: pl.exp(lf(x))

        model = validate_consistent_re_model.validate_consistent_re(
            N, delta_true, sigma_true, true)
        model.results.to_csv(
            '%s/%s/%s-%s-%s-%s.csv' %
            (output_dir, validation_name, options.numberofrows, options.delta,
             options.sigma, options.replicate))
Beispiel #25
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test, spacetime_iters, top_submodel):
    '''
    Run Gaussian process regression per country/age group and write the
    simulated draws to a CSV file.

    infile is read with csv2rec.  The code below reads the columns year,
    iso3, age_group, lt_cf, 'test_' + test and, for every submodel m
    (1-based), spacetime_<m>, spacetime_amplitude_<m>,
    spacetime_data_variance_<m> plus the column named dv_list[m-1].

    For each country/age group and each submodel, the spacetime predictions
    (linearly interpolated over year) are the GP mean, a Matern covariance
    (diff_degree=2, amp = mean spacetime amplitude, scale = scale) is used,
    and the process is conditioned on the observed rows when there are at
    least two of them.  spacetime_iters[m] realizations are drawn for
    submodel m; submodel number top_submodel (1-based, <= 0 disables)
    additionally gets 100 draws of its own.

    outfile receives the columns iso3, age_group, year, ensemble_d1..dN
    (N = sum(spacetime_iters)) and, if top_submodel > 0,
    top_submodel_d1..d100.  A draw that fails, or belongs to a submodel
    without numeric spacetime results, is written as NaN.

    NOTE(review): iso3 is recovered as ca[0:3] and age_group as ca[4:], which
    assumes three-character iso3 codes - confirm against the input data.
    '''
    n_top_draws = 100

    # load in the data, dropping rows whose spacetime prediction is missing
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        spacetime_col = 'spacetime_' + str(m+1)
        if all_data[spacetime_col].dtype == 'float64':
            all_data = np.delete(all_data, np.where(np.isnan(all_data[spacetime_col]))[0], axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups (one output row per group and year)
    country_age = np.array([str(iso) + '_' + str(age) for iso, age in zip(all_data.iso3, all_data.age_group)])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))
    n_rows = len(country_age_list)

    # make arrays in which to store the results; draws start out as NaN so
    # that a failed realization is never written out as uninitialized memory
    total_iters = np.sum(spacetime_iters)
    draws = [np.full(n_rows, np.nan) for i in range(total_iters)]
    if top_submodel > 0:
        top_submodel_draws = [np.full(n_rows, np.nan) for i in range(n_top_draws)]
    iso3 = np.empty(n_rows, '|S3')
    age_group = np.empty(n_rows, 'int')
    year = np.empty(n_rows, 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # output rows belonging to this country/age
        ca_rows = country_age_list == ca

        # the identifying columns do not depend on the submodel, so fill them
        # once here; this also covers groups for which every submodel is
        # skipped below
        iso3[ca_rows] = ca[0:3]
        age_group[ca_rows] = ca[4:]
        year[ca_rows] = year_list

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data (an object-typed lt_cf column is
        # treated as "no data")
        has_data = False
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0) & (ca_data['test_' + test] == 0)]
            has_data = len(ca_observed) > 1

        # keep track of how many iterations have been added for this model
        iter_counter = 0

        # loop through each submodel
        for m in range(number_submodels):
            suffix = str(m+1)
            is_top = (m+1 == top_submodel)
            n_draws = spacetime_iters[m]

            # identify the dependent variable for this model
            dv = dv_list[m]

            # only continue if we actually need draws for this model
            if not (n_draws > 0 or is_top):
                continue

            # skip models with no spacetime results
            if all_data['spacetime_' + suffix].dtype != 'float64':
                for i in range(n_draws):
                    draws[iter_counter][ca_rows] = np.nan
                    iter_counter += 1
                if is_top:
                    for i in range(n_top_draws):
                        top_submodel_draws[i][ca_rows] = np.nan
                continue

            # make a list of the spacetime predictions
            ca_prior = np.array([np.mean(ca_data['spacetime_' + suffix][ca_data.year == y]) for y in year_list])

            # find the amplitude for this country/age
            amplitude = np.mean(ca_data['spacetime_amplitude_' + suffix])

            # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
            def mean_function(x):
                return np.interp(x, year_list, ca_prior)

            # setup the covariance function
            M = gp.Mean(mean_function)
            C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale)

            # observe the data if there is any
            if has_data:
                gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + suffix], obs_vals=ca_observed[dv])

            # draw realizations from the data
            realizations = [gp.Realization(M, C) for i in range(n_draws)]

            # save the draws for this country/age into the results arrays
            for i in range(n_draws):
                try:
                    draws[iter_counter][ca_rows] = realizations[i](year_list)
                except Exception:
                    # best effort: report it and leave this draw as NaN
                    print('Failure in ' + ca)
                iter_counter += 1

            # if it's the top submodel, do 100 additional draws
            if is_top:
                realizations = [gp.Realization(M, C) for i in range(n_top_draws)]
                for i in range(n_top_draws):
                    try:
                        top_submodel_draws[i][ca_rows] = realizations[i](year_list)
                    except Exception:
                        print('Failure in ' + ca)

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for i in range(total_iters):
        results = recfunctions.append_fields(results, 'ensemble_d' + str(i+1), draws[i])
    if top_submodel > 0:
        for i in range(n_top_draws):
            results = recfunctions.append_fields(results, 'top_submodel_d' + str(i+1), top_submodel_draws[i])
    rec2csv(results, outfile)
Beispiel #26
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test):
    '''
    Run Gaussian process regression per country/age group and write the GP
    mean of every submodel to a CSV file.

    infile is read with csv2rec.  The code below reads the columns year,
    iso3, age_group, lt_cf, 'test_' + test and, for every submodel m
    (1-based), spacetime_<m>, spacetime_amplitude_<m>,
    spacetime_data_variance_<m> plus the column named dv_list[m-1].

    For each country/age group and each submodel, the spacetime predictions
    (linearly interpolated over year) are the GP mean, a Matern covariance
    (diff_degree=2, amp = mean spacetime amplitude, scale = scale) is used,
    and the process is conditioned on the observed rows when there are at
    least two of them.  The mean is then evaluated at every year.

    outfile receives the columns iso3, age_group, year and
    gpr_<m>_spacetime_mean for each submodel; submodels without numeric
    spacetime results are written as NaN.

    NOTE(review): iso3 is recovered as ca[0:3] and age_group as ca[4:], which
    assumes three-character iso3 codes - confirm against the input data.
    '''
    # load in the data, dropping rows whose spacetime prediction is missing
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        spacetime_col = 'spacetime_' + str(m+1)
        if all_data[spacetime_col].dtype == 'float64':
            all_data = np.delete(all_data, np.where(np.isnan(all_data[spacetime_col]))[0], axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups (one output row per group and year)
    country_age = np.array([str(iso) + '_' + str(age) for iso, age in zip(all_data.iso3, all_data.age_group)])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))
    n_rows = len(country_age_list)

    # make empty arrays in which to store the results
    draws = [np.empty(n_rows, 'float') for i in range(number_submodels)]
    iso3 = np.empty(n_rows, '|S3')
    age_group = np.empty(n_rows, 'int')
    year = np.empty(n_rows, 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # output rows belonging to this country/age
        ca_rows = country_age_list == ca

        # the identifying columns do not depend on the submodel, so fill them
        # once here; otherwise they stay uninitialized when every submodel is
        # skipped below
        iso3[ca_rows] = ca[0:3]
        age_group[ca_rows] = ca[4:]
        year[ca_rows] = year_list

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data (an object-typed lt_cf column is
        # treated as "no data")
        has_data = False
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0) & (ca_data['test_' + test] == 0)]
            has_data = len(ca_observed) > 1

        # loop through each submodel
        for m in range(number_submodels):
            suffix = str(m+1)

            # skip models with no spacetime results
            if all_data['spacetime_' + suffix].dtype != 'float64':
                draws[m][ca_rows] = np.nan
                continue

            # identify the dependent variable for this model
            dv = dv_list[m]

            # make a list of the spacetime predictions
            ca_prior = np.array([np.mean(ca_data['spacetime_' + suffix][ca_data.year == y]) for y in year_list])

            # find the amplitude for this country/age
            amplitude = np.mean(ca_data['spacetime_amplitude_' + suffix])

            # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
            def mean_function(x):
                return np.interp(x, year_list, ca_prior)

            # setup the covariance function
            M = gp.Mean(mean_function)
            C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale)

            # observe the data if there is any
            if has_data:
                gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + suffix], obs_vals=ca_observed[dv])

            # save the GP mean for this country/age into the results array
            draws[m][ca_rows] = M(year_list)

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for m in range(number_submodels):
        results = recfunctions.append_fields(results, 'gpr_' + str(m+1) + '_spacetime_mean', draws[m])
    rec2csv(results, outfile)
Beispiel #27
0
def smooth(x):
    '''
    Return a Gaussian-process smoothed version of the sequence x.

    The values of x are treated as observations (variance 0.5) at positions
    0 .. len(x)-1 of a zero-mean process with a Matern covariance (amp=1,
    scale=15, diff_degree=2); the result is the observed mean evaluated at
    those positions.
    '''
    from pymc import gp

    def flat_zero(points):
        return zeros(len(points))

    index = range(len(x))
    gp_mean = gp.Mean(flat_zero)
    gp_cov = gp.Covariance(gp.matern.euclidean, amp=1, scale=15, diff_degree=2)
    gp.observe(gp_mean, gp_cov, index, x, .5)
    return gp_mean(index)