Ejemplo n.º 1
0
    def create_gps(self):
        """Set up the smoothness Gaussian process ``gps`` on the fiber.

        Builds a mean from ``constant_mean`` (``val=0``) and a covariance
        from ``self._cs_pymc``, caches the covariance matrix evaluated on
        ``self._fiber`` together with its inverse and the ``alphas`` column
        vector, and stores the value returned by ``gp.observe`` in
        ``self.gps``.

        ToDo: compute gpd, the diffusion-associated blurring GP; it is not
        created here yet.
        """
        fiber = self._fiber
        n_points = len(fiber)

        # Mean function of the process.
        self._means = gp.Mean(constant_mean, val=0)

        # Covariance function. It is multiplied by amp**2, which effectively
        # multiplies realizations by amp: a larger amp means realizations
        # deviate further from their mean.
        self._covs = gp.Covariance(self._cs_pymc, amp=1, R=self._r)
        cov_on_fiber = self._covs(fiber, fiber)
        self.covs_matrix = cov_on_fiber

        # Quantities needed to compute the inner product: the inverse
        # covariance matrix and the column vector (1^T . C^-1)^T.
        inv_cov = numpy.linalg.inv(cov_on_fiber)
        self._invcovs_matrix = inv_cov
        ones_times_inv = numpy.dot(numpy.ones(n_points), inv_cov)
        self.alphas = numpy.asmatrix(ones_times_inv).T

        # Normally-distributed observations on the process: a value of one at
        # every fiber point, each with variance self._observed_variance.
        observed_values = numpy.ones(n_points)
        observation_variance = numpy.zeros(n_points) + self._observed_variance
        self.gps = gp.observe(
            self._means, self._covs, obs_mesh=fiber,
            obs_vals=observed_values, obs_V=observation_variance)
Ejemplo n.º 2
0
def new_gpr_draws(df2, response, amplitude, prior, scale, has_data, draws):
    '''
    Draw Gaussian process realizations for one location-age data frame.

    Runs an instance of Gaussian process smoothing in order to account for
    years where we do not have data. The data frame (df2) is specific to a
    location-age.

    Parameters
    ----------
    df2 : data frame with a "year" column, the columns ``response``,
        ``response + "_sd"`` and ``response + "_nsv"``, and a fourth-from-last
        column that is used as a row mask selecting the rows that carry data
        (NOTE(review): assumed to be boolean -- confirm against the caller).
    response : str, name of the observed-value column.
    amplitude : passed as ``amp`` to the Matern covariance.
    prior : prior values, one per row of df2; interpolated linearly over
        year and used as the GP mean function.
    scale : passed as ``scale`` to the Matern covariance.
    has_data : truthy when the masked observations should be conditioned on.
    draws : int, number of realizations to draw.

    Returns
    -------
    array with one column per draw, each evaluated at ``df2["year"]``
    (the stacked realizations, transposed).
    '''
    # Positional column access. The original used the ``.ix`` indexer, which
    # no longer exists in current pandas; ``.iloc`` is the positional
    # equivalent for a frame with labelled (non-integer) columns.
    data_mask = df2.iloc[:, -4]
    years = df2["year"].values

    # Prior by year, sorted so that np.interp receives increasing x values.
    data = pd.DataFrame({"year": years, "prior": prior})
    data.sort_values("year", inplace=True)
    data.drop_duplicates(inplace=True)

    def mean_function(x):
        return np.interp(x, data.year.values, data.prior.values)

    M = gp.Mean(mean_function)
    C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2,
                      amp=amplitude, scale=scale)

    if has_data:
        # Rows flagged as carrying data; selected directly with the mask so
        # duplicate index labels cannot multiply rows.
        df4 = df2[data_mask]
        # Observation variance is the squared standard deviation plus the
        # "_nsv" column.
        gp.observe(M=M, C=C, obs_mesh=df4.year.values,
                   obs_vals=df4[response].values,
                   obs_V=(df4[response + "_sd"].values)**2 +
                   df4[response + "_nsv"].values)

    ca_draws = np.array([gp.Realization(M, C)(years) for _ in range(draws)]).T
    return ca_draws
Ejemplo n.º 3
0
        def _setup(self):
            '''Configure the Gaussian-process interpolator from the current
            parameters.

            Hyper-parameters that are still None get defaults: diff_degree
            becomes 3, amp becomes the standard deviation of the data about
            the mean function, and scale becomes 30. The mean and covariance
            are stored in self.M and self.C and the regularized data are
            observed on them. Any cached realization is reset.
            '''
            mesh, values, errors = self._regularize()

            # Fill in whatever hyper-parameter was left unset.
            if self.diff_degree is None:
                self.diff_degree = 3

            if self.amp is None:
                residuals = values - self.mean(mesh)
                self.amp = num.std(residuals)

            if self.scale is None:
                # Fixed default (an earlier version used half the x range).
                self.scale = 30

            self.M = GP.Mean(self.mean)
            matern_options = dict(diff_degree=self.diff_degree,
                                  amp=self.amp,
                                  scale=self.scale)
            self.C = GP.Covariance(GP.matern.euclidean, **matern_options)

            # Observation variance is the squared error of each point.
            variances = num.power(errors, 2)
            GP.observe(self.M, self.C,
                       obs_mesh=mesh, obs_vals=values, obs_V=variances)

            self.setup = True
            self.realization = None
Ejemplo n.º 4
0
    def setK(self, k):
        """Change the covariance parameter ``k`` and rebuild the GP.

        Does nothing when ``k`` equals the current value. Otherwise the
        adjusted bounds are widened (or narrowed) by the change in ``k`` on
        both sides, the covariance, its matrix on the fiber, the inverse
        matrix and the ``_alpha`` column vector are recomputed, the process
        is re-observed, and any previously precomputed variance/mean field
        is recomputed.
        """
        if k != self._k:
            deltaK = (k - self._k)
            self._k = k
            # Lower bound moves down and upper bound moves up by the change.
            self._adjustedBounds[0] -= deltaK
            self._adjustedBounds[1] += deltaK

            self._cov = _GP.Covariance(self._c, amp=1, r=self._k)
            self._cov_matrix = self._cov(self._fiber, self._fiber)
            self._cov_inv_matrix = linalg.inv(self._cov_matrix)
            # Column vector (1^T . C^-1)^T.
            self._alpha = numpy.asmatrix(
                numpy.dot(numpy.ones(len(self._fiber)),
                          self._cov_inv_matrix)).T

            self._mean = _GP.Mean(_constant, val=0)
            # Observe a value of one at every fiber point with variance
            # self._epsilon.
            self._gp = _GP.observe(self._mean,
                                   self._cov,
                                   obs_mesh=self._fiber,
                                   obs_vals=numpy.ones(len(self._fiber)),
                                   obs_V=numpy.zeros(len(self._fiber)) +
                                   self._epsilon)

            # Identity checks: ``!= None`` is evaluated element-wise when the
            # cached field is a NumPy array, which makes the ``if`` ambiguous.
            if self._precomputedVariance is not None:
                self.precomputeVarianceField()
            if self._precomputedMean is not None:
                self.precomputeMeanField()
Ejemplo n.º 5
0
def uninformative_prior_gp(c=0., diff_degree=2., amp=1., scale=1.5):
    """Build uninformative mean and covariance priors for a Gaussian Process.

    Parameters
    ----------
    c : float, the prior mean (passed to ``const_func``)
    diff_degree : float, the prior on differentiability
    amp : float, the prior on the amplitude of the Gaussian Process
    scale : float, the prior on the scale of the Gaussian Process

    Returns
    -------
    M, C : mean and covariance objects
      Together they constitute an uninformative prior on a Gaussian Process
      with a euclidean Matern covariance function.
    """
    matern_options = dict(diff_degree=diff_degree, amp=amp, scale=scale)
    prior_mean = gp.Mean(const_func, c=c)
    prior_covariance = gp.Covariance(gp.matern.euclidean, **matern_options)
    return prior_mean, prior_covariance
Ejemplo n.º 6
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test):
    """Fit a GP per country/age group and submodel and save the GP means.

    Reads ``infile`` with ``csv2rec``. For every country/age group and every
    submodel, the spacetime predictions (interpolated linearly over year) are
    used as the GP mean function with a euclidean Matern covariance; the
    observed data, if any, are conditioned on; and the resulting mean is
    evaluated on every year present in the data. Results are written to
    ``outfile`` with ``rec2csv``.

    Parameters
    ----------
    infile : path of the input CSV. Columns read here: iso3, age_group,
        year, lt_cf, 'test_' + test, and per submodel m (1-based)
        spacetime_m, spacetime_amplitude_m, spacetime_data_variance_m.
    outfile : path of the output CSV.
    dv_list : sequence giving, per submodel, the name of the column that
        holds the dependent variable.
    scale : passed as ``scale`` to the covariance.
    number_submodels : int, number of submodels.
    test : str, suffix of the 'test_' column; only rows where that column
        is 0 are used as observations.

    Output columns are iso3, age_group, year and, per submodel,
    'gpr_<m>_spacetime_mean'. A submodel whose spacetime column is not
    float64 yields NaN.
    """
    # load in the data, dropping rows with a missing spacetime prediction
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        if all_data['spacetime_' + str(m+1)].dtype == 'float64':
            all_data = np.delete(all_data, np.where(np.isnan(all_data['spacetime_' + str(m+1)]))[0], axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups
    country_age = np.array([str(iso) + '_' + str(age) for iso, age in zip(all_data.iso3, all_data.age_group)])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results
    draws = [np.empty(len(country_age_list), 'float') for _ in range(number_submodels)]
    iso3 = np.empty(len(country_age_list), '|S3')
    age_group = np.empty(len(country_age_list), 'int')
    year = np.empty(len(country_age_list), 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # output rows that belong to this country/age
        ca_rows = country_age_list == ca

        # Identifier columns do not depend on the submodel, so they are
        # written once here. Writing them inside the submodel loop left them
        # uninitialised whenever every submodel was skipped.
        iso3[ca_rows] = ca[0:3]
        age_group[ca_rows] = ca[4:]
        year[ca_rows] = year_list.T

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data; more than one observation is
        # required before the GP is conditioned on it
        has_data = False
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0) & (ca_data['test_' + test] == 0)]
            has_data = len(ca_observed) > 1

        # loop through each submodel
        for m in range(number_submodels):
            suffix = str(m+1)

            # skip models with no spacetime results
            if all_data['spacetime_' + suffix].dtype != 'float64':
                draws[m][ca_rows] = np.nan
                continue

            # identify the dependent variable for this model
            dv = dv_list[m]

            # make a list of the spacetime predictions, one per year
            ca_prior = np.array([np.mean(ca_data['spacetime_' + suffix][ca_data.year == y]) for y in year_list])

            # find the amplitude for this country/age
            amplitude = np.mean(ca_data['spacetime_amplitude_' + suffix])

            # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
            def mean_function(x):
                return np.interp(x, year_list, ca_prior)

            # setup the mean and covariance functions
            M = gp.Mean(mean_function)
            C = gp.Covariance(eval_fun=gp.matern.euclidean, diff_degree=2, amp=amplitude, scale=scale)

            # observe the data if there is any
            if has_data:
                gp.observe(M=M, C=C, obs_mesh=ca_observed.year, obs_V=ca_observed['spacetime_data_variance_' + suffix], obs_vals=ca_observed[dv])

            # save the GP mean for this country/age into the results array
            draws[m][ca_rows] = M(year_list)

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for m in range(number_submodels):
        results = recfunctions.append_fields(results, 'gpr_' + str(m+1) + '_spacetime_mean', draws[m])
    rec2csv(results, outfile)
    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        sigma_true = float(options.sigma) * pl.ones(5)
        replicate = int(options.replicate)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'sigma_true', sigma_true
        print 'replicate', replicate

        mc.np.random.seed(1234567 + replicate)

        M = gp.Mean(validate_consistent_re_model.quadratic)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 25, 100], [-5, -3, -5])

        true = {}
        li = gp.Realization(M, C)
        true['i'] = lambda x: pl.exp(li(x))
        lr = gp.Realization(M, C)
        true['r'] = lambda x: pl.exp(lr(x))
        lf = gp.Realization(M, C)
        true['f'] = lambda x: pl.exp(lf(x))

        model = validate_consistent_re_model.validate_consistent_re(
            N, delta_true, sigma_true, true)
        model.results.to_csv(
            '%s/%s/%s-%s-%s-%s.csv' %
            (output_dir, validation_name, options.numberofrows, options.delta,
Ejemplo n.º 8
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, iters):
    """Fit a GP per country and submodel and save realizations and means.

    Reads ``infile`` with ``csv2rec``. For every country (iso3) and every
    submodel, the spacetime predictions (interpolated linearly over year)
    are used as the GP mean function with a euclidean Matern covariance; the
    observed 'lt_prev' values, if any, are conditioned on; and ``iters``
    realizations are evaluated on every year present in the data. Results
    are written to ``outfile`` with ``rec2csv``.

    Parameters
    ----------
    infile : path of the input CSV. Columns read here: iso3, year, lt_prev
        and per submodel m (1-based) spacetime_m, spacetime_amplitude_m,
        spacetime_data_variance_m.
    outfile : path of the output CSV.
    dv_list : not used by this function; observed values are always taken
        from 'lt_prev'. Kept for interface compatibility.
    scale : passed as ``scale`` to the covariance.
    number_submodels : int, number of submodels.
    iters : int, number of realizations per submodel.

    Output columns are iso3, year and, per submodel,
    'gpr_<m>_spacetime_d<i>' for each draw plus 'gpr_<m>_spacetime_mean'
    (the mean across that submodel's draws).
    """
    # kinds of prior to loop over; only 'spacetime' is currently used
    prior_types = ['spacetime']

    # load in the data, dropping rows with a missing spacetime prediction
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        all_data = np.delete(
            all_data,
            np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0],
            axis=0)

    # TODO: investigate the error thrown for HKG, MAC and SGP. They have no
    # data, but it is not yet understood why that breaks the fit, so they
    # are excluded for now.
    for excluded_iso3 in ("HKG", "MAC", "SGP"):
        all_data = all_data[all_data['iso3'] != excluded_iso3]

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of countries (the grouping key is iso3 only)
    country_age = np.array(list(all_data.iso3))
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results. Two blocks of
    # ``iters`` slots are reserved per submodel to keep the slot layout
    # (2 * m + prior index) * iters + draw, although only one is used.
    draws = [
        np.empty(len(country_age_list), 'float')
        for _ in range(iters * number_submodels * 2)
    ]
    iso3 = np.empty(len(country_age_list), '|S3')
    year = np.empty(len(country_age_list), 'int')

    # loop through countries
    for ca in np.unique(country_age_list):

        print('GPRing ' + ca)

        # output rows that belong to this country
        ca_rows = country_age_list == ca

        # identifier columns do not depend on the submodel: write them once
        iso3[ca_rows] = ca[0:3]
        year[ca_rows] = year_list.T

        # subset the data for this particular country
        ca_data = all_data[country_age == ca]

        # subset just the observed data; more than one observation is
        # required before the GP is conditioned on it
        has_data = False
        if ca_data['lt_prev'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_prev']) == 0)]
            has_data = len(ca_observed) > 1

        # loop through each submodel
        for m in range(number_submodels):

            # loop through the kinds of prior
            for x, t in enumerate(prior_types):
                column = t + '_' + str(m + 1)

                # make a list of the spacetime predictions, one per year
                ca_prior = np.array([
                    np.mean(ca_data[column][ca_data.year == y])
                    for y in year_list
                ])

                # find the amplitude for this country
                amplitude = np.mean(ca_data[t + '_amplitude_' + str(m + 1)])

                # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
                def mean_function(x):
                    return np.interp(x, year_list, ca_prior)

                # setup the mean and covariance functions
                M = gp.Mean(mean_function)
                C = gp.Covariance(eval_fun=gp.matern.euclidean,
                                  diff_degree=2,
                                  amp=amplitude,
                                  scale=scale)

                # observe the data if there is any
                if has_data:
                    gp.observe(M=M,
                               C=C,
                               obs_mesh=ca_observed.year,
                               obs_V=ca_observed[t + '_data_variance_' +
                                                 str(m + 1)],
                               obs_vals=ca_observed['lt_prev'])

                # draw realizations from the data
                realizations = [gp.Realization(M, C) for _ in range(iters)]

                # save the draws for this country into the results array
                first_slot = (2 * m + x) * iters
                for i in range(iters):
                    draws[first_slot + i][ca_rows] = realizations[i](year_list)

    # save the results
    print('Saving GPR results')
    # One name per array. The previous three-name list
    # ['iso3', 'age_group', 'year'] was paired with only two arrays, which
    # (as far as NumPy's record-array name handling goes) labelled the year
    # values 'age_group'.
    names = ['iso3', 'year']
    results = np.core.records.fromarrays([iso3, year], names=names)
    for m in range(number_submodels):
        for x, t in enumerate(prior_types):
            first_slot = (2 * m + x) * iters
            prefix = 'gpr_' + str(m + 1) + '_' + t
            for i in range(iters):
                results = recfunctions.append_fields(
                    results, prefix + '_d' + str(i + 1),
                    draws[first_slot + i])
            results = recfunctions.append_fields(
                results, prefix + '_mean',
                np.mean(draws[first_slot:first_slot + iters], axis=0))
    # write the file once, after all submodels have been appended
    rec2csv(results, outfile)
Ejemplo n.º 9
0
def fit_gpr(
        df, amp, obs_variable='observed_data',
        obs_var_variable='obs_data_variance', mean_variable='st_prediction',
        year_variable='year_id', scale=10, diff_degree=2, draws=0):
    """Run Gaussian process regression over the years in ``df``.

    The (year, mean_variable) pairs of ``df`` are interpolated linearly to
    form the GP mean function; the covariance is a euclidean Matern. Rows
    where both the observation and its variance are non-null are conditioned
    on. The GP mean and variance are then evaluated at every distinct prior
    year and merged back onto ``df``.

    Parameters
    ----------
    df : data frame holding the columns named by the ``*_variable``
        arguments.
    amp : passed as ``amp`` to the covariance.
    obs_variable : name of the observed-value column.
    obs_var_variable : name of the observation-variance column.
    mean_variable : name of the column holding the prior mean.
    year_variable : name of the year column.
    scale, diff_degree : passed to the covariance.
    draws : int, number of realizations to sample; 0 for none.

    Returns
    -------
    ``df`` left-merged on ``year_variable`` with the columns gpr_mean,
    gpr_var, gpr_lower and gpr_upper (mean -/+ 1.96 * sqrt(variance)). When
    ``draws > 0`` the columns draw_0 ... draw_<draws-1> are appended as well
    and the columns are ordered: original, gpr_*, draw_*.
    """
    initial_columns = list(df.columns)

    data = df[(df[obs_variable].notnull()) & (df[obs_var_variable].notnull())]

    # np.interp requires increasing x-coordinates and does not check for it,
    # so the prior is sorted by year (stable sort keeps tie order).
    mean_prior = (df[[year_variable, mean_variable]]
                  .drop_duplicates()
                  .sort_values(year_variable, kind='mergesort'))
    years = mean_prior[year_variable]

    def mean_function(x):
        return np.interp(x, years, mean_prior[mean_variable])

    M = gp.Mean(mean_function)
    C = gp.Covariance(
            eval_fun=gp.matern.euclidean, diff_degree=diff_degree,
            amp=amp, scale=scale)

    if len(data) > 0:
        gp.observe(
                M=M, C=C, obs_mesh=data[year_variable],
                obs_V=data[obs_var_variable], obs_vals=data[obs_variable])

    model_mean = M(years).T
    model_variance = C(years)
    half_width = np.sqrt(model_variance)*1.96
    model_lower = model_mean - half_width
    model_upper = model_mean + half_width

    summary = pd.DataFrame({
        year_variable: years,
        'gpr_mean': model_mean, 'gpr_var': model_variance,
        'gpr_lower': model_lower, 'gpr_upper': model_upper})

    if not draws > 0:
        return pd.merge(df, summary, on=year_variable, how='left')

    # Sample directly from the multivariate normal. This is faster than
    # drawing gp.Realization objects and should give the same result.
    realizations = np.random.multivariate_normal(
            model_mean, C(years, years), draws)
    for i, r in enumerate(realizations):
        summary["draw_"+str(i)] = r

    merged = pd.merge(df, summary, on=year_variable, how='left')
    gpr_columns = ['gpr_mean', 'gpr_var', 'gpr_lower', 'gpr_upper']
    draw_columns = ['draw_'+str(i) for i in range(draws)]
    return merged[initial_columns + gpr_columns + draw_columns]
Ejemplo n.º 10
0
def fit_gpr(df,
            amp,
            obs_variable='observed_data',
            obs_var_variable='obs_data_variance',
            mean_variable='st_prediction',
            year_variable='year',
            scale=40,
            diff_degree=2,
            draws=0):
    """Run Gaussian process regression over the years in ``df``.

    The (year, mean_variable) pairs of ``df`` are interpolated linearly to
    form the GP mean function; the covariance is a euclidean Matern. Rows
    where both the observation and its variance are non-null are conditioned
    on. The GP mean and variance are then evaluated at every distinct prior
    year and merged back onto ``df``.

    Parameters
    ----------
    df : data frame holding the columns named by the ``*_variable``
        arguments.
    amp : passed as ``amp`` to the covariance.
    obs_variable : name of the observed-value column.
    obs_var_variable : name of the observation-variance column.
    mean_variable : name of the column holding the prior mean.
    year_variable : name of the year column.
    scale, diff_degree : passed to the covariance.
    draws : int, number of realizations to draw; 0 for none.

    Returns
    -------
    ``df`` left-merged on ``year_variable`` with the columns gpr_mean,
    gpr_var, gpr_lower and gpr_upper (mean -/+ 1.96 * sqrt(variance)). When
    ``draws > 0`` the columns draw0 ... draw<draws-1> are included as well
    and the original columns come first.
    """
    initial_columns = list(df.columns)

    # Boolean-mask row selection (``.loc`` replaces the removed ``.ix``).
    data = df.loc[(pd.notnull(df[obs_variable]))
                  & (pd.notnull(df[obs_var_variable]))]

    # np.interp requires increasing x-coordinates and does not check for it,
    # so the prior is sorted by year (stable sort keeps tie order).
    mean_prior = (df[[year_variable, mean_variable]]
                  .drop_duplicates()
                  .sort_values(year_variable, kind='mergesort'))
    years = mean_prior[year_variable]

    def mean_function(x):
        return np.interp(x, years, mean_prior[mean_variable])

    M = gp.Mean(mean_function)
    C = gp.Covariance(eval_fun=gp.matern.euclidean,
                      diff_degree=diff_degree,
                      amp=amp,
                      scale=scale)

    if len(data) > 0:
        gp.observe(M=M,
                   C=C,
                   obs_mesh=data[year_variable],
                   obs_V=data[obs_var_variable],
                   obs_vals=data[obs_variable])

    model_mean = M(years).T
    model_variance = C(years)
    half_width = np.sqrt(model_variance) * 1.96
    model_lower = model_mean - half_width
    model_upper = model_mean + half_width

    summary = pd.DataFrame({
        year_variable: years,
        'gpr_mean': model_mean,
        'gpr_var': model_variance,
        'gpr_lower': model_lower,
        'gpr_upper': model_upper
    })

    if not draws > 0:
        return pd.merge(df, summary, on=year_variable, how='left')

    # Evaluate each realization at exactly the prior years, so every draw
    # lines up row-for-row with the summary frame. The earlier
    # range(min, max + 1) over a hard-coded 'year' column only matched when
    # the years were contiguous, sorted and the column was named 'year'.
    year_values = years.values
    realizations = [
        gp.Realization(M, C)(year_values) for _ in range(draws)
    ]
    for i, r in enumerate(realizations):
        summary["draw" + str(i)] = r

    merged = pd.merge(df, summary, on=year_variable, how='left')
    # New columns in frame order (deterministic, unlike a set difference).
    known = set(initial_columns)
    gpr_columns = [col for col in merged.columns if col not in known]
    return merged[initial_columns + gpr_columns]
Ejemplo n.º 11
0
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test,
            spacetime_iters, top_submodel):
    """Fit a GP per country/age group and submodel and save realizations.

    Reads ``infile`` with ``csv2rec``. For every country/age group and every
    submodel that needs draws, the spacetime predictions (interpolated
    linearly over year) are used as the GP mean function with a euclidean
    Matern covariance; the observed data, if any, are conditioned on; and
    realizations are evaluated on every year present in the data. Results
    are written to ``outfile`` with ``rec2csv``.

    Parameters
    ----------
    infile : path of the input CSV. Columns read here: iso3, age_group,
        year, lt_cf, 'test_' + test, and per submodel m (1-based)
        spacetime_m, spacetime_amplitude_m, spacetime_data_variance_m.
    outfile : path of the output CSV.
    dv_list : sequence giving, per submodel, the name of the column that
        holds the dependent variable.
    scale : passed as ``scale`` to the covariance.
    number_submodels : int, number of submodels.
    test : str, suffix of the 'test_' column; only rows where that column
        is 0 are used as observations.
    spacetime_iters : sequence giving the number of draws per submodel.
    top_submodel : 1-based index of the submodel that gets 100 additional
        draws; a value <= 0 disables them.

    Output columns are iso3, age_group, year, 'ensemble_d<i>' for the
    concatenated per-submodel draws and, when ``top_submodel > 0``,
    'top_submodel_d<i>'. Draws of a submodel whose spacetime column is not
    float64, and draws that fail, are NaN.
    """
    # number of additional draws taken for the top submodel
    n_top_draws = 100

    # load in the data, dropping rows with a missing spacetime prediction
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        if all_data['spacetime_' + str(m + 1)].dtype == 'float64':
            all_data = np.delete(
                all_data,
                np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0],
                axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups
    country_age = np.array([
        str(iso) + '_' + str(age)
        for iso, age in zip(all_data.iso3, all_data.age_group)
    ])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results
    total_iters = np.sum(spacetime_iters)
    draws = [
        np.empty(len(country_age_list), 'float') for _ in range(total_iters)
    ]
    if top_submodel > 0:
        top_submodel_draws = [
            np.empty(len(country_age_list), 'float')
            for _ in range(n_top_draws)
        ]
    iso3 = np.empty(len(country_age_list), '|S3')
    age_group = np.empty(len(country_age_list), 'int')
    year = np.empty(len(country_age_list), 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # output rows that belong to this country/age
        ca_rows = country_age_list == ca

        # Identifier columns do not depend on the submodel, so they are
        # written once here. Writing them inside the submodel loop left them
        # uninitialised whenever every submodel was skipped.
        iso3[ca_rows] = ca[0:3]
        age_group[ca_rows] = ca[4:]
        year[ca_rows] = year_list.T

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data; more than one observation is
        # required before the GP is conditioned on it
        has_data = False
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0)
                                  & (ca_data['test_' + test] == 0)]
            has_data = len(ca_observed) > 1

        # keep track of how many iterations have been added for this
        # country/age; it indexes the concatenated ensemble draws
        iter_counter = 0

        # loop through each submodel
        for m in range(number_submodels):

            # identify the dependent variable for this model
            dv = dv_list[m]

            is_top = (m + 1 == top_submodel)

            # only make predictions if we actually need draws for this model
            if not ((spacetime_iters[m] > 0) or is_top):
                continue

            # skip models with no spacetime results
            if all_data['spacetime_' + str(m + 1)].dtype != 'float64':
                for _ in range(spacetime_iters[m]):
                    draws[iter_counter][ca_rows] = np.nan
                    iter_counter += 1
                if is_top:
                    for i in range(n_top_draws):
                        top_submodel_draws[i][ca_rows] = np.nan
                continue

            # make a list of the spacetime predictions, one per year
            ca_prior = np.array([
                np.mean(ca_data['spacetime_' +
                                str(m + 1)][ca_data.year == y])
                for y in year_list
            ])

            # find the amplitude for this country/age
            amplitude = np.mean(ca_data['spacetime_amplitude_' +
                                        str(m + 1)])

            # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
            def mean_function(x):
                return np.interp(x, year_list, ca_prior)

            # setup the mean and covariance functions
            M = gp.Mean(mean_function)
            C = gp.Covariance(eval_fun=gp.matern.euclidean,
                              diff_degree=2,
                              amp=amplitude,
                              scale=scale)

            # observe the data if there is any
            if has_data:
                gp.observe(M=M,
                           C=C,
                           obs_mesh=ca_observed.year,
                           obs_V=ca_observed['spacetime_data_variance_' +
                                             str(m + 1)],
                           obs_vals=ca_observed[dv])

            # draw realizations from the data
            realizations = [
                gp.Realization(M, C) for _ in range(spacetime_iters[m])
            ]

            # save the draws for this country/age into the results array
            for i in range(spacetime_iters[m]):
                try:
                    draws[iter_counter][ca_rows] = realizations[i](year_list)
                except Exception:
                    # Best effort: report and carry on, but mark the draw as
                    # missing instead of leaving uninitialised memory.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print('Failure in ' + ca)
                    draws[iter_counter][ca_rows] = np.nan
                iter_counter += 1

            # if it's the top submodel, do the additional draws
            if is_top:
                realizations = [
                    gp.Realization(M, C) for _ in range(n_top_draws)
                ]
                for i in range(n_top_draws):
                    try:
                        top_submodel_draws[i][ca_rows] = realizations[i](
                            year_list)
                    except Exception:
                        print('Failure in ' + ca)
                        top_submodel_draws[i][ca_rows] = np.nan

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for i in range(total_iters):
        results = recfunctions.append_fields(results,
                                             'ensemble_d' + str(i + 1),
                                             draws[i])
    if top_submodel > 0:
        for i in range(n_top_draws):
            results = recfunctions.append_fields(results,
                                                 'top_submodel_d' + str(i + 1),
                                                 top_submodel_draws[i])
    rec2csv(results, outfile)
Ejemplo n.º 12
0
def plus_minus(arr,
               bins=30,
               conf=0.68,
               xrange=None,
               func='poly',
               fit_log=True,
               order=7,
               debug=False,
               zero_pad=False,
               end_tol=(None, None)):
    """Estimate the mode of a sample and its lower/upper confidence offsets.

    The sample is histogrammed and, depending on [func], the histogram is
    either modelled by a smooth curve ('gp' or 'poly') that is integrated to
    find the interval, or used directly (any other value of [func]).

    Parameters:
       arr:       sample values, passed to ``histogram``.
       bins:      bin specification passed to ``histogram``.
       conf:      fraction of probability the interval should enclose.
       xrange:    optional (low, high) histogram range; defaults to the
                  outer bin edges.
       func:      'poly' (polynomial fit via ``fit_poly.fitpoly``), 'gp'
                  (Gaussian process, requires pymc), or anything else to
                  work on the raw histogram.
       fit_log:   if True, empty bins are dropped and the curve is fitted
                  to the log of the counts.
       order:     passed as ``k`` to ``fit_poly.fitpoly``.
       debug:     if True, draw diagnostic plots with matplotlib.
       zero_pad:  for 'gp' with ``fit_log=False``, pad the observations
                  with zero-valued points on both sides.
       end_tol:   pair (low, high).  If an entry is not None and the ratio
                  of the first (last) bin count to the maximum count
                  exceeds it, the distribution is treated as running into
                  that edge and only a one-sided limit is reported.

    Returns:
       (mod, minus, plus), where minus/plus are offsets from the mode and
       are nan when no bound could be determined.  When [end_tol]
       triggers, the tuple is (mod, nan, upper), (mod, lower, nan) or
       (mod, nan, nan); note that in those cases the limit is the absolute
       position, not an offset from the mode.

    Raises:
       RuntimeError: if func == 'gp' and pymc's gp module is unavailable.
    """
    hist0, bins = histogram(arr, bins=bins, range=xrange)
    xb = (bins[1:] + bins[:-1]) / 2
    if fit_log:
        # log(0) is undefined, so only populated bins take part in the fit
        gids = greater(hist0, 0)
        xb = xb[gids]
        counts = hist0[gids]
        var = 1. / counts
        hist = log(counts)
    else:
        counts = hist0 * 1
        var = hist0 * 1
        hist = hist0 * 1
    if xrange is None:
        xrange = (bins[0], bins[-1])
    xplot = linspace(xrange[0] * 0.9, xrange[1] * 1.1, 101)
    if debug:
        fig = plt.figure()
        if fit_log:
            y1 = hist
            y2 = exp(hist)
        else:
            y1 = log(hist)
            y2 = hist
        ax1 = fig.add_subplot(211)
        ax1.plot(xb, y1, 'o')
        ax2 = fig.add_subplot(212)
        ax2.plot(xb, y2, 'o')

    if func == 'gp' or func == 'poly':
        if func == 'gp':
            if not gp:
                raise RuntimeError(
                    "To use GP interpolation, you need to install pymc")
            scale = xb.max() - xb.min()
            M = gp.Mean(
                lambda x: zeros(x.shape[0], dtype=float32) + median(hist))
            C = gp.Covariance(gp.matern.euclidean,
                              diff_degree=3,
                              scale=scale * 0.5,
                              amp=std(hist))

            # Pad with zeros
            if zero_pad and not fit_log:
                obs_mesh = concatenate([
                    xb.min() + (xb - xb.max())[:-1], xb,
                    xb.max() + (xb - xb.min())[1:]
                ])
                obs = concatenate([hist[1:] * 0, hist, hist[1:] * 0])
                var = concatenate([hist[1:] * 0, var, hist[1:] * 0])
            else:
                obs_mesh = xb
                obs = hist
            gp.observe(M, C, obs_mesh=obs_mesh, obs_vals=obs, obs_V=var)

            def curve(x):
                return wrap_M(x, M, xb[0], xb[-1], log=fit_log)

        else:
            x0 = xb[argmax(hist)]
            pars, _ = fit_poly.fitpoly(xb,
                                       hist,
                                       w=1. / var,
                                       x0=x0,
                                       k=order)

            def curve(x):
                return wrap_poly(x, x0, pars, xb[0], xb[-1], log=fit_log)

        if debug:
            ax1.plot(xplot, log(curve(xplot)), '-')
            ax2.plot(xplot, curve(xplot), '-')

        # Locate the mode; a peak in an end bin means a one-sided interval
        oneside = False
        peak = argmax(hist)
        if peak == 0:
            mod = xb[0]
            oneside = True
        elif peak == len(xb) - 1:
            mod = xb[-1]
            oneside = True
        else:
            mod0 = xb[peak]
            try:
                mod = brent(lambda x: -curve(x),
                            brack=(xb.min(), mod0, xb.max()))
            except Exception:
                # monotonic.  Take extremum
                oneside = True
                if curve(xb[0]) > curve(xb[-1]):
                    mod = xb[0]
                else:
                    mod = xb[-1]

        fac = integrate.quad(curve, xb[0], xb[-1])[0]

        def pdf(x):
            # Curve normalised to unit area over the binned range
            return curve(x) / fac

        def lower_tail(x):
            # Probability between the left plotting edge and x
            return integrate.quad(pdf, xplot[0], x)[0]

        def upper_tail(x):
            # Probability between x and the right plotting edge
            return integrate.quad(pdf, x, xplot[-1])[0]

        # end tolerance, if requested
        lower_limit = False
        upper_limit = False
        if end_tol[0] is not None and float(
                hist0[0]) / hist0.max() > end_tol[0]:
            lower_limit = True
        if end_tol[1] is not None and float(
                hist0[-1]) / hist0.max() > end_tol[1]:
            upper_limit = True
        if lower_limit and upper_limit:
            #  too flat, return mode, but no limits
            return mod, nan, nan
        elif lower_limit and not upper_limit:
            # one-sided: all of the excluded probability sits in the upper tail
            tail = (1 - conf)
            upper = brentq(lambda x: upper_tail(x) - tail, mod, xplot[-1])
            return mod, nan, upper
        elif upper_limit and not lower_limit:
            tail = (1 - conf)
            lower = brentq(lambda x: lower_tail(x) - tail,
                           xplot[0], xplot[-1])
            return mod, lower, nan

        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')

        if oneside:
            tail = (1 - conf)
        else:
            tail = (1 - conf) / 2
        if lower_tail(mod) < tail:
            # No lower bound
            minus = nan
        else:
            lower = brentq(lambda x: lower_tail(x) - tail, xplot[0], mod)
            minus = mod - lower
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        # test for upper bound
        if upper_tail(mod) < tail:
            # No upper bound
            plus = nan
        else:
            upper = brentq(lambda x: upper_tail(x) - tail, mod, xplot[-1])
            plus = upper - mod
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')

    else:
        # Work on the histogram itself.  The bin counts (not their logs, even
        # when fit_log is set) are what must be normalised to probabilities.
        frac = counts * 1.0 / counts.sum()
        mid = argmax(frac)
        mod = xb[mid]
        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')
        i0 = 0
        i1 = len(frac) - 1
        excluded = 0
        # Walk in from the left until half the excluded probability is used
        while excluded < (1 - conf) / 2:
            if i0 < mid:
                i0 += 1
            else:
                break
            excluded = frac[:i0].sum()
        if i0 == 0:
            lower = None
        else:
            lower = xb[i0]
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        # Then walk in from the right until the full excluded probability
        # (both tails together) is reached
        while excluded < 1 - conf:
            if i1 > mid:
                i1 -= 1
            else:
                break
            excluded = frac[:i0].sum() + frac[i1:].sum()
        if i1 == len(xb) - 1:
            upper = None
        else:
            upper = xb[i1]
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')
        plus = upper - mod if upper is not None else nan
        minus = mod - lower if lower is not None else nan
    return mod, minus, plus
Ejemplo n.º 13
0
    def plot2dsurf(self,
                   param1,
                   param2,
                   ax=None,
                   xrange=None,
                   yrange=None,
                   bins=30,
                   smooth=False,
                   bfac=2,
                   sfac=1.,
                   dd=3,
                   cmap=cm.gray_r,
                   levels=(),
                   ccolor='red',
                   fill=False,
                   ccmap=None,
                   falpha=1.0,
                   outfile=None,
                   zorder=None):
        '''Plot a 2D binned parameter surface for [param1] and [param2].

        If [ax] is supplied, use it to plot, otherwise open up a new figure
        and axes.  You can specify [xrange] and [yrange].  [bins] is passed
        to histogram2d.  If [smooth], the binned surface is smoothed using
        either a bivariate spline or a Gaussian Process (if pymc.gp is
        available).  If [cmap] is None, no image is drawn.  If [levels] is
        specified as fractions (0.68, 0.95, etc), draw the contours that
        enclose this fraction of the data.

        Other parameters:
           bfac:    stride used to pick spline knots from the bin centres.
           sfac:    multiplier applied to the GP covariance scale.
           dd:      diff_degree of the GP Matern covariance.
           ccolor:  colour of the contour lines.
           fill:    if True, also draw filled contours using [ccmap] and
                    transparency [falpha].
           zorder:  passed to the contour calls.
           outfile: if given and a new figure was created, save it there.

        Returns the new figure when [ax] was not supplied, otherwise None.

        Raises RuntimeError if either trace is not one-dimensional.'''
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            own_ax = True
        else:
            own_ax = False

        tr1 = self.get_trace0(param1)
        tr2 = self.get_trace0(param2)
        if len(tr1.shape) != 1 or len(tr2.shape) != 1:
            raise RuntimeError(
                "Error, variables must be scalars, try using ':' notation")
        limits = [[tr1.min(), tr1.max()], [tr2.min(), tr2.max()]]
        if xrange is not None:
            limits[0] = list(xrange)
        if yrange is not None:
            limits[1] = list(yrange)

        # first, bin up the data (all of it): param1 along x, param2 along y
        grid, xs, ys = histogram2d(tr1, tr2, bins=bins, range=limits)
        # transpose so that rows run along y, as imshow/contour expect
        grid = grid.T * 1.0
        xplot = linspace(xs[0], xs[-1], 101)
        yplot = linspace(ys[0], ys[-1], 101)
        extent = [xs[0], xs[-1], ys[0], ys[-1]]

        # convert bin edges to bin centres
        xs = (xs[1:] + xs[:-1]) / 2
        ys = (ys[1:] + ys[:-1]) / 2

        x, y = meshgrid(xs, ys)
        tx = xs[::bfac]
        ty = ys[::bfac]
        if smooth and not gp:
            tck = bisplrep(ravel(x),
                           ravel(y),
                           ravel(grid),
                           task=-1,
                           tx=tx,
                           ty=ty)
            fine_x = linspace(xs[0], xs[-1], 501)
            fine_y = linspace(ys[0], ys[-1], 501)
            grid = bisplev(fine_x, fine_y, tck).T
        elif smooth and gp:
            M = gp.Mean(
                lambda x: zeros(x.shape[:-1], dtype=float) + median(grid))
            scalerat = (tr2.max() - tr2.min()) / (tr1.max() - tr1.min())
            C = gp.Covariance(gp.matern.aniso_euclidean,
                              diff_degree=dd,
                              scale=(tr1.max() - tr1.min()) * sfac,
                              amp=std(grid),
                              scalerat=scalerat)
            mesh = vstack((ravel(x), ravel(y))).T
            # the bin counts double as the observation variances
            gp.observe(M,
                       C,
                       obs_mesh=mesh,
                       obs_vals=ravel(grid),
                       obs_V=ravel(grid))
            dplot = dstack(meshgrid(xplot, yplot))
            grid, Vsurf = gp.point_eval(M, C, dplot)

        # smoothing can undershoot; a density cannot be negative
        grid = where(grid < 0, 0, grid)

        if cmap:
            ax.imshow(grid,
                      extent=extent,
                      origin='lower',
                      aspect='auto',
                      interpolation='nearest',
                      cmap=cmap)
        if levels:
            prob = ravel(grid) / grid.sum()
            sprob = sort(prob)
            # probability enclosed by all cells denser than each sorted cell
            cprob = 1.0 - cumsum(sprob)
            clevels = []
            for level in levels:
                idx = nonzero(greater(cprob - level, 0))[0][-1]
                clevels.append(sprob[idx])
            prob = prob.reshape(grid.shape)

            clevels.sort()
            norm = Normalize(clevels[0] * 0.5, clevels[-1] * 1.3)
            if fill:
                ax.contourf(prob,
                            levels=clevels + [1],
                            extent=extent,
                            origin='lower',
                            alpha=falpha,
                            cmap=ccmap,
                            norm=norm,
                            zorder=zorder)
            ax.contour(prob,
                       levels=clevels,
                       colors=ccolor,
                       extent=extent,
                       origin='lower',
                       linewidths=2,
                       zorder=zorder)

        if own_ax:
            ax.set_xlabel("$%s$" % param1)
            ax.set_ylabel("$%s$" % param2)
            if xrange is not None:
                ax.set_xlim(xrange[0], xrange[1])
            if yrange is not None:
                ax.set_ylim(yrange[0], yrange[1])
            plt.draw()
            if outfile is not None:
                fig.savefig(outfile)
            return fig
Ejemplo n.º 14
0
def smooth(x):
    """Smooth the sequence [x] with a zero-mean Matern Gaussian process.

    The values of [x] are treated as observations at the integer positions
    0 .. len(x)-1 (the trailing .5 is passed positionally to gp.observe,
    presumably as the observation variance).  Returns the conditioned mean
    evaluated at those same positions.
    """
    from pymc import gp

    positions = range(len(x))

    def zero_mean(pts):
        # prior mean: identically zero at every requested point
        return zeros(len(pts))

    mean = gp.Mean(zero_mean)
    cov = gp.Covariance(gp.matern.euclidean, amp=1, scale=15, diff_degree=2)
    gp.observe(mean, cov, positions, x, .5)
    return mean(positions)