コード例 #1
0
    def setK(self, k):
        """Change the covariance radius ``k`` and rebuild all derived state.

        Nothing happens when ``k`` equals the current value.  Otherwise the
        adjusted bounds are widened by the change in ``k`` on both ends, the
        covariance object, its matrix over the fiber points, the inverse of
        that matrix and the ``_alpha`` vector are recomputed, and the
        Gaussian process is re-observed on the fiber.  Precomputed mean and
        variance fields are refreshed if they were already present.

        Parameters
        ----------
        k : number
            New value passed as ``r`` to ``_GP.Covariance``.
        """
        if k != self._k:
            deltaK = (k - self._k)
            self._k = k
            # Grow (or shrink, for negative deltaK) the bounds symmetrically.
            self._adjustedBounds[0] -= deltaK
            self._adjustedBounds[1] += deltaK

            self._cov = _GP.Covariance(self._c, amp=1, r=self._k)
            self._cov_matrix = self._cov(self._fiber, self._fiber)
            self._cov_inv_matrix = linalg.inv(self._cov_matrix)
            # alpha = (1^T * K^-1)^T, stored as a column matrix.
            self._alpha = numpy.asmatrix(
                numpy.dot(numpy.ones(len(self._fiber)),
                          self._cov_inv_matrix)).T

            # Observe the value 1 at every fiber point with variance epsilon.
            self._mean = _GP.Mean(_constant, val=0)
            self._gp = _GP.observe(self._mean,
                                   self._cov,
                                   obs_mesh=self._fiber,
                                   obs_vals=numpy.ones(len(self._fiber)),
                                   obs_V=numpy.zeros(len(self._fiber)) +
                                   self._epsilon)

            # Identity checks: ``!= None`` would compare element-wise (and
            # make the ``if`` ambiguous) if these fields hold numpy arrays.
            if self._precomputedVariance is not None:
                self.precomputeVarianceField()
            if self._precomputedMean is not None:
                self.precomputeMeanField()
コード例 #2
0
def uninformative_prior_gp(c=0., diff_degree=2., amp=1., scale=1.5):
    """Build the mean and covariance objects of an uninformative GP prior.

    Parameters
    ----------
    c : float
        Value forwarded to ``const_func`` as the prior mean.
    diff_degree : float
        ``diff_degree`` argument of the Matern covariance.
    amp : float
        ``amp`` (amplitude) argument of the Matern covariance.
    scale : float
        ``scale`` argument of the Matern covariance.

    Returns
    -------
    M, C : tuple
        ``gp.Mean`` built on ``const_func`` and ``gp.Covariance`` built on
        ``gp.matern.euclidean``.
    """
    covariance_params = dict(diff_degree=diff_degree, amp=amp, scale=scale)
    prior_mean = gp.Mean(const_func, c=c)
    prior_cov = gp.Covariance(gp.matern.euclidean, **covariance_params)
    return prior_mean, prior_cov
コード例 #3
0
ファイル: GPR_draws.py プロジェクト: statdtedm/CODEm-2010
def fit_GPR(infile, outfile, dv_list, scale, number_submodels, test,
            spacetime_iters, top_submodel):
    # load in the data
    all_data = csv2rec(infile, use_mrecords=False)
    for m in range(number_submodels):
        if all_data['spacetime_' + str(m + 1)].dtype == 'float64':
            all_data = np.delete(
                all_data,
                np.where(np.isnan(all_data['spacetime_' + str(m + 1)]))[0],
                axis=0)

    # find the list of years for which we need to predict
    year_list = np.unique(all_data.year)

    # find the list of country/age groups
    country_age = np.array([
        str(all_data.iso3[i]) + '_' + str(all_data.age_group[i])
        for i in range(len(all_data))
    ])
    country_age_list = np.repeat(np.unique(country_age), len(year_list))

    # make empty arrays in which to store the results
    total_iters = np.sum(spacetime_iters)
    draws = [
        np.empty(len(country_age_list), 'float') for i in range(total_iters)
    ]
    if (top_submodel > 0):
        top_submodel_draws = [
            np.empty(len(country_age_list), 'float') for i in range(100)
        ]
    iso3 = np.empty(len(country_age_list), '|S3')
    age_group = np.empty(len(country_age_list), 'int')
    year = np.empty(len(country_age_list), 'int')

    # loop through country/age groups
    for ca in np.unique(country_age_list):
        print('GPRing ' + ca)

        # subset the data for this particular country/age
        ca_data = all_data[country_age == ca]

        # subset just the observed data
        if ca_data['lt_cf'].dtype != '|O8':
            ca_observed = ca_data[(np.isnan(ca_data['lt_cf']) == 0)
                                  & (ca_data['test_' + test] == 0)]
            if len(ca_observed) > 1:
                has_data = True
            else:
                has_data = False
        else:
            has_data = False

        # keep track of how many iterations have been added for this model
        iter_counter = 0

        # loop through each submodel
        for m in range(number_submodels):

            # identify the dependent variable for this model
            dv = dv_list[m]

            # continue making predictions if we actually need draws for this model
            if (spacetime_iters[m] > 0) or (m + 1 == top_submodel):

                # skip models with no spacetime results
                if all_data['spacetime_' + str(m + 1)].dtype != 'float64':
                    for i in range(spacetime_iters[m]):
                        draws[iter_counter][country_age_list == ca] = np.NaN
                        iter_counter += 1
                    if (m + 1 == top_submodel):
                        for i in range(100):
                            top_submodel_draws[i][country_age_list ==
                                                  ca] = np.NaN
                    continue

                # make a list of the spacetime predictions
                ca_prior = np.array([
                    np.mean(ca_data['spacetime_' +
                                    str(m + 1)][ca_data.year == y])
                    for y in year_list
                ])

                # find the amplitude for this country/age
                amplitude = np.mean(ca_data['spacetime_amplitude_' +
                                            str(m + 1)])

                # make a linear interpolation of the spatio-temporal predictions to use as the mean function for GPR
                def mean_function(x):
                    return np.interp(x, year_list, ca_prior)

                # setup the covariance function
                M = gp.Mean(mean_function)
                C = gp.Covariance(eval_fun=gp.matern.euclidean,
                                  diff_degree=2,
                                  amp=amplitude,
                                  scale=scale)

                # observe the data if there is any
                if has_data:
                    gp.observe(M=M,
                               C=C,
                               obs_mesh=ca_observed.year,
                               obs_V=ca_observed['spacetime_data_variance_' +
                                                 str(m + 1)],
                               obs_vals=ca_observed[dv])

                # draw realizations from the data
                realizations = [
                    gp.Realization(M, C) for i in range(spacetime_iters[m])
                ]

                # save the data for this country/age into the results array
                iso3[country_age_list == ca] = ca[0:3]
                age_group[country_age_list == ca] = ca[4:]
                year[country_age_list == ca] = year_list.T
                for i in range(spacetime_iters[m]):
                    try:
                        draws[iter_counter][country_age_list ==
                                            ca] = realizations[i](year_list)
                    except:
                        print('Failure in ' + ca)
                    iter_counter += 1

                # if it's the top submodel, do 100 additional draws
                if (m + 1 == top_submodel):
                    realizations = [gp.Realization(M, C) for i in range(100)]
                    for i in range(100):
                        try:
                            top_submodel_draws[i][country_age_list ==
                                                  ca] = realizations[i](
                                                      year_list)
                        except:
                            print('Failure in ' + ca)

    # save the results
    print('Saving GPR results')
    names = ['iso3', 'age_group', 'year']
    results = np.core.records.fromarrays([iso3, age_group, year], names=names)
    for i in range(total_iters):
        results = recfunctions.append_fields(results,
                                             'ensemble_d' + str(i + 1),
                                             draws[i])
    if (top_submodel > 0):
        for i in range(100):
            results = recfunctions.append_fields(results,
                                                 'top_submodel_d' + str(i + 1),
                                                 top_submodel_draws[i])
    rec2csv(results, outfile)
コード例 #4
0
        print "\ninspect with:\nresults.unstack()['mare', '50%'].unstack() # for example"
        print "or: results.unstack()['mare', '50%'].unstack(2).reindex(columns='Very Moderately Slightly'.split())"

    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        sigma_true = float(options.sigma) * pl.ones(5)
        replicate = int(options.replicate)
        smoothness = options.smoothing

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'sigma_true', sigma_true
        print 'replicate', replicate
        print 'smoothness', smoothness

        M = gp.Mean(validate_age_integrating_re.quadratic)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 25, 100], [-5, -3, -5])

        log_p = gp.Realization(M, C)
        true_p = lambda x: pl.exp(log_p(x))

        model = validate_age_integrating_re.validate_ai_re(
            N, delta_true, sigma_true, true_p, smoothness)
        model.results.to_csv(
            '%s/%s/%s-%s-%s-%s.csv' %
            (output_dir, validation_name, options.numberofrows, options.delta,
             options.sigma, options.replicate))
コード例 #5
0
ファイル: MCMCstats.py プロジェクト: obscode/CSPMCMC
def plus_minus(arr,
               bins=30,
               conf=0.68,
               xrange=None,
               func='poly',
               fit_log=True,
               order=7,
               debug=False,
               zero_pad=False,
               end_tol=(None, None)):
    """Estimate the mode of a sample and its lower/upper error bars.

    The sample is histogrammed.  With ``func='gp'`` or ``func='poly'`` a
    smooth curve is fitted to the histogram (to its logarithm when
    ``fit_log`` is true), the mode is located on that curve and the error
    bars are found by integrating the normalised curve.  Any other ``func``
    value falls back to accumulating the raw histogram bins.

    Parameters
    ----------
    arr : array-like
        Sample values.
    bins : int or sequence
        Passed to ``histogram``.
    conf : float
        Confidence fraction enclosed by the error bars.
    xrange : (low, high) or None
        Histogram range; defaults to the outer bin edges.
    func : str
        ``'gp'`` (Gaussian process, requires ``gp``), ``'poly'``
        (polynomial via ``fit_poly.fitpoly``) or anything else for the
        plain-histogram method.
    fit_log : bool
        Fit the log of the non-empty bins instead of the raw counts.
    order : int
        Passed as ``k`` to ``fit_poly.fitpoly``.
    debug : bool
        Plot the histogram, the fit and the derived limits.
    zero_pad : bool
        For the GP fit without ``fit_log``, pad the observations with
        zeros on both sides.
    end_tol : pair
        ``(lower, upper)`` tolerances; when the first/last bin count
        relative to the maximum exceeds its tolerance, that side is treated
        as a limit.  ``None`` disables the check for that side.

    Returns
    -------
    (mod, minus, plus)
        Mode and distances to the lower and upper bounds; ``nan`` where a
        bound could not be determined.
        NOTE(review): the ``end_tol`` early returns yield the absolute
        bound position (``lower``/``upper``) rather than a distance from
        the mode -- confirm that callers expect this.

    Raises
    ------
    RuntimeError
        If ``func='gp'`` is requested but ``gp`` is not available.
    """
    hist0, bins = histogram(arr, bins=bins, range=xrange)
    xb = (bins[1:] + bins[:-1]) / 2
    if fit_log:
        # Only non-empty bins have a finite logarithm.
        gids = greater(hist0, 0)
        xb = xb[gids]
        counts = hist0[gids]
        var = 1. / counts
        hist = log(counts)
    else:
        counts = hist0
        var = hist0 * 1
        hist = hist0 * 1
    if xrange is None:
        xrange = (bins[0], bins[-1])
    xplot = linspace(xrange[0] * 0.9, xrange[1] * 1.1, 101)
    if debug:
        fig = plt.figure()
        if fit_log:
            y1 = hist
            y2 = exp(hist)
        else:
            y1 = log(hist)
            y2 = hist
        ax1 = fig.add_subplot(211)
        ax1.plot(xb, y1, 'o')
        ax2 = fig.add_subplot(212)
        ax2.plot(xb, y2, 'o')

    if func == 'gp' or func == 'poly':
        if func == 'gp':
            if not gp:
                raise RuntimeError(
                    "To use GP interpolation, you need to install pymc")
            scale = xb.max() - xb.min()
            M = gp.Mean(
                lambda x: zeros(x.shape[0], dtype=float32) + median(hist))
            C = gp.Covariance(gp.matern.euclidean,
                              diff_degree=3,
                              scale=scale * 0.5,
                              amp=std(hist))

            # Pad with zeros
            if zero_pad and not fit_log:
                obs_mesh = concatenate([
                    xb.min() + (xb - xb.max())[:-1], xb,
                    xb.max() + (xb - xb.min())[1:]
                ])
                obs = concatenate([hist[1:] * 0, hist, hist[1:] * 0])
                var = concatenate([hist[1:] * 0, var, hist[1:] * 0])
            else:
                obs_mesh = xb
                obs = hist
            gp.observe(M, C, obs_mesh=obs_mesh, obs_vals=obs, obs_V=var)

            func = lambda x: wrap_M(x, M, xb[0], xb[-1], log=fit_log)

        else:
            x0 = xb[argmax(hist)]
            pars, epars = fit_poly.fitpoly(xb,
                                           hist,
                                           w=1. / var,
                                           x0=x0,
                                           k=order)
            func = lambda x: wrap_poly(x, x0, pars, xb[0], xb[-1], log=fit_log)

        if debug:
            ax1.plot(xplot, log(func(xplot)), '-')
            ax2.plot(xplot, func(xplot), '-')
        oneside = False
        if argmax(hist) == 0:
            mod = xb[0]
            oneside = True
        elif argmax(hist) == len(xb) - 1:
            mod = xb[-1]
            oneside = True
        else:
            mod0 = xb[argmax(hist)]
            try:
                mod = brent(lambda x: -func(x),
                            brack=(xb.min(), mod0, xb.max()))
            except Exception:
                # monotonic.  Take extremum
                oneside = True
                if func(xb[0]) > func(xb[-1]):
                    mod = xb[0]
                else:
                    mod = xb[-1]

        # Normalise the fitted curve over the binned range.
        fac = integrate.quad(func, xb[0], xb[-1])[0]
        prob = lambda x: func(x) / fac

        #end tolerance  if requested
        lower_limit = False
        upper_limit = False
        if end_tol[0] is not None and float(
                hist0[0]) / hist0.max() > end_tol[0]:
            lower_limit = True
        if end_tol[1] is not None and float(
                hist0[-1]) / hist0.max() > end_tol[1]:
            upper_limit = True
        if lower_limit and upper_limit:
            #  too flat, return mode, but no limits
            return mod, nan, nan
        elif lower_limit and not upper_limit:
            # one-sided
            tail = (1 - conf)
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            return mod, nan, upper
        elif upper_limit and not lower_limit:
            tail = (1 - conf)
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], xplot[-1])
            return mod, lower, nan

        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')

        if oneside:
            tail = (1 - conf)
        else:
            tail = (1 - conf) / 2
        if integrate.quad(prob, xplot[0], mod)[0] < tail:
            # No lower bound
            minus = nan
        else:
            lower = brentq(
                lambda x: integrate.quad(prob, xplot[0], x)[0] - tail,
                xplot[0], mod)
            minus = mod - lower
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        #test for upper bound
        if integrate.quad(prob, mod, xplot[-1])[0] < tail:
            # No upper bound
            plus = nan
        else:
            upper = brentq(
                lambda x: integrate.quad(prob, x, xplot[-1])[0] - tail,
                mod, xplot[-1])
            plus = upper - mod
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')

    else:
        # Plain-histogram method: always work on the counts themselves,
        # never on their logarithm, so the bin weights are probabilities.
        hist = counts * 1.0 / sum(counts)
        mid = argmax(hist)
        mod = xb[mid]
        if debug:
            ax1.axvline(mod, color='red')
            ax2.axvline(mod, color='red')
        i0 = 0
        i1 = len(hist) - 1
        prob = 0
        # Walk in from the left until the lower tail holds enough mass
        # (or the mode bin is reached).
        while (prob < (1 - conf) / 2):
            if i0 < mid:
                i0 += 1
            else:
                break
            prob = sum(hist[0:i0])
        if i0 == 0:
            lower = None
        else:
            lower = xb[i0]
            if debug:
                ax1.axvline(lower, color='orange')
                ax2.axvline(lower, color='orange')
        # Then walk in from the right until both tails hold 1 - conf.
        while (prob < 1 - conf):
            if i1 > mid:
                i1 -= 1
            else:
                break
            prob = sum(hist[0:i0]) + sum(hist[i1:])
        if i1 == len(xb) - 1:
            upper = None
        else:
            upper = xb[i1]
            if debug:
                ax1.axvline(upper, color='orange')
                ax2.axvline(upper, color='orange')
        if upper is not None:
            plus = upper - mod
        else:
            plus = nan
        if lower is not None:
            minus = mod - lower
        else:
            minus = nan
    return mod, minus, plus
コード例 #6
0
    elif options.tally.lower() == 'true':
        results = tally_results()
        print 'median over all replicates of median absolute relative error'
        print results.unstack()['mare', '50%'].unstack()

    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        replicate = int(options.replicate)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'replicate', replicate

        M = gp.Mean(validate_consistent_model.constant)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 100], [-5, -5])

        true = {}
        li = gp.Realization(M, C)
        true['i'] = lambda x: pl.exp(li(x))
        lr = gp.Realization(M, C)
        true['r'] = lambda x: pl.exp(lr(x))
        lf = gp.Realization(M, C)
        true['f'] = lambda x: pl.exp(lf(x))

        model = validate_consistent_model.validate_consistent_model_sim(
            N, delta_true, true)
        model.results.to_csv(
            '%s/%s/%s-%s-%s-%s.csv' %
コード例 #7
0
ファイル: MCMCstats.py プロジェクト: obscode/CSPMCMC
    def plot2dsurf(self,
                   param1,
                   param2,
                   ax=None,
                   xrange=None,
                   yrange=None,
                   bins=30,
                   smooth=False,
                   bfac=2,
                   sfac=1.,
                   dd=3,
                   cmap=cm.gray_r,
                   levels=(),
                   ccolor='red',
                   fill=False,
                   ccmap=None,
                   falpha=1.0,
                   outfile=None,
                   zorder=None):
        '''Plot a 2D binned parameter surface for [param1] and [param2].

        If [ax] is supplied it is used for plotting, otherwise a new figure
        and axes are created.  [xrange] and [yrange] restrict the binned
        range.  [bins] is passed to histogram2d.  If [smooth], the binned
        surface is smoothed using either a bivariate spline or a Gaussian
        Process (if pymc.gp is available); [bfac] is the stride used to
        pick spline knots, [sfac] scales the GP length scale and [dd] is the
        GP ``diff_degree``.  If [cmap] is None, no image is drawn.  If
        [levels] is specified as fractions (0.68, 0.95, etc), the contours
        that enclose this fraction of the data are drawn in [ccolor]; with
        [fill] they are also filled using [ccmap] and alpha [falpha].

        Returns the figure when it was created here (optionally saved to
        [outfile]); returns None when [ax] was supplied.

        Raises RuntimeError if either trace is not one-dimensional.
        '''
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            own_ax = True
        else:
            own_ax = False

        tr1 = self.get_trace0(param1)
        tr2 = self.get_trace0(param2)
        if len(tr1.shape) != 1 or len(tr2.shape) != 1:
            raise RuntimeError(
                "Error, variables must be scalars, try using ':' notation")
        hist_range = [[tr1.min(), tr1.max()], [tr2.min(), tr2.max()]]
        if xrange is not None:
            hist_range[0] = list(xrange)
        if yrange is not None:
            hist_range[1] = list(yrange)

        # first, bin up the data (all of it): param1 on x, param2 on y
        grid, xs, ys = histogram2d(tr1, tr2, bins=bins, range=hist_range)
        # transpose so rows run along y, as imshow/contour expect
        grid = grid.T * 1.0
        xplot = linspace(xs[0], xs[-1], 101)
        yplot = linspace(ys[0], ys[-1], 101)
        extent = [xs[0], xs[-1], ys[0], ys[-1]]

        # switch from bin edges to bin centres
        xs = (xs[1:] + xs[:-1]) / 2
        ys = (ys[1:] + ys[:-1]) / 2

        x, y = meshgrid(xs, ys)
        tx = xs[::bfac]
        ty = ys[::bfac]
        if smooth and not gp:
            tck = bisplrep(ravel(x),
                           ravel(y),
                           ravel(grid),
                           task=-1,
                           tx=tx,
                           ty=ty)
            x = linspace(xs[0], xs[-1], 501)
            y = linspace(ys[0], ys[-1], 501)
            grid = bisplev(x, y, tck).T
        elif smooth and gp:
            M = gp.Mean(
                lambda x: zeros(x.shape[:-1], dtype=float) + median(grid))
            scalerat = (tr2.max() - tr2.min()) / (tr1.max() - tr1.min())
            C = gp.Covariance(gp.matern.aniso_euclidean,
                              diff_degree=dd,
                              scale=(tr1.max() - tr1.min()) * sfac,
                              amp=std(grid),
                              scalerat=scalerat)
            x, y = meshgrid(xs, ys)
            mesh = vstack((ravel(x), ravel(y))).T
            # the bin counts are used as their own observation variance
            gp.observe(M,
                       C,
                       obs_mesh=mesh,
                       obs_vals=ravel(grid),
                       obs_V=ravel(grid))
            dplot = dstack(meshgrid(xplot, yplot))
            grid, Vsurf = gp.point_eval(M, C, dplot)

        # smoothing can undershoot; densities cannot be negative
        grid = where(grid < 0, 0, grid)

        if cmap:
            ax.imshow(grid,
                      extent=extent,
                      origin='lower',
                      aspect='auto',
                      interpolation='nearest',
                      cmap=cmap)
        if levels:
            prob = ravel(grid) / sum(grid)
            sprob = sort(prob)
            cprob = 1.0 - cumsum(sprob)
            clevels = []
            for level in levels:
                # last sorted cell whose enclosed fraction still exceeds level
                idx = nonzero(greater(cprob - level, 0))[0][-1]
                clevels.append(sprob[idx])
            prob.shape = grid.shape

            clevels.sort()
            norm = Normalize(clevels[0] * 0.5, clevels[-1] * 1.3)
            if fill:
                ax.contourf(prob,
                            levels=clevels + [1],
                            extent=extent,
                            origin='lower',
                            alpha=falpha,
                            cmap=ccmap,
                            norm=norm,
                            zorder=zorder)
            ax.contour(prob,
                       levels=clevels,
                       colors=ccolor,
                       extent=extent,
                       origin='lower',
                       linewidths=2,
                       zorder=zorder)

        if own_ax:
            ax.set_xlabel("$%s$" % param1)
            ax.set_ylabel("$%s$" % param2)
            if xrange is not None:
                ax.set_xlim(xrange[0], xrange[1])
            if yrange is not None:
                ax.set_ylim(yrange[0], yrange[1])
            plt.draw()
            if outfile is not None:
                fig.savefig(outfile)
            return fig
コード例 #8
0
ファイル: fyba.py プロジェクト: afcarl/fyba
 def M_g(eval_fun=linfun, c=self.goal_rate[t.team_id]):
     """Return a ``gp.Mean`` built from ``eval_fun`` with parameter ``c``.

     The default ``c`` is this team's entry in ``self.goal_rate``; it is
     evaluated once, when the function is defined.
     """
     goal_mean = gp.Mean(eval_fun, c=c)
     return goal_mean
コード例 #9
0
ファイル: fyba.py プロジェクト: afcarl/fyba
 def M_d(eval_fun=linfun, c=self.def_rate[t.team_id]):
     """Return a ``gp.Mean`` built from ``eval_fun`` with parameter ``c``.

     The default ``c`` is this team's entry in ``self.def_rate``; it is
     evaluated once, when the function is defined.
     """
     defense_mean = gp.Mean(eval_fun, c=c)
     return defense_mean
コード例 #10
0
def smooth(x):
    """Smooth the sequence ``x`` with a zero-mean Gaussian process.

    The values of ``x`` are observed at their integer positions
    ``0 .. len(x) - 1`` with observation variance 0.5 under a Matern
    covariance (amp=1, scale=15, diff_degree=2); the conditioned mean
    evaluated at those same positions is returned.
    """
    from pymc import gp

    n_points = len(x)

    def zero_mean(pts):
        return zeros(len(pts))

    mean = gp.Mean(zero_mean)
    cov = gp.Covariance(gp.matern.euclidean, amp=1, scale=15, diff_degree=2)
    gp.observe(mean, cov, obs_mesh=range(n_points), obs_vals=x, obs_V=.5)
    return mean(range(n_points))
コード例 #11
0
    else:
        N = int(options.numberofrows)
        delta_true = float(options.delta)
        replicate = int(options.replicate)
        bias = float(options.bias)
        sigma_prior = float(options.sigma)

        print 'Running random effects validation for:'
        print 'N', N
        print 'delta_true', delta_true
        print 'bias', bias
        print 'sigma_prior', sigma_prior
        print 'replicate', replicate

        M = gp.Mean(validate_similarity.quadratic)
        C = gp.Covariance(gp.matern.euclidean, amp=1., diff_degree=2, scale=50)
        gp.observe(M, C, [0, 30, 100], [-5, -3, -5])

        true = {}
        lp = gp.Realization(M, C)
        true_p = lambda x: pl.exp(lp(x))

        model = validate_similarity.generate_data(N, delta_true, true_p,
                                                  'Unusable', bias,
                                                  sigma_prior)
        for het in 'Very Moderately Slightly'.split():
            model.parameters['p']['heterogeneity'] = het
            validate_similarity.fit(model)
            model.results.to_csv(
                '%s/%s/%s-%s-%s-%s-%s-%s.csv' %