Esempio n. 1
0
    def _make_empbayes_fit(self, empbayes_grouping='order'):
        if (self._empbayes_fit is None) or (empbayes_grouping !=
                                            self.empbayes_grouping):
            self.empbayes_grouping = empbayes_grouping
            self._counter = {'iters': 0, 'evals': 0}

            z0 = gv.BufferDict()
            for group in self._empbayes_groupings():
                z0[group] = 1.0

            # Might need to change minargs default values for empbayes_fit to converge:
            # tol=1e-8, svdcut=1e-12, debug=False, maxit=1000, add_svdnoise=False, add_priornoise=False
            # Note: maxit != maxfev. See https://github.com/scipy/scipy/issues/3334
            # For Nelder-Mead algorithm, maxfev < maxit < 3 maxfev?

            # For debugging. Same as 'callback':
            # https://github.com/scipy/scipy/blob/c0dc7fccc53d8a8569cde5d55673fca284bca191/scipy/optimize/optimize.py#L651
            def analyzer(arg):
                self._counter['evals'] += 1
                print("\nEvals: ", self._counter['evals'], arg, "\n")
                print(type(arg[0]))
                return None

            fit, z = lsqfit.empbayes_fit(z0,
                                         fitargs=self._make_fitargs,
                                         maxit=200,
                                         analyzer=None)
            print(z)
            self._empbayes_fit = fit

        return self._empbayes_fit
Esempio n. 2
0
def main():
    """Fit the same data twice, letting empbayes_fit pick the y errors.

    Version 1 uses errors proportional to ``y`` (relative errors) and echoes
    its report to ``eg7a.out``; version 2 uses one common error for every
    point (additive errors) and echoes its report to ``eg7b.out``. When
    ``MAKE_PLOT`` is set, the ratio data/fit of both versions is plotted.

    ``sys.stdout`` is restored and each output file is closed as soon as the
    corresponding report has been written, even if the fit raises.
    """
    sys_stdout = sys.stdout

    # fit data and prior
    x = np.array([1., 2., 3., 4.])
    y = np.array([3.4422, 1.2929, 0.4798, 0.1725])
    prior = gv.gvar(['10(1)', '1.0(1)'])

    # fit function
    def fcn(x, p):
        return p[0] * gv.exp(-p[1] * x)

    # find optimal dy -- version 1: dy proportional to y
    def relative_fitargs(z):
        dy = y * z
        newy = gv.gvar(y, dy)
        return dict(data=(x, newy), fcn=fcn, prior=prior)

    # find optimal dy -- version 2: the same dy for every point
    def additive_fitargs(z):
        dy = np.ones_like(y) * z
        newy = gv.gvar(y, dy)
        return dict(data=(x, newy), fcn=fcn, prior=prior)

    def tuned_fit(fitargs, filename):
        """Run the empirical-Bayes fit, echoing its report into *filename*."""
        with open(filename, "w") as outfile:
            sys.stdout = tee.tee(sys_stdout, outfile)
            try:
                fit, z = lsqfit.empbayes_fit(0.001, fitargs)
                # single-argument call form: valid in Python 2 and Python 3
                print(fit.format(True))
            finally:
                sys.stdout = sys_stdout
        return fit

    # version 1 - relative errors
    fit = tuned_fit(relative_fitargs, "eg7a.out")
    if MAKE_PLOT:
        ratio = fit.y / fcn(x, fit.pmean)
        plt.errorbar(x=fit.x, y=gv.mean(ratio), yerr=gv.sdev(ratio), c='b')
        plt.plot([0.5, 4.5], [1.0, 1.0], c='r')

    # version 2 - additive errors
    fit = tuned_fit(additive_fitargs, "eg7b.out")
    if MAKE_PLOT:
        ratio = fit.y / fcn(x, fit.pmean)
        # shifted by 0.1 in x so the two sets of error bars do not overlap
        plt.errorbar(x=fit.x + 0.1,
                     y=gv.mean(ratio),
                     yerr=gv.sdev(ratio),
                     c='g')
        plt.show()
Esempio n. 3
0
def main():
    """Fit the data for nexp = 3..7, then run the optional extras.

    Depending on the module flags, the loop is followed by an error budget
    (``DO_ERRORBUDGET``), an empirical-Bayes fit that tunes the width of the
    ``a`` priors (``DO_EMPBAYES``), and a plot of data/fit (``DO_PLOT``).
    """
    def show_ratios(E, a):
        # ratios of the second and third entries to the first one
        print('E1/E0 =',(E[1]/E[0]).fmt(),'  E2/E0 =',(E[2]/E[0]).fmt())
        print('a1/a0 =',(a[1]/a[0]).fmt(),'  a2/a0 =',(a[2]/a[0]).fmt())

    gv.ranseed([2009, 2010, 2011, 2012])    # seed the random numbers (opt.)
    x, y = make_data()                      # fit data
    p0 = None                               # first fit has no starting point
    for nexp in range(3, 8):
        print('************************************* nexp =', nexp)
        prior = make_prior(nexp)
        fit = lsqfit.nonlinear_fit(
            data=(x, y), fcn=f, prior=prior, p0=p0, svdcut=SVDCUT)
        print(fit)
        E = fit.p['E']
        a = fit.p['a']
        show_ratios(E, a)
        print()
        # a good fit seeds the next, larger fit (opt.)
        if fit.chi2 / fit.dof < 1.:
            p0 = fit.pmean

    if DO_ERRORBUDGET:
        outputs = OrderedDict()
        outputs['E1/E0'] = E[1] / E[0]
        outputs['E2/E0'] = E[2] / E[0]
        outputs['a1/a0'] = a[1] / a[0]
        outputs['a2/a0'] = a[2] / a[0]
        inputs = OrderedDict()
        inputs['E'] = fit.prior['E']
        inputs['a'] = fit.prior['a']
        inputs['y'] = y
        inputs['svd'] = fit.svdcorrection
        print(fit.fmt_values(outputs))
        print(fit.fmt_errorbudget(outputs, inputs))

    if DO_EMPBAYES:
        # The defaults freeze the values left over from the last loop pass.
        def fitargs(z,nexp=nexp,prior=prior,f=f,data=(x,y),p0=p0):
            z = gv.exp(z)
            width = 0.5 * z[0]
            prior['a'] = [gv.gvar(0.5, width) for _ in range(nexp)]
            return dict(prior=prior, data=data, fcn=f, p0=p0)

        fit, z = lsqfit.empbayes_fit([0.0], fitargs, tol=1e-3)
        print(fit)                          # the optimized fit results
        E = fit.p['E']
        a = fit.p['a']
        show_ratios(E, a)
        print("prior['a'] =", fit.prior['a'][0].fmt())
        print()

    if DO_PLOT:
        import pylab as pp
        from gvar import mean, sdev
        # data divided by the fit function evaluated at the best-fit means
        ratio = y / f(x, fit.pmean)
        pp.xlim(0, 21)
        pp.xlabel('x')
        pp.ylabel('y/f(x,p)')
        pp.errorbar(x=x, y=mean(ratio), yerr=sdev(ratio), fmt='ob')
        pp.plot([0.0, 21.0], [1.0, 1.0])
        pp.show()
Esempio n. 4
0
def main():
    """Run the fits for nexp = 3..4 and the optional follow-up analyses.

    Depending on the module flags the loop is followed by an error budget
    (``DO_ERRORBUDGET``, echoed to ``eg4c.out`` and ``eg4b.out``), simulated
    fits (``DO_SIMULATIONS``, ``eg4d.out``), an empirical-Bayes fit
    (``DO_EMPBAYES``, ``eg4a.out``) and a plot of data/fit (``DO_PLOT``).

    All output goes through single-argument ``print(...)`` calls, which
    produce the same text under Python 2 and Python 3. Every output file is
    closed, and ``sys.stdout`` restored, as soon as its section is finished,
    also when that section raises.
    """
    import contextlib   # local import: only needed by echo_to below

    sys_stdout = sys.stdout

    @contextlib.contextmanager
    def echo_to(filename):
        """Copy everything printed inside the ``with`` block into *filename*."""
        with open(filename, "w") as outfile:
            sys.stdout = tee.tee(sys_stdout, outfile)
            try:
                yield
            finally:
                sys.stdout = sys_stdout

    def show_ratios(E, a):
        # ratios of the second and third entries to the first one
        print('E1/E0 = %s   E2/E0 = %s' % (E[1] / E[0], E[2] / E[0]))
        print('a1/a0 = %s   a2/a0 = %s' % (a[1] / a[0], a[2] / a[0]))

    gv.ranseed([2009,2010,2011,2012]) # initialize random numbers (opt.)
    x,y = make_data()               # make fit data
    p0 = None                       # make larger fits go faster (opt.)
    for nexp in range(3,5):
        print('************************************* nexp = %s' % nexp)
        prior = make_prior(nexp)
        fit = lsqfit.nonlinear_fit(data=(x,y),fcn=f,prior=prior,p0=p0)
        print(fit)                  # print the fit results
        E = fit.p['E']              # best-fit parameters
        a = fit.p['a']
        show_ratios(E, a)
        print('')
        if fit.chi2/fit.dof<1.:
            p0 = fit.pmean          # starting point for next fit (opt.)

    if DO_ERRORBUDGET:
        # Transcript in the style of an interactive session: every line is
        # echoed after ">>>", and the print(...) lines are also evaluated.
        lines = [
            "E = fit.p['E']",
            "a = fit.p['a']",
            "print(E[1] / E[0])",
            "print((E[1] / E[0]).partialsdev(fit.prior['E']))",
            "print((E[1] / E[0]).partialsdev(fit.prior['a']))",
            "print((E[1] / E[0]).partialsdev(y))"
            ]
        with echo_to("eg4c.out"):
            for line in lines:
                print(">>> %s" % line)
                if line[:5] == "print":
                    # eval only ever sees the literals listed above; it must
                    # stay in this function so E, a, fit and y are in scope.
                    print(eval(line[5:]))

        outputs = {'E1/E0':E[1]/E[0], 'E2/E0':E[2]/E[0],
                 'a1/a0':a[1]/a[0], 'a2/a0':a[2]/a[0]}
        inputs = {'E':fit.prior['E'],'a':fit.prior['a'],'y':y}

        with echo_to("eg4b.out"):
            print(fit.fmt_values(outputs))
            print(fit.fmt_errorbudget(outputs,inputs))

    if DO_SIMULATIONS:
        # fit simulations
        with echo_to("eg4d.out"):
            for sfit in fit.simulated_fit_iter(3):
                print('************************************* simulation')
                print(sfit)
                sE = sfit.p['E']             # best-fit parameters
                sa = sfit.p['a']
                E = sfit.pexact['E']
                a = sfit.pexact['a']
                show_ratios(sE, sa)
                print('\nSimulated Fit Values - Exact Values:')
                print('E1/E0: %s   E2/E0: %s' % (
                    (sE[1] / sE[0]) - (E[1] / E[0]),
                    (sE[2] / sE[0]) - (E[2] / E[0])))
                print('a1/a0: %s   a2/a0: %s' % (
                    (sa[1] / sa[0]) - (a[1] / a[0]),
                    (sa[2] / sa[0]) - (a[2] / a[0])))

                # compute chi**2 comparing fit results to exact results
                sim_results = [sE[0], sE[1], sa[0], sa[1]]
                exact_results = [E[0], E[1], a[0], a[1]]
                chi2 = gv.chi2(sim_results, exact_results, svdcut=1e-8)
                print('\nParameter chi2/dof [dof] = %.2f [%d]   Q = %.1f'
                      % (chi2 / chi2.dof, chi2.dof, chi2.Q))
                print('')

    if DO_EMPBAYES:
        # The defaults freeze the values left over from the last loop pass.
        def fitargs(z,nexp=nexp,prior=prior,f=f,data=(x,y),p0=p0):
            z = gv.exp(z)
            prior['a'] = [gv.gvar(0.5,0.5*z[0]) for i in range(nexp)]
            return dict(prior=prior,data=data,fcn=f,p0=p0)
        ##
        z0 = [0.0]
        fit,z = lsqfit.empbayes_fit(z0,fitargs,tol=1e-3)
        with echo_to("eg4a.out"):
            print(fit)                  # print the optimized fit results
            E = fit.p['E']              # best-fit parameters
            a = fit.p['a']
            show_ratios(E, a)
        print('')

    if DO_PLOT:
        import pylab as pp
        from gvar import mean,sdev
        fity = f(x,fit.pmean)
        ratio = y/fity
        pp.xlim(0,21)
        pp.xlabel('x')
        pp.ylabel('y/f(x,p)')
        pp.errorbar(x=x,y=mean(ratio),yerr=sdev(ratio),fmt='ob')
        pp.plot([0.0,21.0],[1.0,1.0])
        pp.show()
Esempio n. 5
0
        data=data,
        fcn=fcn,
        prior=prior,
        p0=p0,
    )
    residuals = linalg.solve_triangular(hyperchol, hp.buf - hypermean)
    plausibility = -1 / 2 * (residuals @ residuals)
    return args, plausibility


# Report the reference hyperparameters held in truehp (set outside this
# excerpt) so they can be compared with the fitted ones below.
print('\ntrue hyperparameters:')
print(truehp)

# Empirical-Bayes fit, started from a random sample of the hyperprior.
# fithp receives the second value returned by empbayes_fit.
print('\nfit:')
z0 = gvar.sample(hyperprior)
fit, fithp = lsqfit.empbayes_fit(z0, fitargs)
# The fit report is printed twice, with different format options.
print(fit.format(maxline=True, pstyle='v'))
print(fit.format(maxline=-1))

# Rebuild gp and the prior from the fitted hyperparameters, evaluate fcn at
# the fitted parameters, and ask gp for 'plotgrid' given the fitted
# 'datagrid' values together with the constraints.
gp = makegp(fithp)
prior = makeprior(gp)
pred = fcn(fit.p)
fitgrid = gp.predfromfit(dict(datagrid=fit.p['datagrid'], **constraints),
                         'plotgrid')

print('\ncheck constraints in fit:')
check_constraints(fitgrid)


def allkeys(d):
    for k in d:
Esempio n. 6
0
def main():
    """Run the fits for nexp = 2..7 and the optional follow-up analyses.

    The reports for nexp = 2 and 3 are echoed to ``eg4GBF.out``. Depending on
    the module flags the loop is followed by an error budget
    (``DO_ERRORBUDGET``, ``eg4GBFb.out``), an empirical-Bayes fit
    (``DO_EMPBAYES``, ``eg4GBFa.out``) and a plot of data/fit (``DO_PLOT``).

    All output goes through single-argument ``print(...)`` calls, which
    produce the same text under Python 2 and Python 3. Output files are
    closed and ``sys.stdout`` is restored when a section ends, also when it
    raises.
    """
    import contextlib   # local import: only needed by echo_to below

    sys_stdout = sys.stdout

    @contextlib.contextmanager
    def echo_to(filename):
        """Copy everything printed inside the ``with`` block into *filename*."""
        with open(filename, "w") as outfile:
            sys.stdout = tee.tee(sys_stdout, outfile)
            try:
                yield
            finally:
                sys.stdout = sys_stdout

    gd.ranseed([2009,2010,2011,2012]) # initialize random numbers (opt.)
    x,y = make_data()               # make fit data
    p0 = None                       # make larger fits go faster (opt.)
    loop_log = None
    try:
        for nexp in range(2,8):
            if nexp == 2:
                # start echoing the loop output to the file ...
                loop_log = open("eg4GBF.out","w")
                sys.stdout = tee.tee(sys_stdout, loop_log)
            print('************************************* nexp = %s' % nexp)
            prior = make_prior(nexp)
            fit = lsqfit.nonlinear_fit(data=(x,y),fcn=f,prior=prior,p0=p0)
            print(fit)                  # print the fit results
            print('')
            if nexp == 3:
                # ... and stop once the nexp = 3 report is written
                sys.stdout = sys_stdout
                loop_log.close()
            if fit.chi2/fit.dof<1.:
                p0 = fit.pmean          # starting point for next fit (opt.)
    finally:
        sys.stdout = sys_stdout
        if loop_log is not None:
            loop_log.close()            # harmless if already closed

    if DO_ERRORBUDGET:
        # E and a are locals of this function (they are assigned in the
        # DO_EMPBAYES branch), so they must be set here before being read;
        # previously this branch failed with UnboundLocalError.
        E = fit.p['E']              # best-fit parameters of the last fit
        a = fit.p['a']
        print(E[1]/E[0])
        print((E[1]/E[0]).partialsdev(fit.prior['E']))
        print((E[1]/E[0]).partialsdev(fit.prior['a']))
        print((E[1]/E[0]).partialsdev(y))
        outputs = {'E1/E0':E[1]/E[0], 'E2/E0':E[2]/E[0],
                 'a1/a0':a[1]/a[0], 'a2/a0':a[2]/a[0]}
        inputs = {'E':fit.prior['E'],'a':fit.prior['a'],'y':y}

        with echo_to("eg4GBFb.out"):
            print(fit.fmt_values(outputs))
            print(fit.fmt_errorbudget(outputs,inputs))

    if DO_EMPBAYES:
        # The defaults freeze the values left over from the last loop pass.
        def fitargs(z,nexp=nexp,prior=prior,f=f,data=(x,y),p0=p0):
            z = gd.exp(z)
            prior['a'] = [gd.gvar(0.5,0.5*z[0]) for i in range(nexp)]
            return dict(prior=prior,data=data,fcn=f,p0=p0)
        ##
        z0 = [0.0]
        fit,z = lsqfit.empbayes_fit(z0,fitargs,tol=1e-3)
        with echo_to("eg4GBFa.out"):
            print(fit)                  # print the optimized fit results
            E = fit.p['E']              # best-fit parameters
            a = fit.p['a']
            print('E1/E0 = %s   E2/E0 = %s' % (E[1]/E[0], E[2]/E[0]))
            print('a1/a0 = %s   a2/a0 = %s' % (a[1]/a[0], a[2]/a[0]))
            print("prior['a'] = %s" % (fit.prior['a'][0],))
        print('')

    if DO_PLOT:
        import pylab as pp
        from gvar import mean,sdev
        fity = f(x,fit.pmean)
        ratio = y/fity
        pp.xlim(0,21)
        pp.xlabel('x')
        pp.ylabel('y/f(x,p)')
        pp.errorbar(x=x,y=mean(ratio),yerr=sdev(ratio),fmt='ob')
        pp.plot([0.0,21.0],[1.0,1.0])
        pp.show()
Esempio n. 7
0
def main():
    """Executes the command line script

    Loads parameters and data, optionally selects the prior sd of the beta
    splines by rolling-window cross-validation, fits the SEIR model
    (tightening the spline prior and refitting for as long as the fit looks
    degenerate) and dumps the results.

    Raises:
        RuntimeError: if the fit is degenerate but cross-validation was not
            run, so there is no spline prior sd to tighten.
    """
    # Prior sd of the splines chosen by cross-validation; stays None when
    # cross-validation is skipped.
    best_penalty = None

    if __name__ == "__main__":
        # Hard-coded settings used when the module is executed directly.
        # NOTE(review): ``args`` is never created on this path, yet it is
        # still read by the empirical-Bayes branch and by dump_results at the
        # end of this function -- confirm how this path is meant to finish.
        parameter_file_path = 'data/foo.csv'
        parameters = read_parameters(parameter_file_path)
        data_file_path = 'data/HUP_ts.csv'
        data = read_data(data_file_path)
        error_file_path = 'data/data_errors.csv'
        model = SEIRModel(fit_columns=["hospital_census", "vent_census"],
                          update_parameters=flexible_beta)
        xval = True
        k = 10
        spline_power = 2
        splines = np.linspace(0, data.shape[0] - 5, k).astype(int)
        win = 40
        pen = .002
        beta_fun = 'flexible_beta'
        pd.options.display.max_rows = 4000
        pd.options.display.max_columns = 4000
    else:
        args = parse_args()
        #
        data_file_path = args.data_file
        parameter_file_path = args.parameter_file
        beta_fun = args.beta
        spline_power = args.spline_power
        # cross-validation is only honoured for the flexible beta
        xval = args.cross_validate if args.beta == "flexible_beta" else False
        error_file_path = args.data_error_file
        k = args.spline_dimension

        if args.verbose:
            for handler in LOGGER.handlers:
                handler.setLevel(DEBUG)

        LOGGER.debug("Received arguments:\n%s", args)

        parameters = read_parameters(parameter_file_path)
        LOGGER.debug("Read parameters:\n%s", parameters)

        data = read_data(data_file_path)
        LOGGER.debug("Read data:\n%s", data)

        model = SEIRModel(
            fit_columns=["hospital_census", "vent_census"],
            update_parameters=flexible_beta if beta_fun == "flexible_beta" \
                                            else logistic_social_policy,
        )

        # parse the splines
        # TODO:  note this will need to be generalized once we've got more features time-varying
        if k > 0:
            splines = np.arange(0, data.shape[0], int(data.shape[0] / k))
        else:
            splines = -99
            assert args.beta != "flexible_beta", "You need to specify some splines with '-k <spline dimension> if you're using flexible beta"

    ## CROSS VALIDATION
    if xval is True:
        print("Doing rolling-window cross-validation")
        assert error_file_path is not None, "Haven't yet implemented cross-validation for empirical bayes.  Please supply a data error file (i.e.: `-y data/data_errors.csv`)"
        # loop through windows, and in each one, forecast one week out.
        penvec = 10**np.linspace(-10, 5, 16)

        winstart = list(range(data.shape[0] - 14, (data.shape[0] - 7)))
        # NOTE(review): k appears twice in each tuple -- confirm against the
        # signature of xval_wrapper.
        tuples_for_starmap = [(p, w, parameter_file_path, splines, k,
                               data_file_path, error_file_path, k)
                              for p in penvec for w in winstart]

        # the context manager releases the workers even if starmap raises
        with mp.Pool(mp.cpu_count()) as pool:
            xval_results = pool.starmap(xval_wrapper, tuples_for_starmap)
        xval_df = pd.DataFrame(xval_results)
        # remove errors
        errors = (xval_df.mse == -9999).sum()
        # assert errors < xval_df.shape[0]*.2, "Lot's of errors when doing cross-validation.  Breaking here rather than returning unreliable results."
        xval_df = xval_df.loc[xval_df.mse > 0]
        xval_df['rmse'] = xval_df.mse**.5
        penframe = xval_df.groupby(['pen']).agg({
            'rmse': ['mean', 'std']
        },
                                                as_index=False).reset_index()
        penframe.columns = ['pen', 'mu', 'sig']

        # penalty with the smallest mean rmse
        best_penalty = penframe.pen.loc[penframe.mu == min(
            penframe.mu)].iloc[0]
        print(
            f"The best prior sd on the splines is {best_penalty}.  Don't forget to look at the plot of cross-validation statistics (in the output directory) to make sure that there's nothing wacky going on."
        )
        parameters['pen_beta'] = gvar(0, best_penalty)

    degen_flag = True
    while degen_flag:
        xx, pp = prepare_model_parameters(parameters=parameters,
                                          data=data,
                                          beta_fun=beta_fun,
                                          splines=splines,
                                          spline_power=spline_power)
        LOGGER.debug("Parsed model meta pars:\n%s", xx)
        LOGGER.debug("Parsed model priors:\n%s", pp)
        model.fit_start_date = xx["day0"]

        # If empirical bayes is selected to fit the data, this also returns the fit object
        LOGGER.debug("Starting fit")
        # error_file_path is set on both configuration paths above (it equals
        # args.data_error_file when the arguments were parsed), whereas
        # ``args`` does not exist on the hard-coded path.
        if error_file_path:
            xx["error_infos"] = (read_csv(error_file_path).set_index("param")
                                 ["value"].to_dict())

            LOGGER.debug("Using y_errs from file:\n%s", xx["error_infos"])
            fit = nonlinear_fit(
                data=(xx, get_yy(data, **xx["error_infos"])),
                prior=pp,
                fcn=model.fit_fcn,
                # debug=args.verbose,
            )
        else:
            LOGGER.debug("Employing empirical Bayes to infer y-errors")
            # This fit varies the size of the y-errors of hosp_min and vent_min
            # to optimize the description of the data (logGBF)
            fit_kwargs = lambda error_infos: dict(
                data=(xx, get_yy(data, hosp_rel=0, vent_rel=0, **error_infos)),
                prior=pp,
                fcn=model.fit_fcn,
                debug=args.verbose,
            )
            fit, xx["error_infos"] = empbayes_fit(
                {
                    "hosp_min": 10,
                    "vent_min": 1
                }, fit_kwargs)
            LOGGER.debug("Empbayes y_errs are:\n%s", xx["error_infos"])
        # check for degeneracy
        splinecoefvec = np.array([
            fit.p['beta_splines'][i].mean
            for i in range(len(fit.p['beta_splines']))
        ])
        # coefficient of variation of all spline coefficients but the first
        cv = np.std(splinecoefvec[1:]) / np.mean(splinecoefvec[1:])
        BI_update = (fit.p['beta_intercept'] -
                     parameters['beta_intercept']).mean
        # spread of the coefficients in orders of magnitude
        coef_OM_range = np.ptp(np.log10(np.abs(splinecoefvec)))
        if (cv < .1) | (BI_update**2 < .1) | (coef_OM_range > 2):
            print(
                'the best prior sd on the splines led to a degenerate fit.  trimming it by one order of magnitude'
            )
            if best_penalty is None:
                # Without cross-validation there is no penalty to trim; fail
                # with an explicit message instead of an UnboundLocalError.
                raise RuntimeError(
                    "degenerate fit, but no cross-validated prior sd on the "
                    "splines is available to trim (cross-validation was not "
                    "run)")
            curr = np.log10(best_penalty)
            assert curr > -5, "degenerate solutions all the way down.  Something is broken."
            best_penalty = 10**(curr - 1)
            print(f"new best prior sd on the splines is {best_penalty}")
            parameters['pen_beta'] = gvar(0, best_penalty)
        else:
            degen_flag = False

    LOGGER.info("Fit result:\n%s", fit)

    dump_results(args.output_dir,
                 fit=fit,
                 model=model,
                 extend_days=args.extend_days)
    LOGGER.debug("Dumped results to:\n%s", args.output_dir)
Esempio n. 8
0
# the Free Software Foundation, either version 3 of the License, or
# any later version (see <http://www.gnu.org/licenses/>).
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from __future__ import print_function   # makes this work for python2 and 3

import numpy as np
import gvar as gv
import lsqfit

# Fit data: x holds the sample points, y the matching measurements.
x = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])
# The y entries are strings of the form 'value(error)'.
# NOTE(review): they stay plain strings inside a NumPy array here; presumably
# lsqfit/gvar converts them when the fit is set up -- confirm.
y = np.array([
    '0.133426(95)', '0.20525(15)', '0.27491(20)', '0.32521(25)',
    '0.34223(28)', '0.32394(28)', '0.27857(27)'
    ])

def fcn(x, p):
    """Return exp(-(p[0] + p[1]*x + p[2]*x**2 + p[3]*x**3)), the fit model."""
    exponent = -p[0] - p[1] * x - p[2] * x**2 - p[3] * x**3
    return gv.exp(exponent)

def fitargs(z):
    """Return the fit arguments for a prior of four ``0 +- z`` parameters.

    ``z`` is the common prior width; the data ``(x, y)`` and the fit function
    ``fcn`` are the module-level ones.
    """
    widths = [z] * 4
    prior = gv.gvar([gv.gvar(0, width) for width in widths])
    return {'prior': prior, 'fcn': fcn, 'data': (x, y)}

# Let empbayes_fit adjust z, the common prior width consumed by fitargs,
# starting from 1.0, then print the resulting fit report.
fit,z = lsqfit.empbayes_fit(1.0, fitargs)
print(fit.format(True))
Esempio n. 9
0
# Measurements to fit, written as strings of the form 'value(error)'.
# NOTE(review): presumably converted by lsqfit/gvar when the fit is set up --
# confirm.
y = np.array([
    '0.133426(95)', '0.20525(15)', '0.27491(20)', '0.32521(25)', '0.34223(28)',
    '0.32394(28)', '0.27857(27)'
])


def fcn(x, p):
    """Fit model: the exponential of minus a cubic in ``x``.

    ``p[0]`` .. ``p[3]`` are the coefficients of x**0 .. x**3.
    """
    c0, c1, c2, c3 = p[0], p[1], p[2], p[3]
    return gv.exp(-c0 - c1 * x - c2 * x**2 - c3 * x**3)


def fitterargs(z):
    """Return the fit arguments for a prior of four ``0 +- z`` parameters."""
    prior = []
    for _ in range(4):
        prior.append(gv.gvar(0, z))
    return {'prior': prior, 'fcn': fcn, 'data': (x, y)}


# Let empbayes_fit adjust z, the common prior width consumed by fitterargs,
# starting from 1.
fit, z = lsqfit.empbayes_fit(1., fitterargs)

# Echo the report of the optimized fit to the terminal and to eg4a.out. The
# file is closed and stdout restored even if formatting fails. The
# single-argument print(...) form is valid in both Python 2 and Python 3.
with open('eg4a.out', 'w') as outfile:
    sys.stdout = tee.tee(sys_stdout, outfile)
    try:
        print(fit.format(True))
    finally:
        sys.stdout = sys_stdout

# Second fit with a hand-written prior whose widths are tiny (~1e-16)
# relative to the central values.
prior = gv.gvar([
    '2.5904 +- 2.6e-16',
    '-6.53012 +- 6.5e-16',
    '7.83211 +- 7.8e-16',
    '-1.68813 +- 1.7e-16',
])
fit = lsqfit.nonlinear_fit(data=(x, y), prior=prior, fcn=fcn)
print(fit)
Esempio n. 10
0
def bayes_xval(days_withheld=7, which_hospital="HUP"):
    """Compare the normal-approximation fit with MCMC on withheld days.

    The last ``days_withheld`` rows of the hospital's time series are held
    out. An empirical-Bayes fit is run on the remaining rows and propagated
    forward; ``do_chains`` is run separately. Both forecasts are scored
    against the held-out ``hosp`` and ``vent`` values.

    Args:
        days_withheld: number of trailing rows used as the test set.
        which_hospital: prefix of the ``*_parameters.csv`` and ``*_ts.csv``
            files inside ``datadir``.

    Returns:
        dict with keys ``which_hospital``, ``days_out``, ``loss_mcmc``,
        ``loss_approx``, ``plotq`` (residuals) and ``plotr`` (prediction
        quantiles), or ``None`` if any step raised; the exception is printed
        and swallowed on purpose (best effort).
    """

    def central_value(entry):
        # Read the mean straight from the gvar instead of parsing str(gvar);
        # plain numbers pass through unchanged. (Test for ``sdev`` because
        # NumPy scalars also expose a ``mean`` attribute.)
        return float(entry.mean) if hasattr(entry, "sdev") else float(entry)

    def uncertainty(entry):
        # str(gvar) uses compact notation such as '5.20(80)' for 5.2 +- 0.8,
        # so parsing the parenthesis yields 80 instead of 0.8; ``sdev`` is
        # exact. The fallback for plain numbers is kept as it was: the value
        # itself.
        return float(entry.sdev) if hasattr(entry, "sdev") else float(entry)

    try:
        parameters = read_parameters(
            f"{datadir}{which_hospital}_parameters.csv")
        data = read_data(f"{datadir}{which_hospital}_ts.csv")[:-days_withheld]
        test_set = pd.read_csv(
            f"{datadir}{which_hospital}_ts.csv")[-days_withheld:]
        test_set.date = test_set.date.astype("datetime64[ns]")
        model = SEIRModel(
            fit_columns=["hospital_census", "vent_census"],
            update_parameters=logisitic_social_policy,
        )

        xx, pp = prepare_model_parameters(parameters, data)
        model.fit_start_date = xx["day0"]

        # empbayes_fit calls this with trial error settings and fits with them
        fit_kwargs = lambda error_infos: dict(
            data=(xx, get_yy(data, hosp_rel=0, vent_rel=0, **error_infos)),
            prior=pp,
            fcn=model.fit_fcn,
            debug=True,
        )
        fit, xx["error_infos"] = empbayes_fit({
            "hosp_min": 10,
            "vent_min": 1
        }, fit_kwargs)
        # extend by 60 days
        xx["dates"] = xx["dates"].union(
            date_range(xx["dates"].max(), freq="D", periods=60))
        prediction_df = model.propagate_uncertainties(xx, fit.p)
        prediction_df.index = prediction_df.index.round("H")

        # drop the index
        prediction_df = prediction_df.reset_index()
        prediction_df['hmu'] = prediction_df.hospital_census.apply(
            central_value)
        prediction_df['hsig'] = prediction_df.hospital_census.apply(
            uncertainty)
        prediction_df['vmu'] = prediction_df.vent_census.apply(central_value)
        prediction_df['vsig'] = prediction_df.vent_census.apply(uncertainty)

        # merge
        mm = prediction_df.merge(test_set, how='left')
        tomerge = copy.deepcopy(data)
        tomerge.columns = ["obs_" + i for i in tomerge.columns]
        tomerge.reset_index(inplace=True)
        mm = mm.merge(tomerge, how='outer')

        # compute simple msfe
        hMSFE = np.mean((mm.hmu - mm.hosp)**2)
        vMSFE = np.mean((mm.vmu - mm.vent)**2)
        loss_approx = (hMSFE + vMSFE) / 2

        # now run MCMC
        params_raw = pd.read_csv(f"{datadir}{which_hospital}_parameters.csv")
        df = do_chains(n_iters=9000,
                       params=params_raw,
                       obs=read_data(f"{datadir}{which_hospital}_ts.csv"),
                       best_penalty=None,
                       sample_obs=False,
                       holdout=days_withheld,
                       n_chains=1,
                       parallel=False)
        # keep only the iterations after the first 1000
        df = df.loc[df.iter > 1000]
        arrs = np.stack(list(df.arr))
        arrs_test = arrs[:, data.shape[0]:(data.shape[0] + days_withheld), :]
        median_pred = np.median(arrs_test, axis=0)
        # columns 3 and 5 of the chain output are scored against hosp / vent
        loss_mcmc = (np.mean((median_pred[:, 3] - test_set.hosp)**2)
                     + np.mean((median_pred[:, 5] - test_set.vent)**2)) / 2

        # prediction quantiles:  the proportion of times the prediction is greater than the mean

        hq_m = [(arrs_test[:, day, 3] > test_set.hosp.iloc[day]).mean()
                for day in range(days_withheld)]
        vq_m = [(arrs_test[:, day, 5] > test_set.vent.iloc[day]).mean()
                for day in range(days_withheld)]

        mmtail = mm.tail(days_withheld)
        hq_n = [(np.random.normal(mmtail.hmu.iloc[day], mmtail.hsig.iloc[day],
                                  10000) > test_set.hosp.iloc[day]).mean()
                for day in range(days_withheld)]
        vq_n = [(np.random.normal(mmtail.vmu.iloc[day], mmtail.vsig.iloc[day],
                                  10000) > test_set.vent.iloc[day]).mean()
                for day in range(days_withheld)]

        plotr = dict(hq_m=hq_m, vq_m=vq_m, hq_n=hq_n, vq_n=vq_n)

        # Residuals over training + test rows; the observed series and each
        # residual are built once (they used to be computed twice).
        n_obs = len(data) + len(test_set)
        observed_hosp = np.array(data.hosp.tolist() + test_set.hosp.tolist())
        observed_vent = np.array(data.vent.tolist() + test_set.vent.tolist())
        plotq = dict(
            resh_n=mm.hmu[:n_obs] - observed_hosp,
            resh_m=np.median(arrs[:, :n_obs, 3], axis=0) - observed_hosp,
            resv_n=mm.vmu[:n_obs] - observed_vent,
            resv_m=np.median(arrs[:, :n_obs, 5], axis=0) - observed_vent)

        out = dict(which_hospital=which_hospital,
                   days_out=days_withheld,
                   loss_mcmc=loss_mcmc,
                   loss_approx=loss_approx,
                   plotq=plotq,
                   plotr=plotr)
        return out
    except Exception as e:
        # Best effort: report the failure and hand back None to the caller.
        print(e)
        return None