Ejemplos de uso de pylab.quantile en Python

Ejemplo n.º 1

0

Mostrar archivo

def plotter(key, sims, ax, label='', ylabel='', low_q=0.05, high_q=0.95, startday=None):
    '''
    Plot the median of one result across several sims with a quantile band.

    Args:
        key (str): result key; the part after the first underscore selects the color
        sims (list): sims whose ``results[key].values`` are stacked row-wise
        ax (axes): axes that receive the date tick marks
        label (str): legend label for the median line
        ylabel (str): y-axis label
        low_q (float): lower quantile of the shaded band
        high_q (float): upper quantile of the shaded band
        startday: accepted but not used by this function
    '''
    palette = cv.get_colors()
    color = palette[key.split('_')[1]]

    # One row per sim, one column per time point
    stacked = np.array([run.results[key].values for run in sims])
    best = pl.median(stacked, axis=0)
    low = pl.quantile(stacked, q=low_q, axis=0)
    high = pl.quantile(stacked, q=high_q, axis=0)

    ref_sim = sims[0]  # Used only to convert dates into day indices
    days = np.arange(len(best))

    pl.fill_between(days, low, high, facecolor=color, alpha=0.2, label=None)
    pl.plot(days, best, c=color, label=label, lw=4, alpha=1.0)
    sc.setylim()

    tick_dates = ('2020-03-01', '2020-05-01', '2020-07-01', '2020-09-01')
    ax.set_xticks(pl.array([ref_sim.day(date) for date in tick_dates]))
    pl.ylabel(ylabel)

    return

Ejemplo n.º 2

0

Mostrar archivo

Archivo: plot_UK_school_scenarios8March_27jan.py Proyecto: amath-idm/covasim_uk

def plotter(key, sims, ax, label='', ylabel='', low_q=0.05, high_q=0.95, subsample=2):
    '''
    Plot the median of one result across several sims with a quantile band.

    Args:
        key (str): result key; the part after the first underscore selects the color
        sims (list): sims whose ``results[key].values`` are stacked row-wise
        ax (axes): axes that receive the date tick marks
        label (str): legend label for the median line
        ylabel (str): y-axis label
        low_q (float): lower quantile of the shaded band
        high_q (float): upper quantile of the shaded band
        subsample (int): accepted but currently unused (the data overlay that used it is disabled)
    '''
    which = key.split('_')[1]
    try:
        color = cv.get_colors()[which]
    except Exception:  # Unknown result type: fall back to grey
        color = [0.5, 0.5, 0.5]

    yarr = np.array([s.results[key].values for s in sims])
    best = pl.median(yarr, axis=0)
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    # Reference sim for date-to-day conversion. Previously this name was never
    # bound inside the function, so the tick computation below depended on a
    # module-level ``sim`` existing.
    sim = sims[0]

    tvec = np.arange(len(best))

    # The first two points are skipped when plotting r_eff
    start = 2 if key == 'r_eff' else 0
    pl.fill_between(tvec[start:], low[start:], high[start:], facecolor=color, alpha=0.2, label=None)
    pl.plot(tvec[start:], best[start:], c=color, label=label, lw=4, alpha=1.0)

    sc.setylim()

    # NOTE: '2020-09-01' is listed twice, as in the original tick list; it is
    # kept so the tick array is unchanged.
    tick_dates = ('2020-03-01', '2020-06-01',
                  '2020-09-01', '2020-09-01',
                  '2020-12-01', '2021-03-01',
                  '2021-05-01')
    ax.set_xticks(pl.array([sim.day(date) for date in tick_dates]))
    pl.ylabel(ylabel)

    return

Ejemplo n.º 3

0

Mostrar archivo

            "75-79", "80-84", "85-89", "90+"
        ]  #["0-29", "30-54", "55+"]
        deaths = raw_deaths
        pos = raw_pos

    # From the model
    mposlist, mdeathlist = [], []
    for hists in agehists:
        mposlist.append(hists['diagnosed'])
        mdeathlist.append(hists['dead'])
    mdeatharr = np.array(mdeathlist)
    mposarr = np.array(mposlist)
    low_q = 0.1
    high_q = 0.9
    raw_mdbest = pl.mean(mdeatharr, axis=0)
    raw_mdlow = pl.quantile(mdeatharr, q=low_q, axis=0)
    raw_mdhigh = pl.quantile(mdeatharr, q=high_q, axis=0)
    raw_mpbest = pl.mean(mposarr, axis=0)
    raw_mplow = pl.quantile(mposarr, q=low_q, axis=0)
    raw_mphigh = pl.quantile(mposarr, q=high_q, axis=0)

    if aggregate:
        mpbest = [
            raw_mpbest[0:6].sum(), raw_mpbest[6:13].sum(),
            raw_mpbest[13:16].sum(), raw_mpbest[16:].sum()
        ]
        mplow = [
            raw_mplow[0:6].sum(), raw_mplow[6:13].sum(),
            raw_mplow[13:16].sum(), raw_mplow[16:].sum()
        ]
        mphigh = [

Ejemplo n.º 4

0

Mostrar archivo

    def run(self, debug=False, keep_people=False, verbose=None, **kwargs):
        '''
        Run the actual scenarios

        For every scenario, a copy of the base sim is updated with the scenario's
        parameters and run (once in debug mode, otherwise via multi_run()). The
        per-run results are then reduced to a median ("best") and two quantiles
        ("low" and "high"), which are stored in self.results.

        Args:
            debug (bool): if True, runs a single run instead of multiple, which makes debugging easier
            keep_people (bool): passed to single_run()/multi_run() and recorded as self._kept_people
            verbose (int): level of detail to print, passed to sim.run(); defaults to self['verbose']
            kwargs (dict): passed to multi_run() and thence to sim.run()

        Returns:
            None (modifies Scenarios object in place)

        Raises:
            ValueError: if any scenario's parameters contain 'n_days'
        '''

        if verbose is None:
            verbose = self['verbose']

        def print_heading(string):
            ''' Choose whether to print a heading, regular text, or nothing '''
            if verbose >= 2:
                sc.heading(string)
            elif verbose == 1:
                print(string)
            return

        reskeys = self.result_keys()  # Shorten since used extensively

        # Loop over scenarios
        for scenkey, scen in self.scenarios.items():
            scenname = scen['name']
            scenpars = scen['pars']

            # This is necessary for plotting, and since self.npts is defined prior to run
            if 'n_days' in scenpars.keys():
                errormsg = 'Scenarios cannot be run with different numbers of days; set via basepars instead'
                raise ValueError(errormsg)

            # Create and run the simulations

            print_heading(f'Multirun for {scenkey}')
            scen_sim = sc.dcp(self.base_sim)  # Copy so the base sim is left unmodified
            scen_sim.label = scenkey
            scen_sim.update_pars(scenpars)
            run_args = dict(n_runs=self['n_runs'],
                            noise=self['noise'],
                            noisepar=self['noisepar'],
                            keep_people=keep_people,
                            verbose=verbose)
            if debug:
                print('Running in debug mode (not parallelized)')
                run_args.pop(
                    'n_runs',
                    None)  # Remove n_runs argument, not used for a single run
                scen_sims = [single_run(scen_sim, **run_args, **kwargs)]
            else:
                scen_sims = multi_run(
                    scen_sim, **run_args,
                    **kwargs)  # This is where the sims actually get run

            # Process the simulations
            print_heading(f'Processing {scenkey}')

            # Collect each result into an array with one row per point and one column per run
            scenraw = {}
            for reskey in reskeys:
                scenraw[reskey] = pl.zeros((self.npts, len(scen_sims)))
                for s, sim in enumerate(scen_sims):
                    scenraw[reskey][:, s] = sim.results[reskey].values

            # Reduce across runs (axis 1) to a central estimate and a quantile interval
            scenres = sc.objdict()
            scenres.best = {}
            scenres.low = {}
            scenres.high = {}
            for reskey in reskeys:
                scenres.best[reskey] = pl.median(
                    scenraw[reskey],
                    axis=1)  # Median across runs (an earlier comment here said "mean", but the code takes the median)
                scenres.low[reskey] = pl.quantile(scenraw[reskey],
                                                  q=self['quantiles']['low'],
                                                  axis=1)
                scenres.high[reskey] = pl.quantile(scenraw[reskey],
                                                   q=self['quantiles']['high'],
                                                   axis=1)

            # Store the reduced results under self.results[reskey][scenkey]
            for reskey in reskeys:
                self.results[reskey][scenkey]['name'] = scenname
                for blh in ['best', 'low', 'high']:
                    self.results[reskey][scenkey][blh] = scenres[blh][reskey]

            self.sims[scenkey] = scen_sims

        #%% Print statistics
        if verbose:
            sc.heading('Results for last day in each scenario:')
            x = defaultdict(dict)
            scenkeys = list(self.scenarios.keys())
            for scenkey in scenkeys:
                for reskey in reskeys:
                    val = self.results[reskey][scenkey].best[-1]
                    # Everything except r_eff and doubling_time is truncated to an integer
                    if reskey not in ['r_eff', 'doubling_time']:
                        val = int(val)
                    x[scenkey][reskey] = val
            df = pd.DataFrame.from_dict(x).astype(object)
            print(df)
            print()

        # Save details about the run
        self._kept_people = keep_people

        return

Ejemplo n.º 5

0

Mostrar archivo

def plotter(key, sims, ax, ys=None, calib=False, label='', ylabel='', low_q=0.1, high_q=0.9):
    '''
    Plot the median of one result across several sims with a quantile band,
    overlaying the sim's data for that key when present.

    Args:
        key (str): result key; the part after the first underscore selects the color
        sims (list): sims to summarize; sims[0] is used for data and dates
        ax (axes): axes whose x-ticks are set
        ys (list): precomputed series, one per sim; if None, taken from ``results[key].values``
        calib (bool): if True, trim the end of the series and derive x-limits from the plot
        label (str): legend label for the median line
        ylabel (str): y-axis label
        low_q (float): lower quantile of the shaded band
        high_q (float): upper quantile of the shaded band
    '''
    which = key.split('_')[1]
    try:
        color = cv.get_colors()[which]
    except Exception:  # Unknown result type: fall back to grey
        color = [0.5, 0.5, 0.5]
    if which == 'deaths':
        color = [0.5, 0.0, 0.0]

    if ys is None:
        ys = [s.results[key].values for s in sims]

    yarr = np.array(ys)
    best = pl.median(yarr, axis=0)  # Central estimate is the median across sims
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]  # For having a sim to refer to

    # Formatting parameters
    plot_args = {'lw': 3, 'alpha': 0.8}
    fill_args = {'alpha': 0.2}

    tvec = np.arange(len(best))

    # In calibration mode the last point is dropped (the last two for r_eff)
    if calib:
        end = -2 if key == 'r_eff' else -1
    else:
        end = None

    pl.fill_between(tvec[:end], low[:end], high[:end], facecolor=color, **fill_args)
    pl.plot(tvec[:end], best[:end], c=color, label=label, **plot_args)

    if key in sim.data:
        # Convert the data's date index into days since the sim start
        data_t = np.array((sim.data.index - sim['start_day']) / np.timedelta64(1, 'D'))
        pl.plot(data_t, sim.data[key], 'o', c=color, markersize=10, label='Data')

    if calib:
        xlims = pl.xlim()
        pl.xlim([13, xlims[1] - 1])
    else:
        pl.xlim([0, 94])
    sc.setylim()

    # Weekly ticks; the original had identical branches for calib and non-calib
    xmin, xmax = ax.get_xlim()
    ax.set_xticks(pl.arange(xmin + 2, xmax, 7))

    pl.ylabel(ylabel)
    pl.legend(loc='upper left')

    return

Ejemplo n.º 6

0

Mostrar archivo

def _plot_age_bars(ax, ages, observed, model_best, model_low, model_high,
                   data_color, model_color, title, ylabel, width=4, offset=2):
    ''' Draw side-by-side data/model bars per age bin, with a low-high line on each model bar '''
    model_x = ages + width - offset
    pl.bar(ages - offset, observed, width=width, label='Data', facecolor=data_color)
    pl.bar(model_x, model_best, width=width, label='Model', facecolor=model_color)
    for i, ix in enumerate(model_x):
        pl.plot([ix, ix], [model_low[i], model_high[i]], c='k')
    ax.set_xticks(ages.tolist())
    pl.title(title)
    pl.xlabel('Age')
    pl.ylabel(ylabel)
    pl.legend()
    return


def plot_calibration(sims, date, do_save=0):
    '''
    Draw the calibration figure and the model-estimates figure for a set of sims.

    Args:
        sims (list): sims to summarize; sims[0] is the reference sim for axis
            formatting and interventions, and each sim's first analyzer
            supplies the age histograms
        date (str): inserted into the output file names
        do_save (bool): if truthy, save both figures via cv.savefig()
    '''

    # Reference sim. The age-histogram loop below uses its own variable so this
    # name keeps pointing at sims[0] for the second figure as well.
    sim = sims[0]

    # Draw plots
    fig1_path = f'calibration_{date}_fig1.png'
    fig2_path = f'calibration_{date}_fig2.png'
    fig_args = sc.mergedicts({'figsize': (16, 14)})
    axis_args = sc.mergedicts({'left': 0.10, 'bottom': 0.05, 'right': 0.95, 'top': 0.93, 'wspace': 0.25, 'hspace': 0.40})

    # Quantiles for the age-histogram uncertainty lines
    low_q = 0.1
    high_q = 0.9

    # Figure 1: Calibration
    pl.figure(**fig_args)
    pl.subplots_adjust(**axis_args)
    pl.figtext(0.42, 0.95, 'Model calibration', fontsize=30)

    #%% Figure 1, panel 1
    ax = pl.subplot(4,1,1)
    format_ax(ax, sim)
    plotter('new_tests', sims, ax, calib=True, label='Number of tests per day', ylabel='Tests')
    plotter('new_diagnoses', sims, ax, calib=True, label='Number of diagnoses per day', ylabel='Tests')

    #%% Figure 1, panel 2
    ax = pl.subplot(4,1,2)
    format_ax(ax, sim)
    plotter('cum_diagnoses', sims, ax, calib=True, label='Cumulative diagnoses', ylabel='People')

    #%% Figure 1, panel 3
    ax = pl.subplot(4,1,3)
    format_ax(ax, sim)
    plotter('cum_deaths', sims, ax, calib=True, label='Cumulative deaths', ylabel='Deaths')

    #%% Figure 1, panels 4A and 4B

    # The first analyzer of each sim holds the age histograms; the observed
    # data are read from the first sim's analyzer
    analyzers = [this_sim['analyzers'][0] for this_sim in sims]
    age_data = analyzers[0].data
    agehists = [analyzer.hists[-1] for analyzer in analyzers]

    x = age_data['age'].values
    pos = age_data['cum_diagnoses'].values
    death = age_data['cum_deaths'].values

    # From the model
    mposarr = np.array([hists['diagnosed'] for hists in agehists])
    mdeatharr = np.array([hists['dead'] for hists in agehists])

    mpbest = pl.median(mposarr, axis=0)
    mplow = pl.quantile(mposarr, q=low_q, axis=0)
    mphigh = pl.quantile(mposarr, q=high_q, axis=0)
    mdbest = pl.median(mdeatharr, axis=0)
    mdlow = pl.quantile(mdeatharr, q=low_q, axis=0)
    mdhigh = pl.quantile(mdeatharr, q=high_q, axis=0)

    # Plotting
    ax = pl.subplot(4,2,7)
    _plot_age_bars(ax, x, pos, mpbest, mplow, mphigh,
                   data_color=[0.3,0.3,0.6], model_color=[0.6,0.7,0.9],
                   title='Diagnosed cases by age', ylabel='Cases')

    ax = pl.subplot(4,2,8)
    _plot_age_bars(ax, x, death, mdbest, mdlow, mdhigh,
                   data_color=[0.5,0.0,0.0], model_color=[0.9,0.4,0.3],
                   title='Deaths by age', ylabel='Deaths')

    # Tidy up
    if do_save:
        cv.savefig(fig1_path)

    # Figure 2: Projections
    pl.figure(**fig_args)
    pl.subplots_adjust(**axis_args)
    pl.figtext(0.42, 0.95, 'Model estimates', fontsize=30)

    #%% Figure 2, panel 1
    ax = pl.subplot(4,1,1)
    format_ax(ax, sim)
    plotter('cum_infections', sims, ax, calib=True, label='Cumulative infections', ylabel='People')
    plotter('cum_recoveries', sims, ax, calib=True, label='Cumulative recoveries', ylabel='People')

    #%% Figure 2, panel 2
    ax = pl.subplot(4,1,2)
    format_ax(ax, sim)
    plotter('n_infectious', sims, ax, calib=True, label='Number of active infections', ylabel='People')
    plot_intervs(sim, labels=True)

    #%% Figure 2, panel 3
    ax = pl.subplot(4,1,3)
    format_ax(ax, sim)
    plotter('new_infections', sims, ax, calib=True, label='Infections per day', ylabel='People')
    plotter('new_recoveries', sims, ax, calib=True, label='Recoveries per day', ylabel='People')
    plot_intervs(sim)

    #%% Figure 2, panels 4
    ax = pl.subplot(4,1,4)
    format_ax(ax, sim)
    plotter('r_eff', sims, ax, calib=True, label='Effective reproductive number', ylabel=r'$R_{eff}$')

    # Fixed y-range with a horizontal reference line at R = 1
    ylims = [0,4]
    pl.ylim(ylims)
    xlims = pl.xlim()
    pl.plot(xlims, [1, 1], 'k')
    plot_intervs(sim)

    # Tidy up
    if do_save:
        cv.savefig(fig2_path)

    return

Ejemplo n.º 7

0

Mostrar archivo

def plotter(key,
            sims,
            ax,
            ys=None,
            calib=False,
            label='',
            ylabel='',
            low_q=0.025,
            high_q=0.975,
            flabel=True,
            startday=None,
            subsample=2,
            chooseseed=None):
    '''
    Plot one result across several sims with a quantile band, overlay the
    sim's data for that key when present, and print summary statistics for
    selected keys.

    Relies on the module-level name ``calibration_end`` for the last plotted day.

    Args:
        key (str): result key; the part after the first underscore selects the color
        sims (list): sims to summarize; sims[0] is used for data and dates
        ax (axes): axes that receive the date tick marks
        ys (list): precomputed series, one per sim; if None, taken from ``results[key].values``
        calib (bool): accepted but has no effect on the output
        label (str): legend label for the central line
        ylabel (str): y-axis label
        low_q (float): lower quantile of the shaded band
        high_q (float): upper quantile of the shaded band
        flabel (bool): whether the shaded band gets a legend label
        startday: first day to plot, converted with ``sim.day()``; None plots from the beginning
        subsample (int): plot every ``subsample``-th data point
        chooseseed (int): if given, use that sim's series as the central line instead of the median
    '''

    which = key.split('_')[1]
    try:
        color = cv.get_colors()[which]
    except Exception:  # Unknown result type: fall back to grey
        color = [0.5, 0.5, 0.5]
    if which == 'diagnoses':
        color = [0.03137255, 0.37401, 0.63813918, 1.]
    elif which == '':
        color = [0.82400815, 0., 0., 1.]

    if ys is None:
        ys = [s.results[key].values for s in sims]

    yarr = np.array(ys)
    if chooseseed is not None:
        best = sims[chooseseed].results[key].values
    else:
        best = pl.median(yarr, axis=0)
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]  # For having a sim to refer to

    tvec = np.arange(len(best))
    if key in sim.data:
        # Convert the data's date index into days since the sim start
        data_t = np.array(
            (sim.data.index - sim['start_day']) / np.timedelta64(1, 'D'))
        inds = np.arange(0, len(data_t), subsample)
        pl.plot(data_t[inds],
                sim.data[key][inds],
                'd',
                c=color,
                markersize=15,
                alpha=0.75,
                label='Data')

    # Slice bounds as day indices. The slices must use the converted ``start``,
    # not the raw ``startday`` argument, which may be a date string.
    start = sim.day(startday) if startday is not None else None
    end = sim.day(calibration_end)

    # The band label is the same for every result type
    fill_label = '95% projected interval' if flabel else None

    pl.fill_between(tvec[start:end],
                    low[start:end],
                    high[start:end],
                    facecolor=color,
                    alpha=0.2,
                    label=fill_label)
    pl.plot(tvec[start:end],
            best[start:end],
            c=color,
            label=label,
            lw=4,
            alpha=1.0)

    # Print some stats
    if key == 'cum_infections':
        july25 = sim.day("2020-07-25")
        print(
            f'Estimated {which} on July 25: {best[july25]} (95%: {low[july25]}-{high[july25]})'
        )
        print(
            f'Estimated {which} overall: {best[end]} (95%: {low[end]}-{high[end]})'
        )
    elif key == 'n_infectious':
        peakval = max(best)
        peakday = sc.findnearest(best, peakval)
        print(
            f'Estimated peak {which} on {sim.date(peakday)}: {peakval} (95%: {low[peakday]}-{high[peakday]})'
        )
        print(
            f'Estimated {which} on last day: {best[end]} (95%: {low[end]}-{high[end]})'
        )
    elif key == 'cum_diagnoses':
        print(
            f'Estimated {which} overall: {best[end]} (95%: {low[end]}-{high[end]})'
        )

    sc.setylim()

    pl.ylabel(ylabel)

    # Ticks on fixed calendar dates. An earlier 28-day tick call was removed
    # because this call replaced its ticks immediately.
    datemarks = pl.array([
        sim.day('2020-07-01'),
        sim.day('2020-08-01'),
        sim.day('2020-09-01'),
        sim.day('2020-10-01')
    ]) * 1.
    ax.set_xticks(datemarks)

    return

Ejemplo n.º 8

0

Mostrar archivo

    def run(self,
            keep_sims=False,
            debug=False,
            healthsystems=True,
            verbose=None):
        '''
        Run the actual scenarios

        For every scenario, a copy of the base sim is updated with the scenario's
        parameters and run (once in debug mode, otherwise via multi_run()). The
        per-run results are then reduced to a mean ("best") and two quantiles
        ("low" and "high"), which are stored in self.allres.

        Args:
            keep_sims (bool): whether or not to store the actual Sim objects in the Scenarios object (NB, very large)
            debug (bool): if True, runs a single run instead of multiple, which makes debugging easier
            healthsystems (bool): whether or not to run a health systems analysis on the results
            verbose (int): level of detail to print, passed to sim.run(); defaults to self['verbose']

        Returns:
            None (modifies Scenarios object in place)

        Raises:
            ValueError: if any scenario's parameters contain 'n_days'
        '''

        if verbose is None:
            verbose = self['verbose']

        def print_heading(string):
            ''' Choose whether to print a heading, regular text, or nothing '''
            if verbose >= 2:
                sc.heading(string)
            elif verbose == 1:
                print(string)
            return

        reskeys = self.reskeys  # Shorten since used extensively

        # Loop over scenarios
        for scenkey, scen in self.scenarios.items():
            scenname = scen['name']
            scenpars = scen['pars']

            # This is necessary for plotting, and since self.npts is defined prior to run
            if 'n_days' in scenpars.keys():
                errormsg = 'Scenarios cannot be run with different numbers of days; set via basepars instead'
                raise ValueError(errormsg)

            # Create and run the simulations

            print_heading(f'Multirun for {scenkey}')
            scen_sim = sc.dcp(self.base_sim)  # Copy so the base sim is left unmodified
            scen_sim.update_pars(scenpars)
            run_args = dict(n_runs=self['n_runs'],
                            noise=self['noise'],
                            noisepar=self['noisepar'],
                            verbose=verbose)
            if debug:
                print('Running in debug mode (not parallelized)')
                run_args.pop(
                    'n_runs',
                    None)  # Remove n_runs argument, not used for a single run
                scen_sims = [single_run(scen_sim, **run_args)]
            else:
                scen_sims = multi_run(
                    scen_sim,
                    **run_args)  # This is where the sims actually get run

            # Process the simulations
            print_heading(f'Processing {scenkey}')

            # Collect each result into an array with one row per point and one column per run
            scenraw = {}
            for reskey in reskeys:
                scenraw[reskey] = pl.zeros((self.npts, len(scen_sims)))
                for s, sim in enumerate(scen_sims):
                    scenraw[reskey][:, s] = sim.results[reskey].values

            # Reduce across runs (axis 1) to a central estimate and a quantile interval
            scenres = sc.objdict()
            scenres.best = {}
            scenres.low = {}
            scenres.high = {}
            for reskey in reskeys:
                scenres.best[reskey] = pl.mean(
                    scenraw[reskey],
                    axis=1)  # Changed from median to mean for smoother plots
                scenres.low[reskey] = pl.quantile(scenraw[reskey],
                                                  q=self['quantiles']['low'],
                                                  axis=1)
                scenres.high[reskey] = pl.quantile(scenraw[reskey],
                                                   q=self['quantiles']['high'],
                                                   axis=1)

            # Store the reduced results under self.allres[reskey][scenkey]
            for reskey in reskeys:
                self.allres[reskey][scenkey]['name'] = scenname
                for blh in ['best', 'low', 'high']:
                    self.allres[reskey][scenkey][blh] = scenres[blh][reskey]

            if keep_sims:
                # The sims are stored alongside the results, under the 'sims' key
                if 'sims' not in self.allres:
                    self.allres['sims'] = sc.objdict()
                self.allres['sims'][scenkey] = scen_sims
                print(
                    'Note: saving sims, which may produce a large file! Estimated to be:'
                )
                sc.checkmem(
                    self.allres
                )  # Print a warning about how big the file is likely to be

        #%% Print statistics
        if verbose:
            print('\nResults for final time point in each scenario:')
            for reskey in reskeys:
                print(f'\n{reskey}')
                for scenkey in list(self.scenarios.keys()):
                    print(
                        f'  {scenkey}: {self.allres[reskey][scenkey].best[-1]:0.0f}'
                    )
            print()  # Add a blank space

        # Perform health systems analysis
        if healthsystems:
            self.hsys = cvhs.HealthSystem(self.allres)
            self.hsys.analyze()

        return

Ejemplo n.º 9

0

Mostrar archivo

    def estimate_Gap_statistics(self, nrefs):
        """
        Choose a number of clusters by comparing within-cluster dispersion on
        the data with that on randomly generated reference data sets.

        Fills ``self.Gaps``, ``self.sd`` and ``self.W`` with one entry per value
        in ``self.Kvals``; ``self.clusters`` is left holding the clustering for
        the last K tried.

        Args:
            nrefs (int): number of random reference data sets to generate

        Returns:
            The first K in ``self.Kvals[:-1]`` for which
            ``Gap(K) - Gap(K+1) + sd(K+1) >= 0``, as an integer.

        Raises:
            IndexError: if no K satisfies the criterion.
        """
        # Rows selected by the mask of masked_not_equal on the first column
        masknans = pl.ma.masked_not_equal(self._X[:, 0], 0).mask
        selected = self._X[masknans, :]
        meanvals = selected.mean(axis=0)
        stdvals = selected.std(axis=0)

        # Bounds of the uniform reference distribution for feature 1. The
        # original code passed the 0.16 quantile for BOTH bounds, which made
        # the "uniform" draw a constant; the upper bound now uses the
        # complementary 0.84 quantile. The bounds do not change between
        # reference draws, so they are computed once.
        feat1_low = pl.quantile(q=0.16, a=self._X[masknans, 1])
        feat1_high = pl.quantile(q=0.84, a=self._X[masknans, 1])
        nsamples = pl.int_(self._X.shape[0])

        # Compute a random reference distribution of features and
        # precompute distances and affinities.
        ref_Affinity = []
        Dref = []
        for _ in range(nrefs):
            random_X = pl.ones_like(self._X)
            # Draw order (uniform, then normal) is kept so seeded runs consume
            # the random stream in the same sequence as before
            random_X[:, 1] = np.random.uniform(low=feat1_low,
                                               high=feat1_high,
                                               size=nsamples)
            random_X[:, 0] = np.random.normal(loc=meanvals[0],
                                              scale=stdvals[0],
                                              size=nsamples)
            ref_D = self._metric.pairwise(random_X)
            # Replace invalid distances with 1.0
            ref_D = pl.ma.fix_invalid(ref_D, fill_value=1.0).data

            Dref.append(ref_D)
            ref_Affinity.append(pairwise_kernels(ref_D, metric="precomputed"))

        self.Gaps = pl.zeros(len(self.Kvals))
        self.sd = self.Gaps * 0.0
        self.W = self.Gaps * 0.0  # KL index
        for j, K in enumerate(self.Kvals):
            if self.verbose:
                print(f"Running with K={K} clusters")
            self.clusters = AgglomerativeClustering(
                n_clusters=K,
                affinity="precomputed",
                linkage="average",
                connectivity=self.connectivity,
            )
            self.clusters.fit_predict(self._Affinity)
            # estimate WCSS for the samples
            W = self.get_WCSS(K, self.clusters.labels_, self._distance_matr)
            self.W[j] = W

            # estimate WCSS for random samples
            ref_W = pl.zeros(nrefs)
            for i in range(nrefs):
                ref_clusters = AgglomerativeClustering(
                    n_clusters=K,
                    affinity="precomputed",
                    linkage="average",
                    connectivity=self.connectivity,
                )
                ref_clusters.fit_predict(ref_Affinity[i])
                ref_W[i] = self.get_WCSS(K, ref_clusters.labels_, Dref[i])

            log_ref_W = np.log(ref_W)
            self.sd[j] = np.std(log_ref_W) * np.sqrt(1 + 1.0 / nrefs)
            self.Gaps[j] = np.mean(log_ref_W) - np.log(W)

        ## see section 4 of Tibshirani et al. http://web.stanford.edu/~hastie/Papers/gap.pdf

        gaps_criterion = pl.array(
            [self.Kvals[:-1], self.Gaps[:-1] - self.Gaps[1:] + self.sd[1:]])
        mask = pl.array(gaps_criterion[1, :] >= 0)
        return pl.int_(gaps_criterion[0, mask][0])

Ejemplo n.º 10

0

Mostrar archivo

def plotter(key,
            sims,
            ax,
            ys=None,
            calib=False,
            label='',
            ylabel='',
            low_q=0.025,
            high_q=0.975,
            flabel=True,
            subsample=2):
    '''
    Plot a single time series with uncertainty

    Draws the median across sims with a quantile band, overlays the sim's data
    for that key when present, and stores everything that was plotted in the
    module-level ``plotres`` under ``key``. Also relies on the module-level
    names ``sims_cutoff`` and ``day_stride``.

    Args:
        key (str): result key; the part after the first underscore selects the color
        sims (list): sims to summarize; sims[0] is used for data and dates
        ax (axes): axes whose x-ticks are set
        ys (list): precomputed series, one per sim; if None, taken from the first ``sims_cutoff`` sims
        calib (bool): accepted but not used by this function
        label (str): legend label for the median line
        ylabel (str): y-axis label
        low_q (float): lower quantile of the shaded band
        high_q (float): upper quantile of the shaded band
        flabel (bool): whether the shaded band gets a legend label
        subsample (int): plot every ``subsample``-th data point
    '''

    which = key.split('_')[1]
    try:
        color = cv.get_colors()[which]
    except Exception:  # Unknown result type: fall back to grey
        color = [0.5, 0.5, 0.5]
    if which == 'deaths':
        color = [0.5, 0.0, 0.0]

    if ys is None:
        # Only sims whose index is below sims_cutoff contribute
        ys = [s.results[key].values for i, s in enumerate(sims) if i < sims_cutoff]

    yarr = np.array(ys)
    best = pl.median(yarr, axis=0)  # Central estimate is the median across sims
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]  # For having a sim to refer to

    tvec = np.arange(len(best))
    data, data_t = None, None
    if key in sim.data:
        # Convert the data's date index into days since the sim start
        data_t = np.array(
            (sim.data.index - sim['start_day']) / np.timedelta64(1, 'D'))
        inds = np.arange(0, len(data_t), subsample)
        data = sim.data[key][inds]
        pl.plot(data_t[inds],
                data,
                'd',
                c=color,
                markersize=10,
                alpha=0.5,
                label='Data')

    # The two labels differ only in where the line break falls
    if flabel:
        if which == 'infections':
            fill_label = '95% predic-\ntion interval'
        else:
            fill_label = '95% prediction\ninterval'
    else:
        fill_label = None

    # Trim the beginning for r_eff and actually plot
    start = 2 if key == 'r_eff' else 0
    pl.fill_between(tvec[start:],
                    low[start:],
                    high[start:],
                    facecolor=color,
                    alpha=0.2,
                    label=fill_label)
    pl.plot(tvec[start:],
            best[start:],
            c=color,
            label=label,
            lw=4,
            alpha=1.0)

    sc.setylim()
    xmin, xmax = ax.get_xlim()
    ax.set_xticks(np.arange(xmin, xmax, day_stride))
    pl.ylabel(ylabel)

    # Keep the plotted series (note: data_t is stored unsubsampled, data subsampled)
    plotres[key] = sc.objdict(
        dict(tvec=tvec,
             best=best,
             low=low,
             high=high,
             data=data,
             data_t=data_t))

    return

Ejemplo n.º 11

0

Mostrar archivo

def plot():
    """Build the six-panel "Fig. 1: Calibration" figure and return it.

    Panels (letters match the figtext labels placed below):
      a: cumulative diagnoses, with an age-histogram inset
      b: cumulative deaths, with an age-histogram inset
      c: cumulative and active infections, overlaid with SCAN survey data
      d: effective reproduction number
      e: 2x2 grid of kernel-density plots comparing ``df1`` and ``df2``
      f: SafeGraph mobility data

    This function takes no arguments. It reads the following names, none of
    which are defined inside this block (presumably module-level globals --
    confirm against the rest of the file): ``sims``, ``base_sim``, ``keys``,
    ``df1``, ``df2``, ``mapping``, ``scan_file``, ``safegraph_file``,
    ``day_stride``, and the helpers ``format_ax``, ``plotter`` and
    ``plot_intervs``.

    Returns:
        The figure object created by ``pl.figure``.
    """

    # Create the figure
    fig = pl.figure(num='Fig. 1: Calibration', figsize=(24, 20))
    # Figure-coordinate positions of the panel letters: tx1/tx2 are the left
    # and right columns, ty1/ty2/ty3 the top, middle and bottom rows.
    tx1, ty1 = 0.005, 0.97
    tx2, ty2 = 0.545, 0.66
    ty3 = 0.34
    fsize = 40
    pl.figtext(tx1, ty1, 'a', fontsize=fsize)
    pl.figtext(tx1, ty2, 'b', fontsize=fsize)
    pl.figtext(tx2, ty1, 'c', fontsize=fsize)
    pl.figtext(tx2, ty2, 'd', fontsize=fsize)
    pl.figtext(tx1, ty3, 'e', fontsize=fsize)
    pl.figtext(tx2, ty3, 'f', fontsize=fsize)

    #%% Fig. 1A: diagnoses
    # x0, y0, dx, dy are reused by later panels; x0 and dx are rebound for the
    # right-hand column further down.
    x0, y0, dx, dy = 0.055, 0.73, 0.47, 0.24
    ax1 = pl.axes([x0, y0, dx, dy])
    format_ax(ax1, base_sim)
    plotter('cum_diagnoses',
            sims,
            ax1,
            calib=True,
            label='Model',
            ylabel='Cumulative diagnoses')
    pl.legend(loc='lower right', frameon=False)

    #%% Fig. 1B: deaths
    y0b = 0.42
    ax2 = pl.axes([x0, y0b, dx, dy])
    format_ax(ax2, base_sim)
    plotter('cum_deaths',
            sims,
            ax2,
            calib=True,
            label='Model',
            ylabel='Cumulative deaths')
    pl.legend(loc='lower right', frameon=False)

    #%% Fig. 1A-B insets (histograms)

    agehists = []

    # Take the first analyzer of every sim: the observed data come from the
    # first sim only, while the last histogram of each sim is collected.
    # NOTE: `sim` stays bound after this loop (to the last element of `sims`)
    # and is used as the reference sim by the later panels.
    for s, sim in enumerate(sims):
        agehist = sim['analyzers'][0]
        if s == 0:
            age_data = agehist.data
        agehists.append(agehist.hists[-1])

    # Observed data
    x = age_data['age'].values
    pos = age_data['cum_diagnoses'].values
    death = age_data['cum_deaths'].values

    # Model outputs
    mposlist = []
    mdeathlist = []
    for hists in agehists:
        mposlist.append(hists['diagnosed'])
        mdeathlist.append(hists['dead'])
    mposarr = np.array(mposlist)
    mdeatharr = np.array(mdeathlist)

    # Median and 2.5%/97.5% quantiles across sims (axis 0), i.e. a 95% band
    low_q = 0.025
    high_q = 0.975
    mpbest = pl.median(mposarr, axis=0)
    mplow = pl.quantile(mposarr, q=low_q, axis=0)
    mphigh = pl.quantile(mposarr, q=high_q, axis=0)
    mdbest = pl.median(mdeatharr, axis=0)
    mdlow = pl.quantile(mdeatharr, q=low_q, axis=0)
    mdhigh = pl.quantile(mdeatharr, q=high_q, axis=0)

    # Bar width and the offset that places the data/model bars side by side
    w = 4
    off = 2

    # Insets
    x0s, y0s, dxs, dys = 0.11, 0.84, 0.17, 0.13
    ax1s = pl.axes([x0s, y0s, dxs, dys])
    c1 = [0.3, 0.3, 0.6]
    c2 = [0.6, 0.7, 0.9]
    xx = x + w - off  # x positions of the model bars (also used for the error bars)
    pl.bar(x - off, pos, width=w, label='Data', facecolor=c1)
    pl.bar(xx, mpbest, width=w, label='Model', facecolor=c2)
    # Vertical line from the low to the high quantile on each model bar
    for i, ix in enumerate(xx):
        pl.plot([ix, ix], [mplow[i], mphigh[i]], c='k')
    ax1s.set_xticks(np.arange(0, 81, 20))
    pl.xlabel('Age')
    pl.ylabel('Cases')
    sc.boxoff(ax1s)
    pl.legend(frameon=False, bbox_to_anchor=(0.7, 1.1))

    # Same inset layout for deaths, placed lower on the figure
    y0sb = 0.53
    ax2s = pl.axes([x0s, y0sb, dxs, dys])
    c1 = [0.5, 0.0, 0.0]
    c2 = [0.9, 0.4, 0.3]
    pl.bar(x - off, death, width=w, label='Data', facecolor=c1)
    pl.bar(x + w - off, mdbest, width=w, label='Model', facecolor=c2)  # same positions as xx
    for i, ix in enumerate(xx):
        pl.plot([ix, ix], [mdlow[i], mdhigh[i]], c='k')
    ax2s.set_xticks(np.arange(0, 81, 20))
    pl.xlabel('Age')
    pl.ylabel('Deaths')
    sc.boxoff(ax2s)
    pl.legend(frameon=False)
    # NOTE(review): sc.boxoff(ax1s) was already called above; this second call
    # looks redundant.
    sc.boxoff(ax1s)

    #%% Fig. 1C: infections
    x0, dx = 0.60, 0.38  # switch to the right-hand column
    ax3 = pl.axes([x0, y0, dx, dy])
    format_ax(ax3, sim)  # `sim` is the last sim from the histogram loop above

    # Plot SCAN data
    pop_size = 2.25e6  # multiplier applied to the 'lower'/'upper'/'mean' columns
    scan = pd.read_csv(scan_file)
    for i, r in scan.iterrows():
        label = "Data" if i == 0 else None  # only the row with index 0 gets a legend entry
        # Mean of the values sim.day returns for the 'since' and 'to' columns
        # (presumably the midpoint day of the survey window -- confirm).
        ts = np.mean(sim.day(r['since'], r['to']))
        low = r['lower'] * pop_size
        high = r['upper'] * pop_size
        mean = r['mean'] * pop_size
        ax3.plot([ts] * 2, [low, high], alpha=1.0, color='k', zorder=1000)
        ax3.plot(ts,
                 mean,
                 'o',
                 markersize=7,
                 color='k',
                 alpha=0.5,
                 label=label,
                 zorder=1000)

    # Plot simulation
    plotter('cum_infections',
            sims,
            ax3,
            calib=True,
            label='Cumulative\ninfections\n(modeled)',
            ylabel='Infections')
    plotter('n_infectious',
            sims,
            ax3,
            calib=True,
            label='Active\ninfections\n(modeled)',
            ylabel='Infections',
            flabel=False)
    pl.legend(loc='upper left', frameon=False)
    pl.ylim([0, 130e3])
    plot_intervs(sim)

    #%% Fig. 1D: R_eff (drawn at the panel "d" position: right column, middle row)
    ax4 = pl.axes([x0, y0b, dx, dy])
    format_ax(ax4, sim, key='r_eff')
    plotter('r_eff',
            sims,
            ax4,
            calib=True,
            label='$R_{eff}$ (modeled)',
            ylabel=r'Effective reproduction number')
    pl.axhline(1, linestyle='--', lw=3, c='k', alpha=0.5)  # reference line at 1
    pl.legend(loc='upper right', frameon=False)
    plot_intervs(sim)

    #%% Fig. 1E

    # Do the plotting
    # Confine the subplot grid used below to the bottom-left of the figure
    pl.subplots_adjust(left=0.04,
                       right=0.52,
                       bottom=0.03,
                       top=0.35,
                       wspace=0.12,
                       hspace=0.50)

    for i, k in enumerate(keys):
        eax = pl.subplot(2, 2, i + 1)

        # Colors for df1, df2 and the difference text respectively
        c1 = [0.2, 0.5, 0.8]
        c2 = [1.0, 0.5, 0.0]
        c3 = [0.1, 0.6, 0.1]
        sns.kdeplot(df1[k],
                    shade=1,
                    linewidth=3,
                    label='',
                    color=c1,
                    alpha=0.5)
        sns.kdeplot(df2[k],
                    shade=0,
                    linewidth=3,
                    label='',
                    color=c2,
                    alpha=0.5)

        pl.title(mapping[k])
        pl.xlabel('')
        pl.yticks([])
        # NOTE(review): true only when i is a multiple of 4, so on a 2x2 grid
        # only the first panel is labelled -- confirm `i % 2` was not intended.
        if not i % 4:
            pl.ylabel('Density')

        # Add headroom above the curves; `yl` keeps the limits from BEFORE the
        # expansion and is what the text positions below are based on.
        yfactor = 1.3
        yl = pl.ylim()
        pl.ylim([yl[0], yl[1] * yfactor])

        m1 = np.median(df1[k])
        m2 = np.median(df2[k])
        m1std = df1[k].std()
        m2std = df2[k].std()
        pl.axvline(m1, c=c1, ymax=0.9, lw=3, linestyle='--')
        pl.axvline(m2, c=c2, ymax=0.9, lw=3, linestyle='--')

        # NOTE(review): fmt is redefined on every iteration but is never called
        # anywhere in this function.
        def fmt(lab, val, std=-1):
            if val < 0.1:
                valstr = f'{lab} = {val:0.4f}'
            elif val < 1.0:
                valstr = f'{lab} = {val:0.2f}±{std:0.2f}'
            else:
                valstr = f'{lab} = {val:0.1f}±{std:0.1f}'
            if std < 0:
                valstr = valstr.split('±')[0]  # Discard STD if not used
            return valstr

        # Fixed x-range per parameter family; unknown keys are rejected
        if k.startswith('bc'):
            pl.xlim([0, 100])
        elif k == 'beta':
            pl.xlim([3, 5])
        elif k.startswith('tn'):
            pl.xlim([0, 50])
        else:
            raise Exception(f'Please assign key {k}')

        # Horizontal text position as a fraction of the x-range.
        # NOTE(review): the lookup below raises KeyError for any key that is
        # not exactly one of these four, even if it passed the prefix checks
        # above (e.g. another key starting with 'bc' or 'tn').
        xl = pl.xlim()
        xfmap = dict(
            beta=0.15,
            bc_wc1=0.30,
            bc_lf=0.35,
            tn=0.55,
        )

        xf = xfmap[k]
        x0 = xl[0] + xf * (xl[1] - xl[0])  # rebinds x0; it is reset for Fig. 1F below

        # Text rows, relative to the pre-expansion upper y-limit
        ypos1 = yl[1] * 0.97
        ypos2 = yl[1] * 0.77
        ypos3 = yl[1] * 0.57

        # Annotate the medians and standard deviations of df1 (M) and df2 (N),
        # plus the difference of medians with the two standard deviations summed.
        if k == 'beta':  # Use two decimal places instead of one
            pl.text(x0, ypos1, f'M: {m1:0.2f} ± {m1std:0.2f}', c=c1)
            pl.text(x0, ypos2, f'N: {m2:0.2f} ± {m2std:0.2f}', c=c2)
            pl.text(x0,
                    ypos3,
                    rf'$\Delta$: {(m2-m1):0.2f} ± {(m1std+m2std):0.2f}',
                    c=c3)
        else:
            pl.text(x0, ypos1, f'M: {m1:0.1f} ± {m1std:0.1f}', c=c1)
            pl.text(x0, ypos2, f'N: {m2:0.1f} ± {m2std:0.1f}', c=c2)
            pl.text(x0,
                    ypos3,
                    rf'$\Delta$: {(m2-m1):0.1f} ± {(m1std+m2std):0.1f}',
                    c=c3)

        sc.boxoff(ax=eax)

    #%% Fig. 1F: SafeGraph
    x0, y0c, dyc = 0.60, 0.03, 0.30  # dx is still the right-column width set for Fig. 1C
    ax5 = pl.axes([x0, y0c, dx, dyc])
    format_ax(ax5, sim, key='r_eff')
    fn = safegraph_file
    df = pd.read_csv(fn)
    week = df['week']
    days = sim.day(week.values.tolist())
    # Scale by 100 for the percent axis; `s` and `w` rebind names used
    # earlier (loop index and bar width).
    s = df['p.tot.schools'].values * 100
    w = df['p.tot.no.schools'].values * 100

    # From Fig. 2
    colors = sc.gridcolors(5)
    wcolor = colors[3]  # Work color/community
    scolor = colors[1]  # School color

    pl.plot(days,
            w,
            'd-',
            c=wcolor,
            markersize=15,
            lw=3,
            alpha=0.9,
            label='Workplace and\ncommunity mobility data')
    pl.plot(days,
            s,
            'd-',
            c=scolor,
            markersize=15,
            lw=3,
            alpha=0.9,
            label='School mobility data')
    sc.setylim()
    # Place x ticks every `day_stride` units across the current x-range
    xmin, xmax = ax5.get_xlim()
    ax5.set_xticks(np.arange(xmin, xmax, day_stride))
    pl.ylabel('Relative mobility (%)')
    pl.legend(loc='upper right', frameon=False)
    plot_intervs(sim)

    return fig

Ejemplo n.º 12

0

Mostrar archivo

Archivo: plot_vietnam_scenarios.py Proyecto: optimamodel/covid_vietnam

def plotter(key,
            sims,
            ax,
            ys=None,
            calib=False,
            label='',
            ylabel='',
            low_q=0.025,
            high_q=0.975,
            flabel=True,
            startday=None,
            subsample=2,
            chooseseed=None):
    """Plot the central estimate and quantile band of one result across sims.

    The band is drawn between the ``low_q`` and ``high_q`` quantiles taken
    across sims, and the central line is either the across-sim median or the
    trajectory of one chosen sim. If ``key`` is present in the first sim's
    data, every ``subsample``-th data point is overlaid as a marker.

    Args:
        key: Result name, e.g. ``'cum_diagnoses'``. The part after the first
            underscore selects the colour, so ``key`` must contain one.
        sims: Sequence of sims; ``sims[0]`` is used for data and date lookups.
        ax: Axes whose x ticks are set. The drawing itself goes through
            ``pl``, i.e. onto the current axes.
        ys: Optional pre-extracted per-sim series; when ``None`` they are
            read from ``s.results[key].values`` for each sim.
        calib: Accepted for call compatibility; not used by this function.
        label: Legend label for the central line.
        ylabel: Y-axis label.
        low_q: Lower quantile of the band.
        high_q: Upper quantile of the band.
        flabel: Whether the band gets a legend label.
        startday: First day to plot, passed through ``sim.day``; ``None``
            plots from the beginning.
        subsample: Stride used when overlaying data points.
        chooseseed: Index into ``sims`` of the run to draw as the central
            line instead of the median.

    Returns:
        None.
    """

    which = key.split('_')[1]
    try:
        color = cv.get_colors()[which]
    except Exception:  # No predefined colour for this result type: fall back to grey
        color = [0.5, 0.5, 0.5]
    if which == 'diagnoses':
        color = [0.03137255, 0.37401, 0.63813918, 1.]
    elif which == '':
        color = [0.82400815, 0., 0., 1.]

    if ys is None:
        ys = [s.results[key].values for s in sims]

    yarr = np.array(ys)
    if chooseseed is not None:
        best = sims[chooseseed].results[key].values
    else:
        best = pl.median(yarr, axis=0)
    low = pl.quantile(yarr, q=low_q, axis=0)
    high = pl.quantile(yarr, q=high_q, axis=0)

    sim = sims[0]  # For having a sim to refer to

    tvec = np.arange(len(best))
    if key in sim.data:
        # Convert the data's date index into days since the sim start
        data_t = np.array(
            (sim.data.index - sim['start_day']) / np.timedelta64(1, 'D'))
        inds = np.arange(0, len(data_t), subsample)
        pl.plot(data_t[inds],
                sim.data[key][inds],
                'd',
                c=color,
                markersize=10,
                alpha=0.5,
                label='Data')

    # Slice with the converted day index, not the raw `startday` value:
    # slicing with a date string would raise TypeError.
    start = None if startday is None else sim.day(startday)
    end = sim.day('2021-03-31')
    fill_label = '95% projected interval' if flabel else None

    pl.fill_between(tvec[start:end],
                    low[start:end],
                    high[start:end],
                    facecolor=color,
                    alpha=0.2,
                    label=fill_label)
    pl.plot(tvec[start:end],
            best[start:end],
            c=color,
            label=label,
            lw=4,
            alpha=1.0)

    # Fixed tick positions; multiplied by 1. to make them floats
    datemarks = pl.array([
        sim.day('2020-07-01'),
        sim.day('2020-09-01'),
        sim.day('2020-11-01'),
        sim.day('2021-01-01')
    ]) * 1.
    ax.set_xticks(datemarks)

    pl.ylabel(ylabel)

    return