Example #1
0
def get_subtargets(subtarget, sim):
    '''
    A small helper function to see if subtargeting is a list of indices to use,
    or a function that needs to be called. If a function, it must take a single
    argument, a sim object, and return a list of indices. Also validates the values.
    Currently designed for use with testing interventions, but could be generalized
    to other interventions.

    Args:
        subtarget (dict): dict with keys 'inds' and 'vals'; see test_num() for examples of a valid subtarget dictionary
        sim (Sim): the simulation object
    '''

    # Validation
    if 'inds' not in subtarget:
        errormsg = f'The subtarget dict must have keys "inds" and "vals", but you supplied {subtarget}'
        raise ValueError(errormsg)

    # Handle the two options of type
    if callable(subtarget['inds']):  # A function has been provided
        subtarget_inds = subtarget['inds'](
            sim)  # Call the function to get the indices
    else:
        subtarget_inds = subtarget['inds']  # The indices are supplied directly

    # Validate the values
    subtarget_vals = subtarget['vals']
    if sc.isiterable(subtarget_vals):
        if len(subtarget_vals) != len(subtarget_inds):
            errormsg = f'Length of subtargeting indices ({len(subtarget_inds)}) does not match length of values ({len(subtarget_vals)})'
            raise ValueError(errormsg)

    return subtarget_inds, subtarget_vals
Example #2
0
    def initialize(self, sim):
        ''' Fix days and store beta '''
        if sc.isstring(self.days) or not sc.isiterable(self.days):
            self.days = sc.promotetolist(self.days)
        if isinstance(self.days, list):
            for d, day in enumerate(self.days):
                self.days[d] = sim.day(
                    day
                )  # Ensure it's an integer and not a string or something
        self.days = sc.promotetoarray(self.days)

        self.changes = sc.promotetoarray(self.changes)
        if len(self.days) != len(self.changes):
            errormsg = f'Number of days supplied ({len(self.days)}) does not match number of changes in beta ({len(self.changes)})'
            raise ValueError(errormsg)

        self.orig_betas = {}
        self.layers = sc.promotetolist(self.layers, keepnone=True)
        for lkey in self.layers:
            if lkey is None:
                self.orig_betas['overall'] = sim['beta']
            else:
                self.orig_betas[lkey] = sim['beta_layer'][lkey]

        self.initialized = True
        return
def get_quar_inds(quar_policy, sim):
    '''
    Helper function to return the appropriate indices for people in quarantine
    based on the current quarantine testing "policy". Used by test_num and test_prob.
    Not for use by the user.

    If quar_policy is a number or a list of numbers, then it is interpreted as
    the number of days after the start of quarantine when a test is performed.
    It can also be a function that returns the list of indices.

    Args:
        quar_policy (str, int, list, func): 'start', people entering quarantine; 'end', people leaving; 'both', entering and leaving; 'daily', every day in quarantine
        sim (Sim): the simulation object
    '''
    t = sim.t
    if   quar_policy is None:    quar_test_inds = np.array([])
    elif quar_policy == 'start': quar_test_inds = cvu.true(sim.people.date_quarantined==t-1) # Actually do the day after since testing usually happens before contact tracing
    elif quar_policy == 'end':   quar_test_inds = cvu.true(sim.people.date_end_quarantine==t+1) # +1 since they are released on date_end_quarantine, so do the day before
    elif quar_policy == 'both':  quar_test_inds = np.concatenate([cvu.true(sim.people.date_quarantined==t-1), cvu.true(sim.people.date_end_quarantine==t+1)])
    elif quar_policy == 'daily': quar_test_inds = cvu.true(sim.people.quarantined)
    elif sc.isnumber(quar_policy) or (sc.isiterable(quar_policy) and not sc.isstring(quar_policy)):
        quar_policy = sc.promotetoarray(quar_policy)
        quar_test_inds = np.unique(np.concatenate([cvu.true(sim.people.date_quarantined==t-1-q) for q in quar_policy]))
    elif callable(quar_policy):
        quar_test_inds = quar_policy(sim)
    else:
        errormsg = f'Quarantine policy "{quar_policy}" not recognized: must be a string (start, end, both, daily), int, list, array, set, tuple, or function'
        raise ValueError(errormsg)
    return quar_test_inds
Example #4
0
    def split(self, inds=None, chunks=None):
        '''
        Convenience method for splitting one MultiSim into several. You can specify
        either individual indices of simulations to extract, via inds, or consecutive
        chunks of indices, via chunks. If this function is called on a merged MultiSim,
        the chunks can be retrieved automatically and no arguments are necessary.

        Args:
            inds (list): a list of lists of indices, with each list turned into a MultiSim
            chunks (int or list): if an int, split the MultiSim into chunks of that length; if a list return chunks of that many sims

        Returns:
            A list of MultiSim objects

        **Examples**::

            m1 = cv.MultiSim(cv.Sim(label='sim1'), initialize=True)
            m2 = cv.MultiSim(cv.Sim(label='sim2'), initialize=True)
            m3 = cv.MultiSim.merge(m1, m2)
            m3.run()
            m1b, m2b = m3.split()

            msim = cv.MultiSim(cv.Sim(), n_runs=6)
            msim.run()
            m1, m2 = msim.split(inds=[[0,2,4], [1,3,5]])
            mlist1 = msim.split(chunks=[2,4]) # Equivalent to inds=[[0,1], [2,3,4,5]]
            mlist2 = msim.split(chunks=3) # Equivalent to inds=[[0,1,2], [3,4,5]]
        '''

        # Process indices and chunks
        if inds is None:  # Indices not supplied
            if chunks is None:  # Chunks not supplied
                if hasattr(self, 'chunks'):  # Created from a merged MultiSim
                    inds = self.chunks
                else:  # No indices or chunks and not created from a merge
                    errormsg = f'If a MultiSim has not been created via merge(), you must supply either inds or chunks to split it'
                    raise ValueError(errormsg)
            else:  # Chunks supplied, but not inds
                inds = []  # Initialize
                sim_inds = np.arange(len(self))  # Indices for the simulations
                if sc.isiterable(chunks):  # e.g. chunks = [2,4]
                    chunk_inds = np.cumsum(chunks)[:-1]
                    inds = np.split(sim_inds, chunk_inds)
                else:  # e.g. chunks = 3
                    inds = np.split(
                        sim_inds,
                        chunks)  # This will fail if the length is wrong

        # Do the conversion
        mlist = []
        for indlist in inds:
            sims = sc.dcp([self.sims[i] for i in indlist])
            msim = MultiSim(sims=sims)
            mlist.append(msim)

        return mlist
Example #5
0
 def _make_resdict(self, for_json: bool = True) -> dict:
     ''' Pre-convert the results structure to a friendier output'''
     resdict = {}
     if for_json:
         resdict['timeseries_keys'] = self.reskeys
     for key, res in self.results.items():
         if isinstance(res, Result):
             res = res.values
         if for_json or sc.isiterable(res) and len(res) == self.npts:
             resdict[key] = res
     return resdict
Example #6
0
def process_days(sim, days):
    '''
    Ensure lists of days are in consistent format. Used by change_beta, clip_edges,
    and some analyzers. If day is 'end' or -1, use the final day of the simulation.
    '''
    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)
    for d, day in enumerate(days):
        if day in ['end', -1]:
            day = sim['end_day']
        days[d] = sim.day(
            day)  # Ensure it's an integer and not a string or something
    days = sc.promotetoarray(days)
    return days
Example #7
0
 def __init__(self, pars):
     super().__init__()
     subkeys = ['days', 'vals']
     for parkey in pars.keys():
         for subkey in subkeys:
             if subkey not in pars[parkey].keys():
                 errormsg = f'Parameter {parkey} is missing subkey {subkey}'
                 raise KeyError(errormsg)
             if not sc.isiterable(pars[parkey][subkey]):
                 pars[parkey][subkey] = sc.promotetoarray(pars[parkey][subkey])
         len_days = len(pars[parkey]['days'])
         len_vals = len(pars[parkey]['vals'])
         if len_days != len_vals:
             raise ValueError(f'Length of days ({len_days}) does not match length of values ({len_vals}) for parameter {parkey}')
     self.pars = pars
     return
def process_days(sim, days, return_dates=False):
    '''
    Ensure lists of days are in consistent format. Used by change_beta, clip_edges,
    and some analyzers. If day is 'end' or -1, use the final day of the simulation.
    Optionally return dates as well as days.
    '''
    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)
    for d,day in enumerate(days):
        if day in ['end', -1]:
            day = sim['end_day']
        days[d] = sim.day(day) # Ensure it's an integer and not a string or something
    days = np.sort(sc.promotetoarray(days)) # Ensure they're an array and in order
    if return_dates:
        dates = [sim.date(day) for day in days] # Store as date strings
        return days, dates
    else:
        return days
def process_days_changes(sim, days, changes):
    '''
    Ensure lists of days and lists of changes are in consistent format. Used by
    change_beta and clip_edges.
    '''

    if sc.isstring(days) or not sc.isiterable(days):
        days = sc.promotetolist(days)
    if isinstance(days, list):
        for d, day in enumerate(days):
            days[d] = sim.day(
                day)  # Ensure it's an integer and not a string or something
    days = sc.promotetoarray(days)

    changes = sc.promotetoarray(changes)
    if len(days) != len(changes):
        errormsg = f'Number of days supplied ({len(days)}) does not match number of changes in beta ({len(changes)})'
        raise ValueError(errormsg)

    return days, changes
Example #10
0
 def compute_losses(self):
     ''' Compute the weighted goodness-of-fit '''
     for key in self.gofs.keys():
         if key in self.weights:
             weight = self.weights[key]
             if sc.isiterable(weight): # It's an array
                 len_wt = len(weight)
                 len_sim = self.sim_npts
                 len_match = len(self.gofs[key])
                 if len_wt == len_match: # If the weight already is the right length, do nothing
                     pass
                 elif len_wt == len_sim: # Most typical case: it's the length of the simulation, must trim
                     weight = weight[self.inds.sim[key]] # Trim to matching indices
                 else:
                     errormsg = f'Could not map weight array of length {len_wt} onto simulation of length {len_sim} or data-model matches of length {len_match}'
                     raise ValueError(errormsg)
         else:
             weight = 1.0
         self.losses[key] = self.gofs[key]*weight
     return
Example #11
0
def check_dist(actual, expected, std=None, dist='norm', check='dist', label=None, alpha=0.05, size=10000, verbose=True, die=False, stats=False):
    """
    Check whether counts match the expected distribution. The distribution can be
    any listed in scipy.stats. The parameters for the distribution should be supplied
    via the "expected" argument. The standard deviation for a normal distribution is
    a special case; it can be supplied separately or calculated from the (actual) data.

    Args:
        actual (int, float, or array) : the observed value, or distribution of values
        expected (int, float, tuple)  : the expected value; or, a tuple of arguments
        std (float)                   : for normal distributions, the standard deviation of the expected value (taken from data if not supplied)
        dist (str)                    : the type of distribution to use
        check (str)                   : what to check: 'dist' = entire distribution (default), 'mean' (equivalent to supplying np.mean(actual)), or 'median'
        label (str)                   : the name of the variable being tested
        alpha (float)                 : the significance level at which to reject the null hypothesis
        size (int)                    : the size of the sample from the expected distribution to compare with if distribution is discrete

        verbose (bool)                : print a warning if the null hypothesis is rejected
        die (bool)                    : raise an exception if the null hypothesis is rejected
        stats (bool)                  : whether to return statistics

    Returns:
        If stats is True, returns statistics: whether null hypothesis is
        rejected, pvalue, number of samples, expected quintiles, observed
        quintiles, and the observed quantile.

    **Examples**::

        sp.check_dist(actual=[3,4,4,2,3], expected=3, dist='poisson')
        sp.check_dist(actual=[0.14, -3.37,  0.59, -0.07], expected=0, std=1.0, dist='norm')
        sp.check_dist(actual=5.5, expected=(1, 5), dist='lognorm')
    """
    # Handle inputs
    label = f' "{label}"' if label else ''
    is_dist = sc.isiterable(actual)

    # Set distribution
    if dist.lower() in ['norm', 'normal', 'gaussian']:
        if std is None:
            if is_dist:
                std = np.std(actual)  # Get standard deviation from the data
            else: # pragma: no cover
                std = 1.0
        args = (expected, std)
        scipydist = getattr(scipy.stats, 'norm')
        truedist = scipy.stats.norm(expected, std)
    else:
        try:
            if sc.isnumber(expected):
                args = (expected, )
            else:
                args = tuple(expected)
            scipydist = getattr(scipy.stats, dist)
            truedist = scipydist(*args)
        except Exception as E:
            errormsg = f'Distribution "{dist}" not supported with the expected values supplied; valid distributions are those in scipy.stats'
            raise NotImplementedError(errormsg) from E

    # Calculate stats
    if is_dist and check == 'dist':
        quantile = truedist.cdf(np.median(actual))

        # only if distribution is continuous
        if isinstance(scipydist, scipy.stats.rv_continuous):
            teststat, pvalue = scipy.stats.kstest(rvs=actual, cdf=dist, args=args)  # Use the K-S test to see if came from the same distribution

        # ks test against large sample from the theoretical distribution
        elif isinstance(scipydist, scipy.stats.rv_discrete):
            expected_r = truedist.rvs(size=size)
            teststat, pvalue = scipy.stats.ks_2samp(actual, expected_r)

        else: # pragma: no cover
            errormsg = 'Distribution is neither continuous or discrete and so not supported at this time.'
            raise NotImplementedError(errormsg)
        null = pvalue > alpha

    else:
        if check == 'mean':
            value = np.mean(actual)
        elif check == 'median':
            value = np.median(actual)
        else:
            value = actual
        quantile = truedist.cdf(value)  # If it's a single value, see where it lands on the Poisson CDF
        pvalue = 1.0-2*abs(quantile-0.5)  # E.g., 0.975 maps on to p=0.05
        minquant = alpha/2  # e.g., 0.025 for alpha=0.05
        maxquant = 1-alpha/2  # e.g., 0.975 for alpha=0.05
        minval = truedist.ppf(minquant)
        maxval = truedist.ppf(maxquant)
        quant_check = (minquant <= quantile <= maxquant)  # True if above minimum and below maximum
        val_check = (minval <= value <= maxval)  # Check values
        null = quant_check or val_check  # Consider it to pass if either passes

    # Additional stats
    n_samples = len(actual) if is_dist else 1
    eps = 1.0/n_samples if n_samples > 4 else 1e-2  # For small number of samples, use default limits
    quintiles = [eps, 0.25, 0.5, 0.75, 1-eps]
    obvs_quin = np.quantile(actual, quintiles) if is_dist else actual
    expect_quin = truedist.ppf(quintiles)

    # If null hypothesis is rejected, print a warning or error
    if not null:
        msg = f''''
Variable{label} with n={n_samples} samples is out of range using the distribution:
    {dist}({args}) →
    p={pvalue} < α={alpha}
Expected quintiles are: {expect_quin}
Observed quintiles are: {obvs_quin}
Observed median is in quantile: {quantile}'''
        if die:
            raise ValueError(msg)
        elif verbose:
            warnings.warn(msg)

    # If null hypothesis is not rejected, under verbose, print a confirmation
    if null and verbose:
        print(f'Check passed. Null hypothesis with expected distribution: {dist}{args} not rejected.')
        if is_dist and check == 'dist':
            print(f'Test statistic: {teststat}, pvalue: {pvalue}')

    if not stats:
        return null
    else:
        s = sc.objdict()
        s.null = null
        s.pvalue = pvalue
        s.n_samples = n_samples
        s.expected_quintiles = expect_quin
        s.observed_quintiles = obvs_quin
        s.observed_quantile = quantile
        return s