def validate_pars(self):
    '''
    Normalize parameters that may be supplied in more than one form.

    The start day is stored as a datetime.date (2020-01-01 if it is None or 0),
    a scalar number of contacts is expanded to a single-layer dictionary, a
    numeric population type is mapped onto its name, and the interventions are
    promoted to a list.

    Raises:
        ValueError: if the population type is not one of the available choices
    '''

    # Start day: apply the default, parse strings, then drop any time component
    first_day = self['start_day']
    if first_day in [None, 0]:
        first_day = dt.date(2020, 1, 1)
    elif sc.isstring(first_day):
        first_day = sc.readdate(first_day)
    self['start_day'] = first_day.date() if isinstance(first_day, dt.datetime) else first_day

    # Contacts: a scalar means a single layer 'a' holding all contacts
    n_contacts = self['contacts']
    if sc.isnumber(n_contacts):
        self['contacts'] = {'a': n_contacts}
        self['beta_layers'] = {'a': 1.0}

    # Population type: allow an index (e.g. pop_type=1 means 'microstructure')
    popdata_choices = ['random', 'microstructure', 'synthpops']
    pop_type = self['pop_type']
    if sc.isnumber(pop_type) or isinstance(pop_type, bool):
        self['pop_type'] = popdata_choices[int(pop_type)]
    choice = self['pop_type']
    if choice not in popdata_choices:
        choicestr = ', '.join(popdata_choices)
        raise ValueError(f'Population type "{choice}" not available; choices are: {choicestr}')

    # Interventions: always a list, with None becoming an empty list
    self['interventions'] = sc.promotetolist(self['interventions'], keepnone=False)

    return
def day(obj, *args, start_day=None):
    '''
    Convert a string, date/datetime object, or int to a day (int), the number of
    days since the start day. See also date() and daydiff(). Used primarily via
    sim.day() rather than directly.

    Args:
        obj (str, date, int, list, tuple, or array): convert any of these objects to a day relative to the start day
        args (list): additional days
        start_day (str or date): the start day; if none is supplied, return days since 2020-01-01.

    Returns:
        days (int or list): the day(s) in simulation time; a single value rather
        than a list if only one was supplied. None entries are passed through
        unchanged, and None is returned if obj itself is None.

    Raises:
        ValueError: if an entry cannot be interpreted as a date

    **Example**::

        day('2020-04-05', start_day='2020-03-01') # Returns 35
    '''

    # Do not process a day if it's not supplied
    if obj is None:
        return None
    if start_day is None:
        start_day = '2020-01-01'

    # Convert to a new list, so that the caller's own list is never modified
    if sc.isstring(obj) or sc.isnumber(obj) or isinstance(obj, (dt.date, dt.datetime)):
        obj = sc.promotetolist(obj) # Ensure it's iterable
    elif isinstance(obj, np.ndarray):
        obj = obj.tolist() # Convert to list if it's an array
    else:
        obj = list(obj) # Copy lists (and accept tuples) instead of extending them in place
    obj.extend(args)

    days = []
    for d in obj:
        if d is None:
            days.append(d)
        elif sc.isnumber(d):
            days.append(int(d)) # Just convert to an integer
        else:
            try:
                if sc.isstring(d):
                    d = sc.readdate(d).date()
                elif isinstance(d, dt.datetime):
                    d = d.date()
                d_day = (d - date(start_day)).days # Heavy lifting -- actually compute the day
                days.append(d_day)
            except Exception as E:
                errormsg = f'Could not interpret "{d}" as a date: {str(E)}'
                raise ValueError(errormsg) from E # Keep the original traceback attached

    # Return an integer rather than a list if only one provided
    if len(days) == 1:
        days = days[0]

    return days
def initialize(self, sim):
    '''
    Convert the start and end days via sim.day() and expand scalar tracing
    parameters into dictionaries keyed by the population's layer keys.

    Args:
        sim (Sim): the simulation object; supplies day() and people.layer_keys()
    '''
    # Dates: store both the individual days and the pair
    self.days = [sim.day(self.start_day), sim.day(self.end_day)]
    self.start_day, self.end_day = self.days

    # Dictionaries: one entry per layer if a single number was supplied
    prob = self.trace_probs
    if sc.isnumber(prob):
        self.trace_probs = dict.fromkeys(sim.people.layer_keys(), prob)
    delay = self.trace_time
    if sc.isnumber(delay):
        self.trace_time = dict.fromkeys(sim.people.layer_keys(), delay)

    return
def date(self, ind, *args, dateformat=None):
    '''
    Convert an integer or list of integer simulation days to a date/list of dates.

    Args:
        ind (int, list, tuple, or array): the day(s) in simulation time
        args (list): additional day(s)
        dateformat (str): the strftime format to use; default '%Y-%m-%d'

    Returns:
        dates (str or list): the date(s) relative to the simulation start day,
        as formatted strings; a single string if only one day was supplied

    **Example**::

        sim.date(35) # Returns '2020-04-05' if the start day is 2020-03-01
    '''

    # Build a new list, so arrays/tuples work and the caller's list is not modified
    if sc.isnumber(ind):
        ind = sc.promotetolist(ind)
    else:
        ind = list(ind)
    ind.extend(args)

    if dateformat is None:
        dateformat = '%Y-%m-%d'

    # Do the conversion
    start_day = self['start_day']
    dates = [(start_day + dt.timedelta(days=int(i))).strftime(dateformat) for i in ind]

    # Return a string rather than a list if only one provided
    if len(ind) == 1:
        dates = dates[0]

    return dates
def get_quar_inds(quar_policy, sim):
    '''
    Helper function to return the appropriate indices for people in quarantine
    based on the current quarantine testing "policy". Used by test_num and test_prob.
    Not for use by the user.

    If quar_policy is a number or a list of numbers, then it is interpreted as
    the number of days after the start of quarantine when a test is performed.
    It can also be a function that returns the list of indices.

    Args:
        quar_policy (str, int, list, func): 'start', people entering quarantine; 'end', people leaving; 'both', entering and leaving; 'daily', every day in quarantine
        sim (Sim): the simulation object

    Returns:
        The indices selected by the policy (an empty array if the policy is None
        or an empty list of days); for a function, whatever the function returns.

    Raises:
        ValueError: if the policy is not recognized
    '''
    t = sim.t
    recognized = True

    if quar_policy is None:
        quar_test_inds = np.array([])
    elif sc.isstring(quar_policy):
        # Only compare against the keywords when it really is a string: comparing
        # a numeric array with a string is elementwise in recent NumPy versions,
        # which makes the truth value of the comparison ambiguous
        if quar_policy == 'start':
            quar_test_inds = cvu.true(sim.people.date_quarantined==t-1) # Actually do the day after since testing usually happens before contact tracing
        elif quar_policy == 'end':
            quar_test_inds = cvu.true(sim.people.date_end_quarantine==t+1) # +1 since they are released on date_end_quarantine, so do the day before
        elif quar_policy == 'both':
            quar_test_inds = np.concatenate([cvu.true(sim.people.date_quarantined==t-1), cvu.true(sim.people.date_end_quarantine==t+1)])
        elif quar_policy == 'daily':
            quar_test_inds = cvu.true(sim.people.quarantined)
        else:
            recognized = False
    elif sc.isnumber(quar_policy) or sc.isiterable(quar_policy):
        quar_policy = sc.promotetoarray(quar_policy)
        if len(quar_policy) == 0:
            quar_test_inds = np.array([]) # No days requested; np.concatenate() cannot take an empty list
        else:
            quar_test_inds = np.unique(np.concatenate([cvu.true(sim.people.date_quarantined==t-1-q) for q in quar_policy]))
    elif callable(quar_policy):
        quar_test_inds = quar_policy(sim)
    else:
        recognized = False

    if not recognized:
        errormsg = f'Quarantine policy "{quar_policy}" not recognized: must be a string (start, end, both, daily), int, list, array, set, tuple, or function'
        raise ValueError(errormsg)

    return quar_test_inds
def validate_layer_pars(self):
    '''
    Handle layer parameters, since they need to be validated after the population
    creation, rather than before.

    Scalar layer parameters are expanded to dictionaries with one entry per
    layer key; the keys of every layer parameter must then match the layer keys,
    and (if a population exists) the population's contact layers.

    Raises:
        sc.KeyNotFoundError: if any layer parameter, or the population, has keys
        that differ from the layer keys
    '''

    # First, try to figure out what the layer keys should be and perform basic type checking
    layer_keys = self.layer_keys()
    layer_pars = cvpar.layer_pars # The names of the parameters that are specified by layer
    for lp in layer_pars:
        val = self[lp]
        if sc.isnumber(val): # It's a scalar instead of a dict, assume it's all contacts
            self[lp] = {k: val for k in layer_keys}

    # Handle key mismatches
    for lp in layer_pars:
        lp_keys = set(self.pars[lp].keys())
        if not lp_keys == set(layer_keys):
            errormsg = f'Layer parameters have inconsistent keys with the layer keys {layer_keys}:'
            for lp2 in layer_pars: # Fail on first error, but re-loop to list all of them
                errormsg += f'\n{lp2} = ' + ', '.join(map(str, self.pars[lp2].keys())) # List each parameter's own keys
            raise sc.KeyNotFoundError(errormsg)

    # Handle mismatches with the population
    if self.people is not None:
        pop_keys = set(self.people.contacts.keys())
        if pop_keys != set(layer_keys):
            errormsg = f'Please update your parameter keys {layer_keys} to match population keys {pop_keys}. You may find sim.reset_layer_pars() helpful.'
            raise sc.KeyNotFoundError(errormsg)

    return
def validate_pars(self):
    '''
    Bring parameters that accept several input types into one consistent form.

    The start day defaults to 2020-01-01 and anything that is not already a
    datetime is passed through sc.readdate(); a numeric 'usepopdata' is used as
    an index into the available options; interventions are promoted to a list.

    Raises:
        ValueError: if 'usepopdata' is not one of the available options
    '''

    # Start day
    when = self['start_day']
    if when in [None, 0]:
        when = dt.datetime(2020, 1, 1) # Default start day
    already_datetime = isinstance(when, dt.datetime)
    self['start_day'] = when if already_datetime else sc.readdate(when)

    # Population data option, e.g. usepopdata=1 means 'bayesian'
    popdata_choices = ['random', 'bayesian', 'data']
    option = self['usepopdata']
    if sc.isnumber(option) or isinstance(option, bool):
        self['usepopdata'] = popdata_choices[int(option)]
    choice = self['usepopdata']
    if choice not in popdata_choices:
        choicestr = ', '.join(popdata_choices)
        raise ValueError(f'Population data option "{choice}" not available; choices are: {choicestr}')

    # Interventions
    self['interventions'] = sc.promotetolist(self['interventions'], keepnone=False)

    return
def day(self, day, *args):
    '''
    Convert a string, date/datetime object, or int to a day (int).

    Args:
        day (str, date, int, list, or tuple): convert any of these objects to a day relative to the simulation's start day
        args (list): additional days

    Returns:
        days (int or list): the day(s) in simulation time; a single int if only
        one day was supplied, and None if day is None

    Raises:
        ValueError: if an entry cannot be interpreted as a date

    **Example**::

        sim.day('2020-04-05') # Returns 35 if the start day is 2020-03-01
    '''

    # Do not process a day if it's not supplied
    if day is None:
        return None

    # Convert to a new list, so that the caller's own list is never modified
    if sc.isstring(day) or sc.isnumber(day) or isinstance(day, (dt.date, dt.datetime)):
        day = sc.promotetolist(day) # Ensure it's iterable
    else:
        day = list(day) # Copy lists (and accept tuples) instead of extending them in place
    day.extend(args)

    days = []
    for d in day:
        if sc.isnumber(d):
            days.append(int(d)) # Just convert to an integer
        else:
            try:
                if sc.isstring(d):
                    d = sc.readdate(d).date()
                elif isinstance(d, dt.datetime):
                    d = d.date()
                d_day = (d - self['start_day']).days
                days.append(d_day)
            except Exception as E:
                errormsg = f'Could not interpret "{d}" as a date: {str(E)}'
                raise ValueError(errormsg) from E # Keep the original traceback attached

    # Return an integer rather than a list if only one provided
    if len(days) == 1:
        days = days[0]

    return days
def initialize(self, sim):
    '''
    Prepare the intervention for use with a given sim: convert the start and end
    days via sim.day(), fill in defaults for unset tracing parameters, expand
    scalar tracing parameters to one entry per layer, and mark as initialized.

    Args:
        sim (Sim): the simulation object; supplies day(), pars['quar_period']
            and people.layer_keys()
    '''
    # Dates
    self.start_day = sim.day(self.start_day)
    self.end_day = sim.day(self.end_day)
    self.days = [self.start_day, self.end_day]

    # Defaults for anything that was left as None
    if self.trace_probs is None:
        self.trace_probs = 1.0
    if self.trace_time is None:
        self.trace_time = 0.0
    if self.quar_period is None:
        self.quar_period = sim.pars['quar_period']

    # Scalars become dictionaries keyed by layer
    for attr in ('trace_probs', 'trace_time'):
        current = getattr(self, attr)
        if sc.isnumber(current):
            setattr(self, attr, {key: current for key in sim.people.layer_keys()})

    self.initialized = True
    return
def date(self, ind, *args, dateformat=None, as_date=False):
    '''
    Convert one or more integer days of simulation time to a date/list of dates --
    by default returns a string, or returns a datetime Date object if as_date is True.
    See also cv.date(), which provides a partly overlapping set of date conversion features.

    Args:
        ind (int, list, or array): the index day(s) in simulation time (NB: strings and date objects are accepted, and will be passed unchanged)
        args (list): additional day(s)
        dateformat (str): the format to return the date in
        as_date (bool): whether to return as a datetime date instead of a string

    Returns:
        dates (str, Date, or list): the date(s) corresponding to the simulation day(s)

    **Examples**::

        sim = cv.Sim()
        sim.date(34) # Returns '2020-04-04'
        sim.date([34, 54]) # Returns ['2020-04-04', '2020-04-24']
        sim.date([34, '2020-04-24']) # Returns ['2020-04-04', '2020-04-24']
        sim.date(34, 54, as_date=True) # Returns [datetime.date(2020, 4, 4), datetime.date(2020, 4, 24)]
    '''

    # Handle inputs
    if isinstance(ind, list):
        ind = list(ind) # Work on a copy, so the caller's list is not extended with args
    else:
        ind = sc.promotetolist(ind) # If it's a number, string, or dateobj, convert it to a list
    ind.extend(args)
    if dateformat is None:
        dateformat = '%Y-%m-%d'

    # Do the conversion
    dates = []
    for raw in ind:
        if sc.isnumber(raw):
            date_obj = cvm.date(self['start_day'], as_date=True) + dt.timedelta(days=int(raw))
        else:
            date_obj = cvm.date(raw, as_date=True)
        if as_date:
            dates.append(date_obj)
        else:
            dates.append(date_obj.strftime(dateformat))

    # Return a string rather than a list if only one provided
    if len(ind) == 1:
        dates = dates[0]

    return dates
def validate_pars(self):
    '''
    Some parameters can take multiple types; this makes them consistent.

    Integer-valued parameters are cast to int, the start day is converted with
    cvm.date() (default 2020-03-01), scalar contacts become a single-layer
    dictionary, layer keys are cross-checked, the population type is checked,
    and interventions given as dictionaries are converted to InterventionDict.

    Raises:
        sc.KeyNotFoundError: if the layer parameters (or the population) have
        inconsistent keys, or if the population type is not available
    '''

    # Handle types ('pop_size' only needs to be converted once)
    for key in ['pop_size', 'pop_infected', 'n_days']:
        self[key] = int(self[key])

    # Handle start day
    start_day = self['start_day'] # Shorten
    if start_day in [None, 0]: # Use default start day
        start_day = '2020-03-01'
    self['start_day'] = cvm.date(start_day)

    # Handle contacts
    contacts = self['contacts']
    if sc.isnumber(contacts): # It's a scalar instead of a dict, assume it's all contacts
        self['contacts'] = {'a': contacts}

    # Handle key mismatches
    beta_layer_keys = set(self.pars['beta_layer'].keys())
    contacts_keys = set(self.pars['contacts'].keys())
    quar_eff_keys = set(self.pars['quar_eff'].keys())
    if not (beta_layer_keys == contacts_keys == quar_eff_keys):
        errormsg = f'Layer parameters beta={beta_layer_keys}, contacts={contacts_keys}, quar_eff={quar_eff_keys} have inconsistent keys'
        raise sc.KeyNotFoundError(errormsg)
    if self.people is not None:
        pop_keys = set(self.people.contacts.keys())
        if pop_keys != beta_layer_keys:
            errormsg = f'Please update your parameter keys {beta_layer_keys} to match population keys {pop_keys}. You may find sim.reset_layer_pars() helpful.'
            raise sc.KeyNotFoundError(errormsg)

    # Handle population data
    popdata_choices = ['random', 'hybrid', 'clustered', 'synthpops']
    choice = self['pop_type']
    if choice not in popdata_choices:
        choicestr = ', '.join(popdata_choices)
        errormsg = f'Population type "{choice}" not available; choices are: {choicestr}'
        raise sc.KeyNotFoundError(errormsg)

    # Handle interventions
    self['interventions'] = sc.promotetolist(self['interventions'], keepnone=False)
    for i, interv in enumerate(self['interventions']):
        if isinstance(interv, dict): # It's a dictionary representation of an intervention
            self['interventions'][i] = cvi.InterventionDict(**interv)

    return
def __init__(self, pars, **kwargs):
    '''
    Set up the per-person arrays for a population of pars['pop_size'] people.

    Args:
        pars (dict or number): the parameters; a number is interpreted as the
            population size
        kwargs (dict): accepted but not used here
    '''

    # A bare number is shorthand for the population size
    if sc.isnumber(pars):
        pars = {'pop_size': pars}
    self.pars = pars # Equivalent to self.set_pars(pars)
    self.pop_size = int(pars['pop_size'])
    n_people = self.pop_size

    # General bookkeeping
    self.t = 0 # Current simulation time
    self._lock = False # Keys may still be added while the arrays are created
    self.meta = cvd.PeopleMeta() # Lists of keys and dtypes
    self.contacts = None
    self.init_contacts()
    self.infection_log = [] # Record of infections - keys for ['source','target','date','layer']

    def nan_array():
        ''' A new float array with one NaN entry per person '''
        return np.full(n_people, np.nan, dtype=cvd.default_float)

    # Person properties: floats, apart from the integer UID
    for key in self.meta.person:
        self[key] = np.arange(n_people, dtype=cvd.default_int) if key == 'uid' else nan_array()

    # Health states: booleans, with only 'susceptible' starting as true
    for key in self.meta.states:
        self[key] = np.full(n_people, key == 'susceptible', dtype=bool)

    # Dates and durations: floats
    for key in self.meta.dates + self.meta.durs:
        self[key] = nan_array()

    # Remember the dtype of every array in a flat dict, then stop further keys
    # from being set (this does not affect attributes)
    self._dtypes = {key: self[key].dtype for key in self.keys()}
    self._lock = True

    # Flows to be computed during the simulation
    self.flows = dict.fromkeys(cvd.new_result_flows, 0)

    # initialize() still needs to be called after construction
    self.initialized = False

    return
def inds2dates(self, inds, dateformat=None):
    '''
    Convert a set of indices to a set of dates.

    Args:
        inds (int or list): the day index or indices, counted from the start day
        dateformat (str): the strftime format to use; default '%b-%d'

    Returns:
        dates (list): one formatted date string per index
    '''
    day_inds = sc.promotetolist(inds) if sc.isnumber(inds) else inds # A single number becomes a list
    fmt = '%b-%d' if dateformat is None else dateformat
    return [(self['start_day'] + dt.timedelta(days=int(ind))).strftime(fmt) for ind in day_inds]
def __init__(self, pars):
    '''
    Validate and store parameters that change on specified days.

    Args:
        pars (dict): for each parameter name, a dict with 'days' and 'vals'
            entries; scalar entries are converted to arrays in place

    Raises:
        cvm.KeyNotFoundError: if a parameter lacks a 'days' or 'vals' entry
        ValueError: if 'days' and 'vals' have different lengths
    '''
    super().__init__()
    required = ['days', 'vals']
    for parkey in pars.keys():
        entry = pars[parkey]
        for subkey in required:
            if subkey not in entry.keys():
                raise cvm.KeyNotFoundError(f'Parameter {parkey} is missing subkey {subkey}')
            value = entry[subkey]
            if sc.isnumber(value): # Scalars become arrays; everything else is left unchanged
                entry[subkey] = sc.promotetoarray(value)
        len_days, len_vals = len(entry['days']), len(entry['vals'])
        if len_days != len_vals:
            raise ValueError(f'Length of days ({len_days}) does not match length of values ({len_vals}) for parameter {parkey}')
    self.pars = pars
    return
def date(obj, *args, **kwargs):
    '''
    Convert a string or a datetime object to a date object. To convert to an integer
    from the start day, use sim.day() instead.

    Args:
        obj (str, date, datetime, or list of these): the object to convert
        args (str, date, datetime): additional objects to convert
        kwargs (dict): accepted but not used

    Returns:
        dates (date or list): either a single date object, or a list of them

    Raises:
        ValueError: if an entry cannot be converted to a date

    **Examples**::

        cv.date('2020-04-05') # Returns datetime.date(2020, 4, 5)
    '''

    # Convert to a new list, so that the caller's own list is never modified
    if sc.isstring(obj) or sc.isnumber(obj) or isinstance(obj, (dt.date, dt.datetime)):
        obj = sc.promotetolist(obj) # Ensure it's iterable
    else:
        obj = list(obj) # Copy lists (and accept tuples) instead of extending them in place
    obj.extend(args)

    dates = []
    for d in obj:
        try:
            if type(d) is dt.date: # Do not use isinstance, since must be the exact type
                pass
            elif sc.isstring(d):
                d = sc.readdate(d).date()
            elif isinstance(d, dt.datetime):
                d = d.date()
            else:
                errormsg = f'Could not interpret "{d}" of type {type(d)} as a date'
                raise TypeError(errormsg)
            dates.append(d)
        except Exception as E:
            errormsg = f'Conversion of "{d}" to a date failed: {str(E)}'
            raise ValueError(errormsg) from E # Keep the original traceback attached

    # Return a single date rather than a list if only one provided
    if len(dates) == 1:
        dates = dates[0]

    return dates
def __init__(self, pars, **kwargs):
    '''
    Validate and store parameters that change on specified days.

    Args:
        pars (dict): for each parameter name, a dict with 'days' and 'vals'
            entries; scalar entries are converted to arrays in place
        kwargs (dict): passed to the parent class constructor

    Raises:
        sc.KeyNotFoundError: if a parameter lacks a 'days' or 'vals' entry
        ValueError: if 'days' and 'vals' have different lengths
    '''
    super().__init__(**kwargs) # Initialize the Intervention object
    self._store_args() # Store the input arguments so the intervention can be recreated

    for parkey in pars.keys():
        spec = pars[parkey]

        # Both sub-entries must be present; scalars are promoted to arrays
        for subkey in ('days', 'vals'):
            if subkey not in spec.keys():
                errormsg = f'Parameter {parkey} is missing subkey {subkey}'
                raise sc.KeyNotFoundError(errormsg)
            if sc.isnumber(spec[subkey]):
                spec[subkey] = sc.promotetoarray(spec[subkey])

        # There must be exactly one value per day
        len_days = len(spec['days'])
        len_vals = len(spec['vals'])
        if len_days != len_vals:
            raise ValueError(f'Length of days ({len_days}) does not match length of values ({len_vals}) for parameter {parkey}')

    self.pars = pars
    return
def validate_pars(self):
    '''
    Bring parameters that accept several input types into one consistent form.

    The start day is stored as a datetime.date (2020-01-01 if it is None or 0),
    scalar contacts become a single-layer dictionary, the keys of the layer
    parameters are cross-checked against each other and the population, the
    population type is checked, and interventions are promoted to a list.

    Raises:
        cvm.KeyNotFoundError: if the layer parameters (or the population) have
        inconsistent keys, or if the population type is not available
    '''

    # Start day: apply the default, parse strings, then drop any time component
    first_day = self['start_day']
    if first_day in [None, 0]:
        first_day = dt.date(2020, 1, 1)
    elif sc.isstring(first_day):
        first_day = sc.readdate(first_day)
    if isinstance(first_day, dt.datetime):
        first_day = first_day.date()
    self['start_day'] = first_day

    # Contacts: a scalar means a single layer 'a' holding all contacts
    n_contacts = self['contacts']
    if sc.isnumber(n_contacts):
        self['contacts'] = {'a': n_contacts}

    # Layer keys must agree across the layer parameters...
    beta_layer_keys, contacts_keys, quar_eff_keys = (
        set(self.pars[name].keys()) for name in ('beta_layer', 'contacts', 'quar_eff')
    )
    if beta_layer_keys != contacts_keys or contacts_keys != quar_eff_keys:
        errormsg = f'Layer parameters beta={beta_layer_keys}, contacts={contacts_keys}, quar_eff={quar_eff_keys} have inconsistent keys'
        raise cvm.KeyNotFoundError(errormsg)

    # ...and with the population, if there is one
    if self.people is not None:
        pop_keys = set(self.people.contacts.keys())
        if pop_keys != beta_layer_keys:
            errormsg = f'Please update your parameter keys {beta_layer_keys} to match population keys {pop_keys}. You may find sim.reset_layer_pars() helpful.'
            raise cvm.KeyNotFoundError(errormsg)

    # Population type
    popdata_choices = ['random', 'hybrid', 'clustered', 'synthpops']
    choice = self['pop_type']
    if choice not in popdata_choices:
        choicestr = ', '.join(popdata_choices)
        raise cvm.KeyNotFoundError(f'Population type "{choice}" not available; choices are: {choicestr}')

    # Interventions
    self['interventions'] = sc.promotetolist(self['interventions'], keepnone=False)

    return
def initialize(self, sim):
    '''
    Fix the dates and number of tests.

    The start and end days are converted via sim.day() first, so that the
    start day is in simulation-day form when it is used as an offset for
    reindexing the daily tests.

    Args:
        sim (Sim): the simulation object; supplies day(), npts and 'start_day'
    '''

    # Handle days -- must come before the daily tests, which use the converted start day
    self.start_day = sim.day(self.start_day)
    self.end_day = sim.day(self.end_day)
    self.days = [self.start_day, self.end_day]

    # Process daily tests -- has to be here rather than init so have access to the sim object
    if sc.isnumber(self.daily_tests): # If a number, convert to an array
        self.daily_tests = np.array([int(self.daily_tests)]*sim.npts)
    elif isinstance(self.daily_tests, (pd.Series, pd.DataFrame)):
        start_date = sim['start_day'] + dt.timedelta(days=self.start_day)
        end_date = self.daily_tests.index[-1]
        dateindex = pd.date_range(start_date, end_date)
        self.daily_tests = self.daily_tests.reindex(dateindex, fill_value=0).to_numpy() # Dates without data get zero tests

    self.initialized = True

    return
def set_pars(self, pars=None):
    '''
    Merge new parameters into any already stored on the object, check that a
    population size is present, and store the result as self.pars.

    Args:
        pars (dict or number): the parameters to set; a number is interpreted
            as the population size, and None as no new parameters

    Raises:
        sc.KeyNotFoundError: if 'pop_size' is missing after merging
    '''
    if pars is None:
        new_pars = {}
    elif sc.isnumber(pars):
        new_pars = {'pop_size': pars} # Interpret as a population size
    else:
        new_pars = pars

    # Look the current parameters up in the instance dict, since they may not exist yet
    current = vars(self).get('pars')
    merged = sc.mergedicts(current, new_pars)

    if 'pop_size' not in merged:
        raise sc.KeyNotFoundError(f'The parameter "pop_size" must be included in a population; keys supplied were:\n{sc.newlinejoin(merged.keys())}')
    merged['pop_size'] = int(merged['pop_size'])
    merged.setdefault('location', None)

    self.pars = merged # Actually store the pars
    return
def __init__(self, url=None, redisargs=None, *args, **kwargs):
    '''
    Connect to a Redis database and finish construction via the parent class.

    Args:
        url (str or int): the Redis URL; if empty, database 0 on the default
            local server is used, and a number selects that database number on
            the default local server
        redisargs (dict): keyword arguments passed to redis.StrictRedis.from_url()
        args, kwargs: passed to the parent class constructor
    '''
    # Arguments to Redis (or lack thereof)
    redis_kwargs = {} if redisargs is None else redisargs

    # Work out the Redis URL
    local_server = 'redis://127.0.0.1:6379/' # The default URL for the Redis database
    if not url:
        target = local_server + '0' # e.g. sw.DataStore()
    elif sc.isnumber(url):
        target = local_server + '%i' % url # e.g. sw.DataStore(3)
    else:
        target = url
    self.url = target
    self.redis = redis.StrictRedis.from_url(self.url, **redis_kwargs)

    # Finish construction; Python 2 has no zero-argument super()
    if not six.PY2:
        super().__init__(*args, **kwargs)
    else:
        super(RedisDataStore, self).__init__(*args, **kwargs)
    return None
def date(self, ind, *args, dateformat=None, as_date=False):
    '''
    Convert one or more integer days of simulation time to a date/list of dates --
    by default returns a string, or returns a datetime Date object if as_date is True.

    Args:
        ind (int, list, tuple, or array): the day(s) in simulation time
        args (list): additional day(s)
        dateformat (str): the strftime format to use; default '%Y-%m-%d'
        as_date (bool): whether to return as a datetime date instead of a string

    Returns:
        dates (str, Date, or list): the date(s) corresponding to the simulation day(s)

    **Examples** (assuming a start day of 2020-03-01)::

        sim.date(34) # Returns '2020-04-04'
        sim.date([34, 54]) # Returns ['2020-04-04', '2020-04-24']
        sim.date(34, 54, as_date=True) # Returns [datetime.date(2020, 4, 4), datetime.date(2020, 4, 24)]
    '''

    # Handle inputs: build a new list, so arrays/tuples work and the caller's list is not modified
    if sc.isnumber(ind):
        ind = sc.promotetolist(ind)
    else:
        ind = list(ind)
    ind.extend(args)
    if dateformat is None:
        dateformat = '%Y-%m-%d'

    # Do the conversion
    start_day = self['start_day']
    dates = []
    for i in ind:
        date_obj = start_day + dt.timedelta(days=int(i))
        dates.append(date_obj if as_date else date_obj.strftime(dateformat))

    # Return a string rather than a list if only one provided
    if len(ind) == 1:
        dates = dates[0]

    return dates
def process_daily_data(daily_data, sim, start_day, as_int=False):
    '''
    Standardize daily data. A number is repeated into an array with one entry
    per simulation time point; a Pandas series or dataframe is reindexed onto
    consecutive dates (missing dates filled with zero) and converted to a
    NumPy array; anything else is returned unchanged.

    Args:
        daily_data (number, dataframe, or series): the data to convert to standardized format
        sim (Sim): the simulation object; supplies npts and 'start_day'
        start_day (int): the number of days after the simulation's start day at which the reindexed data begin
        as_int (bool): whether to convert a numeric input to an integer

    Returns:
        The standardized data (an array for numeric or Pandas input)
    '''
    # A single number: repeat it for every time point
    if sc.isnumber(daily_data):
        value = int(daily_data) if as_int else daily_data
        return np.array([value] * sim.npts)

    # Date-indexed data: realign from the offset start date to the last date present
    if isinstance(daily_data, (pd.Series, pd.DataFrame)):
        first_date = sim['start_day'] + dt.timedelta(days=start_day)
        last_date = daily_data.index[-1]
        all_dates = pd.date_range(first_date, last_date)
        return daily_data.reindex(all_dates, fill_value=0).to_numpy()

    return daily_data
def validate_pars(self):
    '''
    Some parameters can take multiple types; this makes them consistent.

    Integer-valued parameters are cast to int, the start day is converted with
    cvm.date() (default 2020-03-01), the end day and number of days are made
    consistent with each other, scalar layer parameters are expanded to one
    entry per layer, layer keys are cross-checked, the population type is
    checked, and interventions given as dictionaries are converted to
    InterventionDict.

    Raises:
        ValueError: if a parameter cannot be converted to an integer, if the
            number of days is not positive, if neither n_days nor end_day is
            supplied, or if the population type is not available
        sc.KeyNotFoundError: if the layer parameters (or the population) have
            inconsistent keys
    '''

    # Handle types ('pop_size' only needs to be converted once)
    for key in ['pop_size', 'pop_infected']:
        try:
            self[key] = int(self[key])
        except Exception as E:
            errormsg = f'Could not convert {key}={self[key]} of {type(self[key])} to integer'
            raise ValueError(errormsg) from E

    # Handle start day
    start_day = self['start_day'] # Shorten
    if start_day in [None, 0]: # Use default start day
        start_day = '2020-03-01'
    self['start_day'] = cvm.date(start_day)

    # Handle end day and n_days
    end_day = self['end_day']
    n_days = self['n_days']
    if end_day:
        self['end_day'] = cvm.date(end_day)
        n_days = cvm.daydiff(self['start_day'], self['end_day'])
        if n_days <= 0:
            errormsg = f"Number of days must be >0, but you supplied start={str(self['start_day'])} and end={str(self['end_day'])}, which gives n_days={n_days}"
            raise ValueError(errormsg)
        else:
            self['n_days'] = int(n_days)
    else:
        if n_days:
            self['n_days'] = int(n_days)
            self['end_day'] = self.date(n_days) # Convert from the number of days to the end day
        else:
            errormsg = f'You must supply one of n_days and end_day, not "{n_days}" and "{end_day}"'
            raise ValueError(errormsg)

    # Handle parameters specified by layer

    # Try to figure out what the layer keys should be, e.g. household, school
    layer_pars = ['beta_layer', 'contacts', 'iso_factor', 'quar_factor']
    if self.people is not None:
        layer_keys = set(self.people.contacts.keys())
    elif isinstance(self['beta_layer'], dict):
        layer_keys = list(self['beta_layer'].keys()) # Get keys from beta_layer since the "most required" layer parameter
    else:
        layer_keys = ['a'] # Assume this by default, corresponding to random/no layers

    # Convert scalar layer parameters to dictionaries
    for lp in layer_pars:
        val = self[lp]
        if sc.isnumber(val): # It's a scalar instead of a dict, assume it's all contacts
            self[lp] = {k: val for k in layer_keys}

    # Handle key mismatches
    for lp in layer_pars:
        lp_keys = set(self.pars[lp].keys())
        if not lp_keys == set(layer_keys):
            errormsg = 'Layer parameters have inconsistent keys:'
            for lp2 in layer_pars: # Fail on first error, but re-loop to list all of them
                errormsg += f'\n{lp2} = ' + ', '.join(map(str, self.pars[lp2].keys())) # List each parameter's own keys
            raise sc.KeyNotFoundError(errormsg)

    if self.people is not None:
        pop_keys = set(self.people.contacts.keys())
        if pop_keys != set(layer_keys): # Compare as sets, since layer_keys may be a list
            errormsg = f'Please update your parameter keys {layer_keys} to match population keys {pop_keys}. You may find sim.reset_layer_pars() helpful.'
            raise sc.KeyNotFoundError(errormsg)

    # Handle population data
    popdata_choices = ['random', 'hybrid', 'clustered', 'synthpops']
    choice = self['pop_type']
    if choice not in popdata_choices:
        choicestr = ', '.join(popdata_choices)
        errormsg = f'Population type "{choice}" not available; choices are: {choicestr}'
        raise ValueError(errormsg)

    # Handle interventions
    self['interventions'] = sc.promotetolist(self['interventions'], keepnone=False)
    for i, interv in enumerate(self['interventions']):
        if isinstance(interv, dict): # It's a dictionary representation of an intervention
            self['interventions'][i] = cvi.InterventionDict(**interv)

    return
def get_doubling_time(sim, series=None, interval=None, start_day=None, end_day=None, moving_window=None, exp_approx=False, max_doubling_time=100, eps=1e-3, verbose=None):
    '''
    Method to calculate doubling time.

    Args:
        sim (Sim): the simulation object; used for its results (if series is None or a string) and its default verbosity
        series (str or array): the name of a result, or the data themselves; if None or not found, 'cum_infections' is used
        interval (list): [start_day, end_day]; if supplied, overrides start_day and end_day
        start_day (int): first index of the series to use; default 0
        end_day (int): last index of the series to use; default (and maximum) is the length of the series
        moving_window (int): if supplied, calculate one doubling time per window of this many days
        exp_approx (bool): use the ratio of the end and start values rather than a log-linear regression (which requires statsmodels)
        max_doubling_time (float): the value returned when the series is not growing, or is growing too slowly
        eps (float): the smallest regression slope treated as growth
        verbose (int): verbosity level; default sim['verbose']

    Returns:
        doubling_time (float or list): a single doubling time, or a list of them if moving_window is used

    Raises:
        Exception: if results are needed from the sim but are not ready
        ValueError: if the series starts at zero (exponential approximation) or has fewer than two nonzero values (regression)

    **Examples** ::

        get_doubling_time(sim, interval=[3,30]) # returns the doubling time over the given interval (single float)
        get_doubling_time(sim, interval=[3,30], moving_window=3) # returns doubling times calculated over moving windows (array)
    '''

    # Set verbose level
    if verbose is None:
        verbose = sim['verbose']

    # Validate inputs: series
    if series is None or isinstance(series, str):
        if not sim.results_ready:
            raise Exception("Results not ready, cannot calculate doubling time")
        else:
            if series is None or series not in sim.result_keys():
                sc.printv("Series not supplied or not found in results; defaulting to use cumulative exposures", 1, verbose)
                series = 'cum_infections'
            series = sim.results[series].values
    else:
        series = sc.promotetoarray(series)

    # Validate inputs: interval
    if interval is not None:
        if len(interval) != 2:
            sc.printv(f"Interval should be a list/array/tuple of length 2, not {len(interval)}. Resetting to length of series.", 1, verbose)
            interval = [0, len(series)]
        start_day, end_day = interval[0], interval[1]

    # Use the whole series for any bound that was not supplied
    if start_day is None:
        start_day = 0
    if end_day is None:
        end_day = len(series)

    if len(series) < end_day:
        sc.printv(f"End day {end_day} is after the series ends ({len(series)}). Resetting to length of series.", 1, verbose)
        end_day = len(series)
    int_length = end_day - start_day

    # Deal with moving window
    if moving_window is not None:
        if not sc.isnumber(moving_window):
            sc.printv("Moving window should be an integer; ignoring and calculating single result", 1, verbose)
            doubling_time = get_doubling_time(sim, series=series, start_day=start_day, end_day=end_day, moving_window=None, exp_approx=exp_approx, max_doubling_time=max_doubling_time, eps=eps, verbose=verbose)
        else:
            if not isinstance(moving_window, int):
                sc.printv(f"Moving window should be an integer; recasting {moving_window} the nearest integer... ", 1, verbose)
                moving_window = int(moving_window)
            if moving_window < 2:
                sc.printv(f"Moving window should be greater than 1; recasting {moving_window} to 2", 1, verbose)
                moving_window = 2

            # One calculation per window, passing the caller's settings along
            doubling_time = []
            for w in range(int_length - moving_window + 1):
                this_start = start_day + w
                this_end = this_start + moving_window
                this_doubling_time = get_doubling_time(sim, series=series, start_day=this_start, end_day=this_end, exp_approx=exp_approx, max_doubling_time=max_doubling_time, eps=eps, verbose=verbose)
                doubling_time.append(this_doubling_time)

    # Do calculations
    else:
        if not exp_approx:
            try:
                import statsmodels.api as sm
            except ModuleNotFoundError as E:
                errormsg = f'Could not import statsmodels ({E}), falling back to exponential approximation'
                print(errormsg)
                exp_approx = True
        if exp_approx:
            last_ind = min(end_day, len(series) - 1) # The end value is read directly, so it must be a valid index
            if series[start_day] > 0:
                r = series[last_ind] / series[start_day]
                if r > 1:
                    doubling_time = (last_ind - start_day) * np.log(2) / np.log(r)
                    doubling_time = min(doubling_time, max_doubling_time) # Otherwise, it's unbounded
                else:
                    doubling_time = max_doubling_time # Not growing, so it never doubles
            else:
                raise ValueError("Can't calculate doubling time with exponential approximation when initial value is zero.")
        else:
            if np.any(series[start_day:end_day]): # Deal with zero values if possible
                nonzero = np.nonzero(series[start_day:end_day])[0]
                if len(nonzero) >= 2:
                    exog = sm.add_constant(np.arange(len(nonzero)))
                    endog = np.log2((series[start_day:end_day])[nonzero])
                    model = sm.OLS(endog, exog)
                    doubling_rate = model.fit().params[1] # Slope of log2(series), i.e. doublings per day
                    if doubling_rate > eps:
                        doubling_time = 1.0 / doubling_rate
                    else:
                        doubling_time = max_doubling_time
                else:
                    raise ValueError(f"Can't calculate doubling time for series {series[start_day:end_day]}. Check whether series is growing.")
            else:
                raise ValueError(f"Can't calculate doubling time for series {series[start_day:end_day]}. Check whether series is growing.")

    return doubling_time
def _get_ia(self, which, label=None, partial=False, as_list=False, as_inds=False, die=True, first=False):
    '''
    Helper method for get_interventions() and get_analyzers(); see
    get_interventions() docstring.

    Args:
        which (str): 'interventions' or 'analyzers'; selects self.pars[which]
        label (str, int, class, or list): what to match; 'summary' prints a
            table of all entries and returns None; None selects the first or
            last entry depending on ``first``
        partial (bool): for string labels, also match when the label is a
            substring of the object's label
        as_list (bool): return all matching objects as a list
        as_inds (bool): return the matching indices (takes precedence over as_list)
        die (bool): raise an error if nothing matched (otherwise return None)
        first (bool): return the first match rather than the last

    Raises:
        ValueError: for an invalid ``which``, or if nothing matched and die is True
        TypeError: if a label is not a string, number, or class
    '''

    # Handle inputs
    if which not in ['interventions', 'analyzers']:
        errormsg = f'This method is only defined for interventions and analyzers, not "{which}"'
        raise ValueError(errormsg)

    ia_list = self.pars[which]  # List of interventions or analyzers
    n_ia = len(ia_list)  # Number of interventions/analyzers

    if label == 'summary':  # Print a summary of the interventions
        # Build the table in one go; DataFrame.append() no longer exists in pandas >= 2.0
        rows = [dict(ind=ind, label=str(ia_obj.label), type=type(ia_obj)) for ind, ia_obj in enumerate(ia_list)]
        df = pd.DataFrame(rows, columns=['ind', 'label', 'type'])
        print(f'Summary of {which}:')
        print(df)
        return

    # Standard usage case
    position = 0 if first else -1  # Choose either the first or last element
    if label is None:
        label = position  # Get the first or last element
    labels = sc.promotetolist(label)

    # Calculate the matches
    matches = []
    match_inds = []
    for this_label in labels:
        if sc.isnumber(this_label):
            matches.append(ia_list[this_label])  # This will raise an exception if an invalid index is given
            match_inds.append(n_ia + this_label if this_label < 0 else this_label)  # Convert to a positive number
        elif sc.isstring(this_label):
            # Substring matching is only meaningful for string labels
            for ind, ia_obj in enumerate(ia_list):
                if ia_obj.label == this_label or (partial and this_label in str(ia_obj.label)):
                    matches.append(ia_obj)
                    match_inds.append(ind)
        elif isinstance(this_label, type):
            for ind, ia_obj in enumerate(ia_list):
                if isinstance(ia_obj, this_label):
                    matches.append(ia_obj)
                    match_inds.append(ind)
        else:
            errormsg = f'Could not interpret label type "{type(this_label)}": should be str, int, or {which} class'
            raise TypeError(errormsg)

    # Parse the output options
    if as_inds:
        output = match_inds
    elif as_list:
        output = matches
    elif matches:
        output = matches[position]  # Return either the first or last match
    elif die:
        errormsg = f'No {which} matching "{label}" were found'
        raise ValueError(errormsg)
    else:
        output = None

    return output
def test_baseline():
    '''
    Compare the current default sim against the saved baseline.

    Loads the stored summary from ``baseline_filename``, runs a default sim,
    and compares the two summaries key by key.

    Returns:
        The new summary, if it matches the baseline.

    Raises:
        ValueError: if the keys or any values differ; the message lists the
            key differences and a table of old/new values with their
            difference, ratio, and a change marker
    '''

    # Load existing baseline
    baseline = sc.loadjson(baseline_filename)
    old = baseline[baseline_key]

    # Calculate new baseline
    sim = cv.Sim(verbose=0)
    sim.run()
    new = sim.summary

    # Compare keys
    errormsg = ''
    old_keys = set(old.keys())
    new_keys = set(new.keys())
    if old_keys != new_keys:
        errormsg += "Keys don't match!\n"
        missing = old_keys - new_keys
        extra = new_keys - old_keys
        if missing:
            errormsg += f'  Missing old keys: {missing}\n'
        if extra:
            errormsg += f'  Extra new keys: {extra}\n'

    # Compare values: new keys first (to preserve their order), then any keys
    # that only exist in the old baseline
    not_present = 'not present'
    ordered_keys = list(new.keys()) + [key for key in old.keys() if key not in new_keys]
    mismatches = {}
    for key in ordered_keys:
        old_val = old[key] if key in old else not_present
        new_val = new[key] if key in new else not_present
        if old_val != new_val:
            mismatches[key] = {'old': old_val, 'new': new_val}

    if len(mismatches):
        # Append, so that any key mismatches reported above are not lost
        errormsg += '\nThe following values have changed from the previous baseline!\n'
        errormsg += 'If this is intentional, please rerun "tests/update_baseline" and commit.\n'
        errormsg += 'Mismatches:\n'
        df = pd.DataFrame.from_dict(mismatches).transpose()
        diff = []
        ratio = []
        change = []
        small_change = 1e-3  # Define a small change, e.g. a rounding error
        for mdict in mismatches.values():
            old_val = mdict['old']
            new_val = mdict['new']
            if sc.isnumber(new_val) and sc.isnumber(old_val) and old_val > 0:
                this_diff = new_val - old_val
                this_ratio = new_val / old_val
                # Fold change in either direction; infinite if the new value is not positive
                abs_ratio = max(this_ratio, 1.0 / this_ratio) if this_ratio > 0 else np.inf

                # Set the character to use
                if abs_ratio - 1.0 < small_change:
                    change_char = '≈'
                elif new_val > old_val:
                    change_char = '↑'
                elif new_val < old_val:
                    change_char = '↓'
                else:
                    errormsg = f'Could not determine relationship between old={old_val} and new={new_val}'
                    raise ValueError(errormsg)

                # Set how many repeats it should have
                repeats = 1
                if abs_ratio >= 1.1:
                    repeats = 2
                if abs_ratio >= 2:
                    repeats = 3
                if abs_ratio >= 10:
                    repeats = 4

                this_change = change_char * repeats
            else:
                this_diff = np.nan
                this_ratio = np.nan
                this_change = 'N/A'

            diff.append(this_diff)
            ratio.append(this_ratio)
            change.append(this_change)

        df['diff'] = diff
        df['ratio'] = ratio
        for col in ['old', 'new', 'diff', 'ratio']:
            # Columns holding non-numeric entries (e.g. 'not present') are left as they are
            if pd.api.types.is_numeric_dtype(df[col]):
                df[col] = df[col].round(decimals=3)
        df['change'] = change
        errormsg += str(df)

    # Raise an error if mismatches were found
    if errormsg:
        raise ValueError(errormsg)
    else:
        print('Baseline matches')

    return new
def date(obj, *args, start_date=None, dateformat=None, as_date=True):
    '''
    Convert a string or a datetime object to a date object. To convert to an integer
    from the start day, it is recommended you supply a start date, or use sim.date()
    instead; otherwise, it will calculate the date counting days from 2020-01-01.
    This means that the output of cv.date() will not necessarily match the output
    of sim.date() for an integer input.

    Args:
        obj (str, date, datetime, int, list, array): the object to convert
        args (str, date, datetime, int): additional objects to convert
        start_date (str, date, datetime): the starting date, if an integer is supplied (default '2020-01-01')
        dateformat (str): the format to return the date in, if as_date is False (default '%Y-%m-%d')
        as_date (bool): whether to return as a datetime date instead of a string

    Returns:
        dates (date, str, or list): either a single date (or string), or a list
        of them; None entries are passed through unchanged

    Raises:
        ValueError: if any entry cannot be converted

    **Examples**::

        cv.date('2020-04-05') # Returns datetime.date(2020, 4, 5)
        cv.date(10, start_date='2020-04-04', as_date=False) # Returns '2020-04-14'
        cv.date([35,36,37], as_date=False) # Returns ['2020-02-05', '2020-02-06', '2020-02-07']
    '''

    if obj is None:
        return None

    # Convert to list and handle other inputs
    if isinstance(obj, np.ndarray):
        obj = obj.tolist()  # If it's an array, convert to a list
    # Concatenate into a new list, so a list passed in by the caller is never modified
    obj = sc.promotetolist(obj) + list(args)

    if dateformat is None:
        dateformat = '%Y-%m-%d'
    if start_date is None:
        start_date = '2020-01-01'
    base_date = None  # Parsed from start_date the first time a number is encountered

    dates = []
    for d in obj:
        if d is None:
            dates.append(d)
            continue
        try:
            if isinstance(d, dt.datetime):  # Check first, since datetime is a subclass of date
                d = d.date()
            elif isinstance(d, dt.date):
                pass
            elif sc.isstring(d):
                d = sc.readdate(d).date()
            elif sc.isnumber(d):
                if base_date is None:
                    base_date = date(start_date)
                d = base_date + dt.timedelta(days=int(d))
            else:
                errormsg = f'Cannot interpret {type(d)} as a date, must be date, datetime, or string'
                raise TypeError(errormsg)
            if as_date:
                dates.append(d)
            else:
                dates.append(d.strftime(dateformat))
        except Exception as E:
            errormsg = f'Conversion of "{d}" to a date failed: {str(E)}'
            raise ValueError(errormsg) from E

    # Return a single date rather than a list if only one provided
    if len(dates) == 1:
        dates = dates[0]

    return dates
def __init__(self, pars, strict=True, **kwargs):
    '''
    Store the parameters and create the per-person arrays listed in the meta object.

    Args:
        pars (dict or number): the parameters; a number is interpreted as the
            population size
        strict (bool): stored as the key lock once the arrays exist; also
            selects whether extra values are applied via self.set() or by
            item assignment
        kwargs (dict): 'contacts' (passed to add_contacts) plus any other
            values to set, e.g. age
    '''

    # A bare number is shorthand for the population size
    if sc.isnumber(pars):
        pars = {'pop_size': pars}
    self.pars = pars  # Equivalent to self.set_pars(pars)
    self.pars['pop_size'] = int(pars['pop_size'])
    self.pars.setdefault('n_strains', 1)
    self.pars.setdefault('location', None)
    self.version = cvv.__version__  # Version info

    # Bookkeeping; keys stay unlocked while the arrays below are created
    self.t = 0  # Current simulation time
    self._lock = False
    self.meta = cvd.PeopleMeta()  # Lists of keys and dtypes
    self.contacts = None
    self.init_contacts()
    self.infection_log = []  # Record of infections - keys for ['source','target','date','layer']

    # Sizes shared by all of the arrays
    n_people = self.pars['pop_size']
    strain_shape = (self.pars['n_strains'], n_people)

    def nan_floats():
        ''' A fresh float array of NaNs, one entry per person '''
        return np.full(n_people, np.nan, dtype=cvd.default_float)

    # Person properties: integer UIDs, everything else float
    for key in self.meta.person:
        self[key] = np.arange(n_people, dtype=cvd.default_int) if key == 'uid' else nan_floats()

    # Health states: booleans, true only for susceptible and naive
    for key in self.meta.states:
        starts_true = key in ['susceptible', 'naive']
        self[key] = np.full(n_people, starts_true, dtype=bool)

    # Strain states: per-person floats, plus per-strain boolean flags
    for key in self.meta.strain_states:
        self[key] = nan_floats()
    for key in self.meta.by_strain_states:
        self[key] = np.full(strain_shape, False, dtype=bool)

    # Immunity (zero for every strain), antibodies (NaN) and vaccination (zero)
    for key in self.meta.imm_states:
        self[key] = np.zeros(strain_shape, dtype=cvd.default_float)
    for key in self.meta.nab_states:
        self[key] = nan_floats()
    for key in self.meta.vacc_states:
        self[key] = np.zeros(n_people, dtype=cvd.default_int)

    # Dates and durations: floats
    for key in self.meta.dates + self.meta.durs:
        self[key] = nan_floats()

    # Record the dtype of every key in a flat dict
    dtypes = {}
    for key in self.keys():
        dtypes[key] = self[key].dtype
    self._dtypes = dtypes
    self._lock = strict  # If strict is true, stop further keys from being set (does not affect attributes)

    # Flows to be computed during the simulation
    self.init_flows()

    # initialize() still needs to be called separately
    self.initialized = False

    # Contacts, if supplied, are handled separately from the other values
    if 'contacts' in kwargs:
        self.add_contacts(kwargs.pop('contacts'))

    # All other values, e.g. age
    if strict:
        for key, value in kwargs.items():
            self.set(key, value)
    else:
        for key, value in kwargs.items():
            self[key] = value

    # Internal cache to record people that need to be quarantined on each timestep {t:(person, quarantine_end_day)}
    self._pending_quarantine = defaultdict(list)

    return
def check_dist(actual, expected, std=None, dist='norm', check='dist', label=None, alpha=0.05, size=10000, verbose=True, die=False, stats=False):
    """
    Check whether counts match the expected distribution. The distribution can be
    any listed in scipy.stats. The parameters for the distribution should be supplied
    via the "expected" argument. The standard deviation for a normal distribution is
    a special case; it can be supplied separately or calculated from the (actual) data.

    Args:
        actual (int, float, or array) : the observed value, or distribution of values
        expected (int, float, tuple)  : the expected value; or, a tuple of arguments
        std (float)                   : for normal distributions, the standard deviation of the expected value (taken from data if not supplied)
        dist (str)                    : the type of distribution to use
        check (str)                   : what to check: 'dist' = entire distribution (default), 'mean' (equivalent to supplying np.mean(actual)), or 'median'
        label (str)                   : the name of the variable being tested
        alpha (float)                 : the significance level at which to reject the null hypothesis
        size (int)                    : the size of the sample from the expected distribution to compare with if distribution is discrete
        verbose (bool)                : print a warning if the null hypothesis is rejected
        die (bool)                    : raise an exception if the null hypothesis is rejected
        stats (bool)                  : whether to return statistics

    Returns:
        If stats is False, a boolean: True if the null hypothesis was not rejected.
        If stats is True, returns statistics: whether null hypothesis is
        rejected, pvalue, number of samples, expected quintiles, observed
        quintiles, and the observed quantile.

    Raises:
        NotImplementedError: if the distribution cannot be constructed from scipy.stats
        ValueError: if the null hypothesis is rejected and die is True

    **Examples**::

        sp.check_dist(actual=[3,4,4,2,3], expected=3, dist='poisson')
        sp.check_dist(actual=[0.14, -3.37, 0.59, -0.07], expected=0, std=1.0, dist='norm')
        sp.check_dist(actual=5.5, expected=(1, 5), dist='lognorm')
    """
    # Handle inputs
    label = f' "{label}"' if label else ''
    is_dist = sc.isiterable(actual)

    # Set distribution
    if dist.lower() in ['norm', 'normal', 'gaussian']:
        if std is None:
            if is_dist:
                std = np.std(actual)  # Get standard deviation from the data
            else:  # pragma: no cover
                std = 1.0
        args = (expected, std)
        scipydist = scipy.stats.norm
        truedist = scipy.stats.norm(expected, std)
    else:
        try:
            if sc.isnumber(expected):
                args = (expected, )
            else:
                args = tuple(expected)
            scipydist = getattr(scipy.stats, dist)
            truedist = scipydist(*args)
        except Exception as E:
            errormsg = f'Distribution "{dist}" not supported with the expected values supplied; valid distributions are those in scipy.stats'
            raise NotImplementedError(errormsg) from E

    # Calculate stats
    if is_dist and check == 'dist':
        quantile = truedist.cdf(np.median(actual))

        # only if distribution is continuous
        if isinstance(scipydist, scipy.stats.rv_continuous):
            # Use the K-S test to see if came from the same distribution. The
            # frozen distribution's CDF is passed rather than the name, so
            # aliases such as 'normal' or 'gaussian' also work
            teststat, pvalue = scipy.stats.kstest(rvs=actual, cdf=truedist.cdf)

        # ks test against large sample from the theoretical distribution
        elif isinstance(scipydist, scipy.stats.rv_discrete):
            expected_r = truedist.rvs(size=size)
            teststat, pvalue = scipy.stats.ks_2samp(actual, expected_r)

        else:  # pragma: no cover
            errormsg = 'Distribution is neither continuous or discrete and so not supported at this time.'
            raise NotImplementedError(errormsg)
        null = pvalue > alpha

    else:
        if check == 'mean':
            value = np.mean(actual)
        elif check == 'median':
            value = np.median(actual)
        else:
            value = actual
        quantile = truedist.cdf(value)  # If it's a single value, see where it lands on the CDF
        pvalue = 1.0 - 2*abs(quantile - 0.5)  # E.g., 0.975 maps on to p=0.05
        minquant = alpha/2  # e.g., 0.025 for alpha=0.05
        maxquant = 1 - alpha/2  # e.g., 0.975 for alpha=0.05
        minval = truedist.ppf(minquant)
        maxval = truedist.ppf(maxquant)
        quant_check = (minquant <= quantile <= maxquant)  # True if above minimum and below maximum
        val_check = (minval <= value <= maxval)  # Check values
        null = quant_check or val_check  # Consider it to pass if either passes

    # Additional stats
    n_samples = len(actual) if is_dist else 1
    eps = 1.0/n_samples if n_samples > 4 else 1e-2  # For small number of samples, use default limits
    quintiles = [eps, 0.25, 0.5, 0.75, 1-eps]
    obvs_quin = np.quantile(actual, quintiles) if is_dist else actual
    expect_quin = truedist.ppf(quintiles)

    # If null hypothesis is rejected, print a warning or error
    if not null:
        msg = (
            f'\nVariable{label} with n={n_samples} samples is out of range using the distribution:\n'
            f'    {dist}{args} → p={pvalue} < α={alpha}\n'
            f'Expected quintiles are: {expect_quin}\n'
            f'Observed quintiles are: {obvs_quin}\n'
            f'Observed median is in quantile: {quantile}'
        )
        if die:
            raise ValueError(msg)
        elif verbose:
            warnings.warn(msg)

    # If null hypothesis is not rejected, under verbose, print a confirmation
    if null and verbose:
        print(f'Check passed. Null hypothesis with expected distribution: {dist}{args} not rejected.')
        if is_dist and check == 'dist':
            print(f'Test statistic: {teststat}, pvalue: {pvalue}')

    if not stats:
        return null
    else:
        s = sc.objdict()
        s.null = null
        s.pvalue = pvalue
        s.n_samples = n_samples
        s.expected_quintiles = expect_quin
        s.observed_quintiles = obvs_quin
        s.observed_quantile = quantile
        return s
def __init__(self, pars, strict=True, **kwargs):
    '''
    Store the parameters and create the per-person arrays listed in the meta object.

    Args:
        pars (dict or number): the parameters; a number is interpreted as the
            population size
        strict (bool): stored as the key lock once the arrays exist; also
            selects whether extra values are applied via self.set() or by
            item assignment
        kwargs (dict): 'contacts' (passed to add_contacts) plus any other
            values to set, e.g. age
    '''

    # A bare number is shorthand for the population size
    if sc.isnumber(pars):
        pars = {'pop_size': pars}
    self.pars = pars  # Equivalent to self.set_pars(pars)
    self.pop_size = int(pars['pop_size'])
    self.location = pars.get('location')  # None if no location was given
    self.version = cvv.__version__  # Version info

    # Bookkeeping; keys stay unlocked while the arrays below are created
    self.t = 0  # Current simulation time
    self._lock = False
    self.meta = cvd.PeopleMeta()  # Lists of keys and dtypes
    self.contacts = None
    self.init_contacts()
    self.infection_log = []  # Record of infections - keys for ['source','target','date','layer']

    # Person properties: integer UIDs, everything else a float array of NaNs
    for key in self.meta.person:
        if key != 'uid':
            self[key] = np.full(self.pop_size, np.nan, dtype=cvd.default_float)
        else:
            self[key] = np.arange(self.pop_size, dtype=cvd.default_int)

    # Health states: booleans, true only for susceptible
    for key in self.meta.states:
        self[key] = np.full(self.pop_size, key == 'susceptible', dtype=bool)

    # Dates and durations: floats
    for key in self.meta.dates + self.meta.durs:
        self[key] = np.full(self.pop_size, np.nan, dtype=cvd.default_float)

    # Record the dtype of every key in a flat dict
    dtypes = {}
    for key in self.keys():
        dtypes[key] = self[key].dtype
    self._dtypes = dtypes
    self._lock = strict  # If strict is true, stop further keys from being set (does not affect attributes)

    # Flows to be computed during the simulation, all starting at zero
    self.flows = dict.fromkeys(cvd.new_result_flows, 0)

    # initialize() still needs to be called separately
    self.initialized = False

    # Contacts, if supplied, are handled separately from the other values
    if 'contacts' in kwargs:
        self.add_contacts(kwargs.pop('contacts'))

    # All other values, e.g. age
    if strict:
        for key, value in kwargs.items():
            self.set(key, value)
    else:
        for key, value in kwargs.items():
            self[key] = value

    # Internal cache to record people that need to be quarantined on each timestep {t:(person, quarantine_end_day)}
    self._pending_quarantine = defaultdict(list)

    return