def go_to_daily_work_meeting(states, params, seed):  # noqa: U100
    """Determine which workers attend their daily work meeting today.

    Only individuals whose "occupation" is "working" and whose
    "work_daily_group_id" is not -1 are candidates. On Saturdays and Sundays
    the candidates are filtered by the "work_saturday" / "work_sunday" column.
    On all other days attendance is reduced for symptomatic individuals and
    for individuals who know that they are currently infected.

    Args:
        states (pandas.DataFrame): sid states DataFrame.
        params (pandas.DataFrame): parameters. The entries
            ("symptomatic_multiplier", "symptomatic_multiplier") and
            ("positive_test_multiplier", "positive_test_multiplier") are read
            from the "value" column.
        seed (int): not used by this function.

    Returns:
        attends_work (pandas.Series): same index as states. Truthy for workers
            that go to work this period, falsy for everyone else.

    """
    weekday = get_date(states).day_name()

    is_working = states["occupation"] == "working"
    has_daily_group = states["work_daily_group_id"] != -1
    attends_work = is_working & has_daily_group

    if weekday in ("Saturday", "Sunday"):
        # On weekends only those flagged as weekend workers attend.
        return attends_work & states[f"work_{weekday.lower()}"]

    reductions = (
        ("symptomatic_multiplier", states["symptomatic"]),
        ("positive_test_multiplier", states["knows_currently_infected"]),
    )
    for entry, condition in reductions:
        attends_work = reduce_contacts_on_condition(
            attends_work,
            states,
            params.loc[(entry, entry), "value"],
            condition,
            is_recurrent=True,
        )
    return attends_work
def _calculate_work_rapid_test_demand(states, contacts, compliance_multiplier):
    """Identify who demands a rapid test because of work contacts.

    An individual demands a test if (s)he has at least one positive entry in
    any contacts column starting with "work_", the last rapid test is at least
    the currently allowed number of days ago and the individual's
    "rapid_test_compliance" is at least ``1 - compliance_multiplier``.

    Args:
        states (pandas.DataFrame): sid states DataFrame with the columns
            "cd_received_rapid_test" and "rapid_test_compliance".
        contacts (pandas.DataFrame): contacts of today, one column per model.
        compliance_multiplier (float): share of workers that are offered a
            test and accept it.

    Returns:
        work_rapid_test_demand (pandas.Series): boolean demand indicator.

    """
    date = get_date(states)

    work_models = [name for name in contacts if name.startswith("work_")]
    meets_colleagues = (contacts[work_models] > 0).any(axis=1)

    # Starting 2021-04-26 every worker must be offered two tests per week.
    # source: https://bit.ly/2Qw4Md6
    # To have a gradual transition the test frequency is increased stepwise:
    # the first cutoff that lies after today determines the allowed gap.
    cutoffs = (
        (pd.Timestamp("2021-04-07"), 7),  # before Easter
        (pd.Timestamp("2021-04-13"), 6),
        (pd.Timestamp("2021-04-20"), 5),
        (pd.Timestamp("2021-04-27"), 4),
    )
    allowed_days_btw_tests = 3  # applies from 2021-04-27 onwards
    for cutoff, n_days in cutoffs:
        if date < cutoff:
            allowed_days_btw_tests = n_days
            break

    last_test_too_old = states["cd_received_rapid_test"] <= -allowed_days_btw_tests
    needs_test = meets_colleagues & last_test_too_old

    complies = states["rapid_test_compliance"] >= (1 - compliance_multiplier)
    offered_and_accepted = needs_test & complies

    return needs_test & offered_and_accepted
def reduce_recurrent_model(
    states, contacts, seed, multiplier, params=None  # noqa: U100
):
    """Randomly thin out the participants of a recurrent contact model.

    Recurrent contacts are boolean, so scaling them by a number would not
    change who participates. Instead, a fresh boolean draw is made for every
    individual and it replaces the entry of everyone who was scheduled to
    participate. Individuals that were not scheduled keep their entry.

    Args:
        states (pandas.DataFrame): sid states DataFrame.
        contacts (pandas.Series): boolean participation of the recurrent model.
        seed (int): seed passed to ``np.random.seed``.
        multiplier (float or pd.Series): passed as the per-individual
            probability to ``boolean_choices``. If a Series is supplied it is
            indexed with the current date.
        params: not used by this function.

    Returns:
        reduced (pandas.Series): same index as states. Scheduled participants
            carry their new draw, everyone else the original entry.

    """
    np.random.seed(seed)

    if isinstance(multiplier, pd.Series):
        todays_multiplier = multiplier[get_date(states)]
    else:
        todays_multiplier = multiplier

    scheduled = contacts.to_numpy()
    draws = boolean_choices(np.full(len(states), todays_multiplier))
    thinned = np.where(scheduled, draws, scheduled)
    return pd.Series(thinned, index=states.index)
def _identify_who_attends_because_of_a_b_schooling(states, a_b_query, a_b_rhythm):
    """Identify who attends school because (s)he is a student in A/B mode.

    We can ignore educ workers here because they are already covered in
    attends_always. Same for children covered by emergency care.

    Args:
        states (pandas.DataFrame): sid states DataFrame. Must contain the
            column "educ_a_b_identifier" if *a_b_query* is a string.
        a_b_query (bool or str): If bool, the value is broadcast to everyone
            and *a_b_rhythm* is ignored. If str, it is evaluated on states to
            identify who is subject to A/B schooling.
        a_b_rhythm (str): "weekly" or "daily". Determines whether the parity of
            the calendar week or of the day of the month selects the group
            that attends. Only used if *a_b_query* is a string.

    Returns:
        attends_because_of_a_b_schooling (pandas.Series): True for individuals
            that are in rotating split classes and whose half of class is
            attending today.

    Raises:
        ValueError: if *a_b_query* is neither bool nor str, or if *a_b_query*
            is a str and *a_b_rhythm* is neither "weekly" nor "daily".

    """
    if isinstance(a_b_query, bool):
        return pd.Series(a_b_query, index=states.index)

    if not isinstance(a_b_query, str):
        raise ValueError(
            f"a_b_query must be either bool or str, you supplied a {type(a_b_query)}"
        )

    # Validate before doing any work. Previously an unknown rhythm surfaced as
    # an UnboundLocalError further down.
    if a_b_rhythm not in ("weekly", "daily"):
        raise ValueError(
            f"a_b_rhythm must be 'weekly' or 'daily', you supplied {a_b_rhythm!r}"
        )

    date = get_date(states)
    a_b_eligible = states.eval(a_b_query)

    period = date.week if a_b_rhythm == "weekly" else date.day
    # The group whose identifier equals "the period is odd" attends today.
    in_attend_group = states["educ_a_b_identifier"] == (period % 2 == 1)

    return a_b_eligible & in_attend_group
def _pupils_having_vacations_do_not_attend(attends_facility, states, params):
    """Set attendance to False for everyone whose federal state has vacations.

    Args:
        attends_facility (pandas.Series): boolean attendance. It is not
            modified; a deep copy is returned.
        states (pandas.DataFrame): sid states DataFrame with a "state" column.
        params (pandas.DataFrame): passed on to ``get_states_w_vacations``.

    Returns:
        pandas.Series: copy of *attends_facility* in which attendees living in
            a state returned by ``get_states_w_vacations`` are set to False.

    """
    today = get_date(states)
    vacation_states = get_states_w_vacations(today, params).keys()
    on_vacation = states["state"].isin(vacation_states)

    adjusted = attends_facility.copy(deep=True)
    adjusted.loc[adjusted & on_vacation] = False
    return adjusted
def demand_test(
    states,
    params,
    seed,
    share_of_tests_for_symptomatics_series,
):
    """Determine who demands a test today.

    Contrary to the name this function combines test demand and test
    allocation. Demand arises from two channels which are combined with a
    logical or: demand derived from the share of known cases and demand for
    confirmation of rapid tests.

    Args:
        states (pandas.DataFrame): The states of the individuals.
        params (pandas.DataFrame): A DataFrame with parameters. The entry
            ("test_demand", "shares",
            "share_w_positive_rapid_test_requesting_test") is read from the
            "value" column and the slice ("share_known_cases",
            "share_known_cases") is interpolated for the current date.
        seed (int): Seed passed to ``np.random.seed``.
        share_of_tests_for_symptomatics_series (pandas.Series): Series that is
            indexed with the current date.

    Returns:
        demanded (pandas.Series): result of combining both demand channels
            with ``|``.

    """
    np.random.seed(seed)
    today = get_date(states)

    confirmation_key = (
        "test_demand",
        "shares",
        "share_w_positive_rapid_test_requesting_test",
    )
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="indexing past lexsort depth may impact performance."
        )
        share_requesting_confirmation = params.loc[confirmation_key, "value"]
        known_cases_params = params.loc[("share_known_cases", "share_known_cases")]

    share_known_cases = get_piecewise_linear_interpolation_for_one_day(
        today, known_cases_params
    )
    share_for_symptomatics = share_of_tests_for_symptomatics_series[today]

    from_known_cases = _calculate_test_demand_from_share_known_cases(
        states=states,
        share_known_cases=share_known_cases,
        share_of_tests_for_symptomatics=share_for_symptomatics,
    )
    from_rapid_tests = _calculate_test_demand_from_rapid_tests(
        states, share_requesting_confirmation
    )

    return from_known_cases | from_rapid_tests
def _process_multiplier(states, multiplier, name):
    """Select today's multiplier and make sure it is not negative.

    Args:
        states (pandas.DataFrame): sid states DataFrame. Only used to look up
            the current date, i.e. when *multiplier* is date-indexed or when
            an error message has to be built.
        multiplier (float, int, pandas.Series, pandas.DataFrame or array-like):
            Series and DataFrames are indexed with the current date via
            ``.loc``. Everything else is used as is.
        name (str): label that is inserted into the error message.

    Returns:
        The (date specific) multiplier.

    Raises:
        AssertionError: if the multiplier, or any of its elements, is negative.

    """
    date = None
    if isinstance(multiplier, (pd.Series, pd.DataFrame)):
        date = get_date(states)
        multiplier = multiplier.loc[date]

    if isinstance(multiplier, (float, int)):
        is_non_negative = 0 <= multiplier
    else:
        is_non_negative = (multiplier >= 0).all()

    if not is_non_negative:
        # Resolve the date only when the message is needed, and only once.
        if date is None:
            date = get_date(states)
        # Raised explicitly (instead of ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(f"Work {name} multiplier < 0 on {date}")

    return multiplier
def _sample_imported_infections(states, params, seed):
    """Sample who becomes infected through imported cases today.

    Args:
        states (pandas.DataFrame): sid states DataFrame with an "immune"
            column.
        params (pandas.DataFrame or pandas.Series): its index is interpreted
            as dates; it is passed to
            ``get_piecewise_linear_interpolation_for_one_day`` to obtain the
            number of imported cases per 100,000 individuals for today.
        seed (int): seed passed to ``np.random.seed``.

    Returns:
        pandas.Series: boolean Series with the same index as states. True for
            the individuals that were sampled. All False if today lies outside
            the date range covered by *params*.

    """
    np.random.seed(seed)
    date = get_date(states)

    # Both branches return the same type. Previously the in-range branch
    # returned the raw array of sampled index labels while the out-of-range
    # branch returned a boolean Series.
    imported = pd.Series(False, index=states.index)

    start_date = pd.Timestamp(params.index.min())
    end_date = pd.Timestamp(params.index.max())
    if start_date <= date <= end_date:
        n_cases_per_hundred_thousand = get_piecewise_linear_interpolation_for_one_day(
            date, params
        )
        n_cases = int(n_cases_per_hundred_thousand * len(states) / 100_000)

        pool = states.index[~states["immune"]]
        # Sampling without replacement fails if more cases are requested than
        # there are non-immune individuals, so cap at the pool size.
        n_cases = min(n_cases, len(pool))
        if n_cases > 0:
            sampled_ids = np.random.choice(pool, size=n_cases, replace=False)
            imported.loc[sampled_ids] = True

    return imported
def validate_prepared_initial_states(states, duration):
    """Validate states that are used to resume a former simulation.

    Args:
        states (pandas.DataFrame): states of the former simulation.
        duration (dict): must contain the key "start" holding the first date
            of the resumed simulation.

    Raises:
        ValueError: if any column other than "pending_test_date" and
            "virus_strain" contains NaNs, if a column listed in
            BOOLEAN_STATE_COLUMNS does not have dtype bool, or if
            ``duration["start"]`` is not the day after the date of *states*.

    """
    nan_tolerant_columns = ["pending_test_date", "virus_strain"]
    strict_columns = states.drop(columns=nan_tolerant_columns, errors="ignore")
    if strict_columns.isna().to_numpy().any():
        raise ValueError("'initial_states' are not allowed to contain NaNs.")

    for column in BOOLEAN_STATE_COLUMNS:
        if states[column].dtype != "bool":
            raise ValueError(f"Column '{column}' must be a boolean.")

    end_previous_simulation = get_date(states)
    new_start_simulation = end_previous_simulation + pd.Timedelta(1, unit="day")
    starts_seamlessly = new_start_simulation == duration["start"]
    if not starts_seamlessly:
        raise ValueError(
            "The resumed simulation does not start where the former ended. The former "
            f"ended on {end_previous_simulation.date()} and should be continued on "
            f"{new_start_simulation.date()}, but the specified 'duration' starts "
            f"{duration['start'].date()}."
        )
def attends_educational_facility(states, params, id_column, seed):  # noqa: U100
    """Indicate who goes to an educational facility today.

    Nobody attends on Saturdays and Sundays. On all other days everyone with a
    group id different from -1 is a candidate. Candidates living in a federal
    state with vacations stay at home, and attendance is further reduced for
    symptomatic individuals and for individuals who know that they are
    currently infected.

    Args:
        states (pandas.DataFrame): The states given by sid.
        params (pandas.DataFrame): parameters. The entries
            (model_name, "symptomatic_multiplier", "symptomatic_multiplier")
            and (model_name, "positive_test_multiplier",
            "positive_test_multiplier") are read from the "value" column,
            where model_name is "educ_{facility}_{digit}".
        id_column (str): name of the column in *states* that identifies which
            pupils and adults belong to a group. It must consist of exactly
            four parts separated by "_"; the first is the facility and the
            last the digit used to build the model name.
        seed (int): not used by this function.

    Returns:
        attends_facility (pandas.Series): same index as states. Truthy for
            individuals that go to the facility, falsy for those who do not.

    """
    facility, _second, _third, digit = id_column.split("_")
    model_name = f"educ_{facility}_{digit}"

    weekday = get_date(states).day_name()
    if weekday in ("Saturday", "Sunday"):
        return pd.Series(data=False, index=states.index)

    in_a_group = states[id_column] != -1
    attends_facility = _pupils_having_vacations_do_not_attend(
        in_a_group, states, params
    )

    reductions = (
        ("symptomatic_multiplier", states["symptomatic"]),
        ("positive_test_multiplier", states["knows_currently_infected"]),
    )
    for entry, condition in reductions:
        attends_facility = reduce_contacts_on_condition(
            attends_facility,
            states,
            params.loc[(model_name, entry, entry), "value"],
            condition,
            is_recurrent=True,
        )
    return attends_facility
def go_to_weekly_meeting(
    states, params, group_col_name, day_of_week, seed  # noqa: U100
):
    """Determine who participates in a weekly meeting today.

    Nobody participates unless today is *day_of_week*. On that day everyone
    with a group id different from -1 is a candidate and participation is
    reduced for symptomatic individuals and for individuals who know that they
    are currently infected.

    Args:
        states (pandas.DataFrame): sid states DataFrame
        params (pandas.DataFrame): parameters. The entries
            ("symptomatic_multiplier", "symptomatic_multiplier") and
            ("positive_test_multiplier", "positive_test_multiplier") are read
            from the "value" column.
        group_col_name (str): name of the column identifying this contact
            model's group column.
        day_of_week (str): day of the week on which this model takes place,
            spelled as returned by ``Timestamp.day_name()``.
        seed (int): not used by this function.

    Returns:
        attends_meeting (pandas.Series): same index as states. Truthy for
            individuals that go to the weekly meeting today.

    """
    today_name = get_date(states).day_name()
    if today_name != day_of_week:
        return pd.Series(data=False, index=states.index)

    attends_meeting = states[group_col_name] != -1

    reductions = (
        ("symptomatic_multiplier", states["symptomatic"]),
        ("positive_test_multiplier", states["knows_currently_infected"]),
    )
    for entry, condition in reductions:
        attends_meeting = reduce_contacts_on_condition(
            attends_meeting,
            states,
            params.loc[(entry, entry), "value"],
            condition,
            is_recurrent=True,
        )
    return attends_meeting
def meet_other_non_recurrent_contacts(states, params, seed):
    """Draw today's other non recurrent contacts including vacation contacts.

    Base contacts are drawn from the empirical distribution stored under
    "other_non_recurrent" in params (also on weekends, for everyone, without
    reduction). Individuals identified by
    ``_identify_ppl_affected_by_vacation`` additionally receive the vacation
    contacts drawn for them. Afterwards the sum is reduced for symptomatic
    individuals and for individuals who know that they are currently infected.

    Args:
        states (pandas.DataFrame): sid states DataFrame.
        params (pandas.DataFrame): parameters of all models.
        seed (int): seed passed on to the drawing functions.

    Returns:
        contacts (pandas.Series): number of contacts, cast to int.

    """
    base_contacts = calculate_non_recurrent_contacts_from_empirical_distribution(
        states=states,
        params=params.loc["other_non_recurrent"],
        seed=seed,
        on_weekends=True,
        query=None,
        reduce_on_condition=False,
    )

    is_affected_by_vacation = _identify_ppl_affected_by_vacation(states)
    today = get_date(states)
    state_to_vacation = get_states_w_vacations(today, params)
    potential_extra = _draw_potential_vacation_contacts(
        states, params, state_to_vacation, seed
    )
    # Everybody who is not affected by vacations gets zero extra contacts.
    extra_contacts = potential_extra.where(is_affected_by_vacation, 0)

    contacts = base_contacts + extra_contacts

    reductions = (
        ("symptomatic_multiplier", states["symptomatic"]),
        ("positive_test_multiplier", states["knows_currently_infected"]),
    )
    for entry, condition in reductions:
        contacts = reduce_contacts_on_condition(
            contacts,
            states,
            params.loc[("other_non_recurrent", entry, entry), "value"],
            condition,
            is_recurrent=False,
        )

    return contacts.astype(int)
def reopen_other_model(
    states,
    contacts,
    seed,
    start_multiplier,
    end_multiplier,
    start_date,
    end_date,
    is_recurrent,
    params=None,  # noqa: U100
):
    """Scale contacts by an activity level interpolated between two dates.

    This is for example used to model the gradual reopening after the first
    lockdown in Germany (End of April 2020 to beginning of October 2020).

    Args:
        states (pandas.DataFrame): sid states DataFrame.
        contacts (pandas.Series): contacts of the model that is adjusted.
        seed (int): seed used if the model is recurrent.
        start_multiplier (float): Activity level at start.
        end_multiplier (float): Activity level at end.
        start_date (str or pandas.Timestamp): Date at which the interpolation
            phase starts.
        end_date (str or pandas.Timestamp): Date at which the interpolation
            phase ends.
        is_recurrent (bool): if truthy, contacts are reduced with
            ``reduce_recurrent_model``, otherwise they are multiplied with the
            activity level.
        params: not used by this function.

    Returns:
        reduced (pandas.Series): the adjusted contacts.

    """
    activity_level = _interpolate_activity_level(
        date=get_date(states),
        start_multiplier=start_multiplier,
        end_multiplier=end_multiplier,
        start_date=start_date,
        end_date=end_date,
    )

    if is_recurrent:
        return reduce_recurrent_model(states, contacts, seed, activity_level)
    return activity_level * contacts
def calculate_non_recurrent_contacts_from_empirical_distribution(
    states, params, on_weekends, seed, query=None, reduce_on_condition=True
):
    """Draw how many non recurrent contacts each person will have today.

    Args:
        states (pandas.DataFrame): sid states DataFrame.
        params (pandas.DataFrame): DataFrame with two index levels,
            subcategory and name. has a "value" column that contains the
            probabilities to the number of possible columns in the "name"
            index level. Rows whose subcategory contains "multiplier" are not
            part of the distribution.
        on_weekends (bool or str): whether to meet on weekends or not. If it's
            a string it's interpreted as the prefix of columns identifying who
            participates in this contact model on weekends. Then, columns of
            the form "{on_weekends}_saturday" and "{on_weekends}_sunday" must
            be in states.
        seed (int): seed passed on to ``_draw_nr_of_contacts``.
        query (str, optional): query string to identify the subset of
            individuals to which this contact model applies. None means
            everyone.
        reduce_on_condition (bool): whether to reduce the contacts of
            symptomatic individuals and of individuals who know that they are
            currently infected.

    Returns:
        contacts (pandas.Series): index is the same as states. values is the
            number of contacts, as float.

    """
    date = get_date(states)
    day = date.day_name()
    is_weekend = day in ("Saturday", "Sunday")

    contacts = pd.Series(0, index=states.index)

    # Models that do not take place on weekends keep zero contacts then.
    if on_weekends or not is_weekend:
        if query is not None:
            is_participating = states.eval(query)
        else:
            is_participating = pd.Series(True, index=states.index)

        if isinstance(on_weekends, str) and is_weekend:
            # Previously ``states.eval(query)`` was called unconditionally in
            # this case and failed for the default ``query=None``.
            participating_today = states[f"{on_weekends}_{day.lower()}"]
            is_participating = is_participating & participating_today

        distribution = params.query("~subcategory.str.contains('multiplier')")[
            "value"
        ]
        contacts[is_participating] = _draw_nr_of_contacts(
            distribution=distribution,
            is_participating=is_participating,
            states=states,
            seed=seed,
        )

        if reduce_on_condition:
            for params_entry, condition in [
                ("symptomatic_multiplier", states["symptomatic"]),
                ("positive_test_multiplier", states["knows_currently_infected"]),
            ]:
                contacts = reduce_contacts_on_condition(
                    contacts,
                    states,
                    params.loc[(params_entry, params_entry), "value"],
                    condition,
                    is_recurrent=False,
                )

    contacts = contacts.astype(float)
    return contacts
def test_get_date(df, expectation, expected):
    """Check that ``get_date(df)`` equals *expected* inside *expectation*.

    *expectation* is used as a context manager around both the call and the
    comparison.
    """
    with expectation:
        assert get_date(df) == expected
def rapid_test_demand(
    receives_rapid_test,  # noqa: U100
    states,
    params,
    contacts,
    seed,
    save_path=None,
    randomize=False,
    share_refuser=None,
):
    """Determine who demands a rapid test today.

    Demand is the union of three channels: work, education and private. The
    private channel is itself the union of demand by household members, by
    individuals with own symptoms and by individuals with other meetings. All
    shares are taken from the "rapid_test_demand" section of params and, where
    they are time dependent, interpolated for the current date.

    If randomize is True and the date is after 2021-04-05, the calculated
    demand is replaced by a random assignment that targets the same share of
    the population (excluding a share of refusers).

    Args:
        receives_rapid_test: not used by this function.
        states (pandas.DataFrame): sid states DataFrame.
        params (pandas.DataFrame): parameters with a "rapid_test_demand"
            section.
        contacts (pandas.DataFrame): contacts of today.
        seed (int): seed used for the randomization.
        save_path (optional): if not None, statistics on the demand are
            appended as csv to this path. It must offer ``.exists()`` and be
            accepted by ``open``; the header row is only written if the file
            does not exist yet.
        randomize (bool): whether to distribute the demand randomly.
        share_refuser (float, optional): share of individuals that refuse to
            take a rapid test. Required if the demand is randomized.

    Returns:
        rapid_test_demand (pandas.Series): boolean demand indicator.

    """
    today = get_date(states)

    # get params subsets
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="indexing past lexsort depth may impact performance."
        )
        offer_params = params.loc[
            ("rapid_test_demand", "share_workers_receiving_offer")
        ]
        accept_params = params.loc[("rapid_test_demand", "share_accepting_work_offer")]
        educ_worker_params = params.loc[("rapid_test_demand", "educ_worker_shares")]
        student_params = params.loc[("rapid_test_demand", "student_shares")]
        private_params = params.loc[("rapid_test_demand", "private_demand")]

    # get work demand inputs
    share_with_offer = get_piecewise_linear_interpolation_for_one_day(
        today, offer_params
    )
    share_accepting = get_piecewise_linear_interpolation_for_one_day(
        today, accept_params
    )
    work_compliance_multiplier = share_with_offer * share_accepting

    # get educ demand inputs
    educ_worker_multiplier = get_piecewise_linear_interpolation_for_one_day(
        today, educ_worker_params
    )
    student_multiplier = get_piecewise_linear_interpolation_for_one_day(
        today, student_params
    )
    easter_phase = (
        "before_easter" if today < pd.Timestamp("2021-04-06") else "after_easter"
    )
    educ_frequency = params.loc[
        ("rapid_test_demand", "educ_frequency", easter_phase), "value"
    ]

    # get household member inputs
    private_demand_share = get_piecewise_linear_interpolation_for_one_day(
        today, private_params
    )

    # The channels are evaluated in a fixed order: work, educ, household,
    # own symptoms, other meetings.
    work_demand = _calculate_work_rapid_test_demand(
        states=states,
        contacts=contacts,
        compliance_multiplier=work_compliance_multiplier,
    )
    educ_demand = _calculate_educ_rapid_test_demand(
        states=states,
        contacts=contacts,
        educ_worker_multiplier=educ_worker_multiplier,
        student_multiplier=student_multiplier,
        frequency=educ_frequency,
    )
    hh_demand = _calculate_hh_member_rapid_test_demand(
        states=states, demand_share=private_demand_share
    )
    sym_without_pcr_demand = _calculate_own_symptom_rapid_test_demand(
        states=states, demand_share=private_demand_share
    )
    other_contact_demand = _calculate_other_meeting_rapid_test_demand(
        states=states, contacts=contacts, demand_share=private_demand_share
    )

    private_demand = hh_demand | sym_without_pcr_demand | other_contact_demand
    demand = work_demand | educ_demand | private_demand

    # only randomize after Easter
    if randomize and today > pd.Timestamp("2021-04-05"):
        assert (
            share_refuser is not None
        ), "You must specify a share of individuals that refuse to take a rapid test"
        demand = _randomize_rapid_tests(
            states=states,
            target_share_to_be_tested=demand.mean(),
            share_refuser=share_refuser,
            seed=seed,
        )

    if save_path is not None:
        # could also include "hh", "sym_without_pcr", "other_contact"
        demand_by_channel = pd.DataFrame(
            {
                "private": private_demand,
                "work": work_demand,
                "educ": educ_demand,
            }
        )
        if randomize:
            demand_by_channel["random"] = demand

        shares = create_rapid_test_statistics(
            demand_by_channel=demand_by_channel,
            states=states,
            date=today,
            params=params,
        )

        to_add = shares.T.to_csv()
        if save_path.exists():
            # The file already has a header row, so drop the first csv line.
            to_add = to_add.split("\n", 1)[1]
        with open(save_path, "a") as f:
            f.write(to_add)

    return demand