def _sample_which_individuals_demand_a_test(
    demand_probabilities: pd.DataFrame, seed: itertools.count
) -> pd.Series:
    """Draw, for every individual, whether any test is demanded.

    The per-model probabilities are combined row-wise: the product of the
    complements ``1 - p`` over all columns is taken as the probability that
    no test is demanded, and one minus that product as the probability that
    at least one test is demanded. The combined probability is then handed to
    ``boolean_choices`` to obtain one draw per individual.

    Args:
        demand_probabilities (pandas.DataFrame): One row per individual and
            one column per demand model, holding the probability that the
            individual requests a test under that model.
        seed (itertools.count): The seed counter. Its next value is used to
            seed NumPy's global random number generator.

    Returns:
        demands_test (pandas.Series): Series with the index of
            *demand_probabilities* holding the draws; it indicates the
            individuals who demand a test.

    """
    np.random.seed(next(seed))

    # P(at least one demand) = 1 - P(no demand under any model).
    p_no_test = (1 - demand_probabilities).prod(axis=1)
    p_any_test = 1 - p_no_test

    draws = boolean_choices(p_any_test.to_numpy())
    return pd.Series(draws, index=demand_probabilities.index)
def reduce_recurrent_model(
    states, contacts, seed, multiplier, params=None  # noqa: U100
):
    """Thin out recurrent contacts by randomly cancelling scheduled meetings.

    Recurrent contacts are stored as booleans, so scaling them with a number
    would not change how many contacts take place. Instead, a fresh draw is
    made for every individual and a scheduled contact only survives if that
    draw is truthy. Entries that were already falsy are passed through
    unchanged.

    Args:
        states (pandas.DataFrame): The states. Only its length and index are
            used here, plus the current date (via ``get_date``) when
            *multiplier* is a Series.
        contacts (pandas.Series): Scheduled recurrent contacts.
        seed: Value passed to ``numpy.random.seed``.
        multiplier (float or pd.Series): Must be smaller or equal to one. If a
            Series is supplied the index must be dates; the entry for the
            current date is used.
        params: Unused.

    Returns:
        reduced (pandas.Series): same index as states. Scheduled contacts are
            kept where the draw from ``boolean_choices`` is truthy -
            presumably with probability *multiplier*, to be confirmed against
            ``boolean_choices`` - and dropped otherwise. The more individuals
            already had a False there, the smaller the effect.

    """
    np.random.seed(seed)

    # A date-indexed multiplier is resolved to the scalar for the current day.
    if isinstance(multiplier, pd.Series):
        share_kept = multiplier[get_date(states)]
    else:
        share_kept = multiplier

    scheduled = contacts.to_numpy()
    keep_draws = boolean_choices(np.full(len(states), share_kept))

    # Only entries that were scheduled can be affected by the draw.
    thinned = np.where(scheduled, keep_draws, scheduled)
    return pd.Series(thinned, index=states.index)
def test_create_initial_immunity_lln(synthetic_data):
    """Check group-wise immunity shares against empirical shares on a big sample.

    Empirical cases are drawn on a 5000-fold copy of the fixture, the
    simulated population is a 50000-fold copy. After rounding to two decimals
    the share of immune individuals per county and age group must equal the
    empirical case share of that group.
    """
    group_keys = ["county", "age_group_rki"]

    population = pd.concat([synthetic_data] * 50000).reset_index(drop=True)
    # synthetic data implies 20% old A, 30% young A, 30% old B, 20% young B

    np.random.seed(338)

    # Empirical side: one boolean case column for 2020-03-03.
    draw_prob = {"young": 0.2, "old": 0.1}
    cases = pd.concat([synthetic_data] * 5000).reset_index(drop=True)
    case_probabilities = cases["age_group_rki"].map(draw_prob.get)
    cases[pd.Timestamp("2020-03-03")] = boolean_choices(case_probabilities)

    group_sizes = cases.groupby(group_keys).size()
    n_empirical = len(cases)

    # Reshape the per-group case counts into a (date, county, age group) Series.
    stacked_cases = cases.groupby(group_keys).sum().stack()
    stacked_cases.index.names = ["county", "age_group_rki", "date"]
    empirical = stacked_cases.reset_index().set_index(
        ["date", "county", "age_group_rki"]
    )[0]
    empirical.name = "newly_infected"

    expected = empirical["2020-03-03"] / group_sizes

    # Simulated side: two days of endogenous infections. The order of the two
    # draws matters for reproducibility under the seed set above.
    n_population = len(population)
    endogenous = pd.DataFrame(index=population.index)
    for day in ("2020-03-02", "2020-03-03"):
        endogenous[day] = np.random.choice(
            a=[True, False], size=n_population, p=[0.01, 0.99]
        )

    population["resulting_immune"] = create_initial_immunity(
        empirical_infections=empirical,
        synthetic_data=population,
        initial_infections=endogenous,
        population_size=n_empirical,
        date="2020-03-04",
        seed=3399,
        reporting_delay=0,
    )

    actual = population.groupby(group_keys)["resulting_immune"].mean()

    pd.testing.assert_series_equal(
        actual.sort_index().round(2),
        expected.sort_index().round(2),
        check_names=False,
    )
def _draw_potential_vacation_contacts(states, params, state_to_vacation, seed):
    """Draw an integer indicator of a potential vacation contact per individual.

    Every federal state occurring in ``states["state"]`` starts with a contact
    probability of zero. For each state listed in *state_to_vacation* the
    probability is replaced by the value stored in *params* under
    ``("additional_other_vacation_contact", "probability", <vacation>)``.
    Each individual's draw then uses the probability of their state.

    Args:
        states (pandas.DataFrame): Must contain the column "state".
        params (pandas.DataFrame): Must contain the column "value" and the
            index entries described above.
        state_to_vacation (dict): Maps federal states to vacation names.
        seed: Value passed to ``numpy.random.seed``.

    Returns:
        pandas.Series: Draws from ``boolean_choices`` cast to int, with the
            index of *states*.

    """
    np.random.seed(seed)

    # Default of zero for every state present; overwritten where a vacation
    # is in effect.
    contact_prob_by_state = dict.fromkeys(states["state"].unique(), 0)
    contact_prob_by_state.update(
        {
            fed_state: params.loc[
                ("additional_other_vacation_contact", "probability", vacation),
                "value",
            ]
            for fed_state, vacation in state_to_vacation.items()
        }
    )

    individual_prob = states["state"].map(contact_prob_by_state.get)
    draws = pd.Series(boolean_choices(individual_prob), index=states.index)
    return draws.astype(int)
def _sample_test_outcome(states, receives_rapid_test, params, seed):
    """Draw a positive/negative result for everybody who takes a rapid test.

    Individuals with ``cd_infectious_true >= -10`` are treated as infected.
    Tested infected individuals get a positive result according to the
    sensitivity produced by ``_create_sensitivity``; tested uninfected
    individuals get a (false) positive result with probability
    ``1 - specificity``. Everyone who is not tested stays False.

    Args:
        states (pandas.DataFrame): Must contain the column
            "cd_infectious_true".
        receives_rapid_test (pandas.Series): Boolean mask of test takers,
            aligned with *states*.
        params (pandas.DataFrame): Must contain the rapid test sensitivity and
            specificity entries in the column "value".
        seed (itertools.count): The seed counter. Its next value is used to
            seed NumPy's global random number generator.

    Returns:
        pandas.Series: Series with the index of *states*, initialised with
            False and overwritten with the draws for test takers.

    """
    np.random.seed(next(seed))
    outcome = pd.Series(index=states.index, data=False)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message="indexing past lexsort depth may impact performance."
        )
        sensitivity_params = params.loc[("rapid_test", "sensitivity"), "value"]

    infected = states["cd_infectious_true"] >= -10
    tested_and_infected = infected & receives_rapid_test
    tested_and_uninfected = ~infected & receives_rapid_test

    # True positives are drawn first; the false positive draws follow.
    sensitivity = _create_sensitivity(
        states=states[tested_and_infected],
        sensitivity_params=sensitivity_params,
    )
    outcome.loc[tested_and_infected] = boolean_choices(sensitivity)

    specificity = params.loc[("rapid_test", "specificity", "specificity"), "value"]
    false_positive_prob = np.full(tested_and_uninfected.sum(), 1 - specificity)
    outcome.loc[tested_and_uninfected] = boolean_choices(false_positive_prob)

    return outcome
def create_initial_immunity(
    empirical_infections,
    synthetic_data,
    initial_infections,
    date,
    seed,
    reporting_delay,
    population_size,
):
    """Create a Series with initial immunity.

    Individuals count as immune if they were infected according to
    *initial_infections* or if they are drawn as immune in an additional
    random draw. The probability for that draw is obtained per age group and
    county from the helper functions ``_calculate_total_immunity_prob``,
    ``_calculate_endog_immunity_prob`` and ``_calculate_exog_immunity_prob``.

    Args:
        empirical_infections (pandas.Series): Newly infected Series with the
            index levels ["date", "county", "age_group_rki"]. These must
            already be corrected to include undetected cases.
        synthetic_data (pandas.DataFrame): Dataset with one row per simulated
            individual. Must contain the columns age_group_rki and county.
        initial_infections (pandas.DataFrame): DataFrame with same index as
            synthetic_data and one column for each day until *date*.
            Dtype is boolean. It is assumed that these already include
            undetected cases.
        date (pandas.Timestamp or str): Last date (before adding the
            reporting delay) up to which empirical infections are used.
        seed (int): Passed to ``numpy.random.seed`` before the random draw.
        reporting_delay (int): Number of days by which the reporting of cases
            is delayed. If given, later days are used to get the infections of
            the demanded time frame.
        population_size (int): Size of the population behind the
            empirical_infections.

    Returns:
        pd.Series: Boolean series with same index as synthetic_data.

    Raises:
        AssertionError: If a column of *initial_infections* lies after the
            delayed date, if the index levels of *empirical_infections* are
            not ["date", "county", "age_group_rki"] or if its index contains
            duplicates.

    """
    date_with_delay = pd.Timestamp(date) + pd.Timedelta(days=reporting_delay)

    # Sort before slicing: a label slice on a MultiIndex that is not
    # lexsorted raises an UnsortedIndexError, so sorting afterwards is too
    # late. For input where slicing already worked the result is identical.
    empirical_infections = empirical_infections.sort_index()[:date_with_delay]

    initial_before_date = [
        pd.Timestamp(col) <= date_with_delay for col in initial_infections
    ]
    assert all(initial_before_date), f"Initial infections must lie before {date}."

    index_cols = ["date", "county", "age_group_rki"]
    correct_index_levels = empirical_infections.index.names == index_cols
    assert correct_index_levels, f"Your data must have {index_cols} as index levels."

    duplicates_in_index = empirical_infections.index.duplicated().any()
    assert not duplicates_in_index, "Your index must not have any duplicates."

    # Anybody infected on any of the initial days is immune.
    endog_immune = initial_infections.any(axis=1)

    total_immune = empirical_infections.groupby(["age_group_rki", "county"]).sum()

    total_immunity_prob = _calculate_total_immunity_prob(
        total_immune,
        synthetic_data,
        population_size,
    )
    endog_immunity_prob = _calculate_endog_immunity_prob(
        initial_infections,
        synthetic_data,
    )
    exog_immunity_prob = _calculate_exog_immunity_prob(
        total_immunity_prob, endog_immunity_prob
    )

    np.random.seed(seed)

    # need to duplicate exog prob on synthetical data
    hypothetical_exog_prob = pd.merge(
        synthetic_data,
        exog_immunity_prob,
        left_on=["age_group_rki", "county"],
        right_index=True,
        validate="m:1",
    )["exog_immunity_prob"]
    # Restore the row order of synthetic_data before drawing.
    hypothetical_exog_prob = hypothetical_exog_prob.reindex(synthetic_data.index)

    hypothetical_exog_choice = pd.Series(
        boolean_choices(hypothetical_exog_prob.to_numpy()),
        index=synthetic_data.index,
    )
    # Endogenously immune individuals stay immune; everybody else gets the
    # outcome of the random draw.
    return hypothetical_exog_choice.where(~endog_immune, endog_immune)