Esempio n. 1
0
def _sample_which_individuals_demand_a_test(
    demand_probabilities: pd.DataFrame, seed: itertools.count
) -> pd.Series:
    """Draw, for every individual, whether any test is demanded.

    The row-wise product of ``1 - p`` over all demand models is the probability
    that an individual demands no test at all (the demand models are combined as
    if they were independent). Its complement is the probability of demanding at
    least one test; that probability is handed to ``boolean_choices`` for the
    actual sampling.

    Args:
        demand_probabilities (pandas.DataFrame): One row per individual and one
            column per demand model, holding the probability that the individual
            requests a test through that model.
        seed (itertools.count): The seed counter. Its next value is consumed to
            seed NumPy's global random number generator.

    Returns:
        pandas.Series: The sampled outcomes returned by ``boolean_choices``,
            indexed like ``demand_probabilities``.

    """
    np.random.seed(next(seed))

    p_no_test = (1 - demand_probabilities).prod(axis=1)
    p_any_test = 1 - p_no_test

    sampled = boolean_choices(p_any_test.to_numpy())
    return pd.Series(sampled, index=demand_probabilities.index)
def reduce_recurrent_model(
        states,
        contacts,
        seed,
        multiplier,
        params=None  # noqa: U100
):
    """Thin out recurrent contacts by randomly cancelling participation.

    Recurrent contacts are stored as a boolean Series, so scaling them by a
    number would not change how many contacts take place. Instead, one fresh draw
    per individual is taken from ``boolean_choices`` with probability
    *multiplier*, and a scheduled contact only survives where that draw is kept.

    NOTE(review): ``boolean_choices`` is defined outside this block; the
    description assumes it returns True with the given probability - confirm.

    Args:
        states (pandas.DataFrame): Its length and index define the shape of the
            result. It is also passed to ``get_date`` when *multiplier* is a
            Series.
        contacts (pandas.Series): Scheduled recurrent contacts. Only its values
            are used (via ``to_numpy``).
        seed (int): Seed for NumPy's global random number generator.
        multiplier (float or pd.Series): Must be smaller or equal to one. If a
            Series is supplied the index must be dates; the entry for the date
            returned by ``get_date(states)`` is used.
        params: Unused.

    Returns:
        reduced (pandas.Series): same index as states. Entries that were falsy in
            *contacts* are passed through unchanged; truthy entries are replaced
            by the new random draw. The more individuals already had a False
            there, the smaller the effect.

    """
    np.random.seed(seed)

    # A date-indexed multiplier is resolved to the value of the current date.
    if isinstance(multiplier, pd.Series):
        multiplier = multiplier[get_date(states)]

    scheduled = contacts.to_numpy()
    participation_probs = np.full(len(states), multiplier)
    still_participates = boolean_choices(participation_probs)

    # Only scheduled participants are resampled; everyone else keeps their value.
    thinned = np.where(scheduled, still_participates, scheduled)
    return pd.Series(thinned, index=states.index)
Esempio n. 3
0
def test_create_initial_immunity_lln(synthetic_data):
    """Compare simulated immunity shares with empirical case shares per group.

    Cases are drawn on a population made of 5000 copies of ``synthetic_data``
    with an age-group-specific probability. ``create_initial_immunity`` is then
    run on 50000 copies together with sparse random initial infections. The
    share of immune individuals per county and age group must equal, after
    rounding to two decimals, the share of drawn cases in that group.

    The order of the random draws below is part of the test and must not change.

    """
    group_cols = ["county", "age_group_rki"]
    case_date = pd.Timestamp("2020-03-03")

    full_synthetic_data = pd.concat(
        [synthetic_data] * 50000).reset_index(drop=True)
    # synthetic data implies 20% old A, 30% young A, 30% old B, 20% young B

    np.random.seed(338)
    draw_prob = {"young": 0.2, "old": 0.1}
    cases = pd.concat([synthetic_data] * 5000).reset_index(drop=True)
    p_case = cases["age_group_rki"].map(draw_prob.get)
    cases[case_date] = boolean_choices(p_case)
    population_size = len(cases)

    cases_by_group = cases.groupby(group_cols)
    empirical_group_sizes = cases_by_group.size()

    # Bring the grouped case counts into the (date, county, age group) layout.
    stacked = cases_by_group.sum().stack()
    stacked.index.names = ["county", "age_group_rki", "date"]
    long_format = stacked.reset_index().set_index(
        ["date", "county", "age_group_rki"])
    empirical_infections = long_format[0]
    empirical_infections.name = "newly_infected"

    expected_shares = empirical_infections["2020-03-03"] / empirical_group_sizes

    initial_infections = pd.DataFrame(index=full_synthetic_data.index)
    to_draw = len(full_synthetic_data)
    for day in ("2020-03-02", "2020-03-03"):
        initial_infections[day] = np.random.choice(
            a=[True, False],
            size=to_draw,
            p=[0.01, 0.99],
        )

    res = create_initial_immunity(
        empirical_infections=empirical_infections,
        synthetic_data=full_synthetic_data,
        initial_infections=initial_infections,
        population_size=population_size,
        date="2020-03-04",
        seed=3399,
        reporting_delay=0,
    )

    full_synthetic_data["resulting_immune"] = res
    resulting_immune_shares = (
        full_synthetic_data.groupby(group_cols)["resulting_immune"].mean()
    )

    pd.testing.assert_series_equal(
        resulting_immune_shares.sort_index().round(2),
        expected_shares.sort_index().round(2),
        check_names=False,
    )
def _draw_potential_vacation_contacts(states, params, state_to_vacation, seed):
    """Draw an additional vacation contact indicator for every individual.

    Each federal state found in ``states["state"]`` starts with a contact
    probability of 0. For the federal states listed in *state_to_vacation* the
    probability is read from *params* at
    ``("additional_other_vacation_contact", "probability", <vacation>)``.

    Args:
        states (pandas.DataFrame): Must contain the column "state".
        params (pandas.DataFrame): Must contain a "value" column and the index
            entries described above.
        state_to_vacation (dict): Maps a federal state to the name of its
            vacation.
        seed (int): Seed for NumPy's global random number generator.

    Returns:
        pandas.Series: Result of ``boolean_choices`` cast to int, indexed like
            *states*.

    """
    np.random.seed(seed)

    # Federal states without an entry in state_to_vacation keep probability 0.
    p_by_fed_state = dict.fromkeys(states["state"].unique(), 0)
    p_by_fed_state.update({
        fed_state: params.loc[
            ("additional_other_vacation_contact", "probability", vacation),
            "value",
        ]
        for fed_state, vacation in state_to_vacation.items()
    })

    p_contact = states["state"].map(p_by_fed_state.get)
    draws = boolean_choices(p_contact)
    return pd.Series(draws, index=states.index).astype(int)
Esempio n. 5
0
def _sample_test_outcome(states, receives_rapid_test, params, seed):
    """Sample the outcomes of the rapid tests.

    Individuals with ``cd_infectious_true >= -10`` are treated as infected. For
    tested infected individuals the probability of a positive result comes from
    ``_create_sensitivity``. For tested individuals who are not infected, a
    positive result is drawn with probability ``1 - specificity``. Everyone who
    does not receive a test stays False.

    Args:
        states (pandas.DataFrame): Must contain the column "cd_infectious_true".
        receives_rapid_test (pandas.Series): Boolean mask of test receivers.
        params (pandas.DataFrame): Must contain a "value" column with the entries
            ("rapid_test", "sensitivity") and
            ("rapid_test", "specificity", "specificity").
        seed (itertools.count): Seed counter; its next value seeds NumPy's global
            random number generator.

    Returns:
        pandas.Series: Indexed like *states*; holds the sampled test results.

    """
    np.random.seed(next(seed))
    tested_positive = pd.Series(False, index=states.index)

    # Only the sensitivity lookup is shielded from the lexsort warning.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="indexing past lexsort depth may impact performance.",
        )
        sensitivity_params = params.loc[("rapid_test", "sensitivity"), "value"]

    infected = states["cd_infectious_true"] >= -10
    infected_and_tested = infected & receives_rapid_test
    not_infected_and_tested = ~infected & receives_rapid_test

    # True positives: drawn first so the order of random draws stays fixed.
    detection_probs = _create_sensitivity(
        states=states[infected_and_tested],
        sensitivity_params=sensitivity_params,
    )
    tested_positive.loc[infected_and_tested] = boolean_choices(detection_probs)

    # False positives among the tested individuals who are not infected.
    specificity = params.loc[
        ("rapid_test", "specificity", "specificity"), "value"
    ]
    false_positive_probs = np.full(
        not_infected_and_tested.sum(), 1 - specificity
    )
    tested_positive.loc[not_infected_and_tested] = boolean_choices(
        false_positive_probs
    )

    return tested_positive
def create_initial_immunity(
    empirical_infections,
    synthetic_data,
    initial_infections,
    date,
    seed,
    reporting_delay,
    population_size,
):
    """Create a Series with initial immunity.

    Individuals who appear in *initial_infections* are always immune. For all
    others, immunity is drawn with a group-specific probability obtained from
    ``_calculate_exog_immunity_prob``, which receives the total immunity
    probability (from *empirical_infections*) and the probability already
    covered by *initial_infections*.

    Args:
        empirical_infections (pandas.Series): Newly infected Series with the index
            levels ["date", "county", "age_group_rki"]. These must already be
            corrected to include undetected cases. The index does not need to
            be sorted; it is sorted before it is cut off at the end date.
        synthetic_data (pandas.DataFrame): Dataset with one row per simulated
            individual. Must contain the columns age_group_rki and county.
        initial_infections (pandas.DataFrame): DataFrame with same index as
            synthetic_data and one column for each day until *date*.
            Dtype is boolean. It is assumed that these already include
            undetected cases.
        date (str or pandas.Timestamp): Last date (before adding
            *reporting_delay*) whose empirical infections are taken into account.
        seed (int): Seed for NumPy's global random number generator.
        reporting_delay (int): Number of days by which the reporting of cases is
            delayed. If given, later days are used to get the infections of the
            demanded time frame.
        population_size (int): Size of the population behind the empirical_infections.

    Returns:
        pd.Series: Boolean series with same index as synthetic_data.

    Raises:
        AssertionError: If the index levels of *empirical_infections* are not
            ["date", "county", "age_group_rki"], if a column of
            *initial_infections* lies after *date* plus *reporting_delay*, or if
            the index of the relevant empirical infections has duplicates.

    """
    # Validate the index layout first so that malformed input fails with this
    # message instead of an obscure error from the slicing below.
    index_cols = ["date", "county", "age_group_rki"]
    correct_index_levels = empirical_infections.index.names == index_cols
    assert correct_index_levels, f"Your data must have {index_cols} as index levels."

    date_with_delay = pd.Timestamp(date) + pd.Timedelta(days=reporting_delay)
    # Sort before slicing: label-based slicing of a MultiIndex needs a sorted
    # index, so slicing first could fail for unsorted input.
    empirical_infections = empirical_infections.sort_index()[:date_with_delay]

    initial_before_date = [
        pd.Timestamp(col) <= date_with_delay for col in initial_infections
    ]
    assert all(
        initial_before_date), f"Initial infections must lie before {date}."

    duplicates_in_index = empirical_infections.index.duplicated().any()
    assert not duplicates_in_index, "Your index must not have any duplicates."

    # Anyone infected on at least one of the initial days counts as immune.
    endog_immune = initial_infections.any(axis=1)

    total_immune = empirical_infections.groupby(["age_group_rki",
                                                 "county"]).sum()

    total_immunity_prob = _calculate_total_immunity_prob(
        total_immune,
        synthetic_data,
        population_size,
    )
    endog_immunity_prob = _calculate_endog_immunity_prob(
        initial_infections,
        synthetic_data,
    )

    exog_immunity_prob = _calculate_exog_immunity_prob(total_immunity_prob,
                                                       endog_immunity_prob)

    np.random.seed(seed)
    # The group-level probability is broadcast to every individual of the group.
    hypothetical_exog_prob = pd.merge(
        synthetic_data,
        exog_immunity_prob,
        left_on=["age_group_rki", "county"],
        right_index=True,
        validate="m:1",
    )["exog_immunity_prob"]
    # Restore the row order of synthetic_data after the merge.
    hypothetical_exog_prob = hypothetical_exog_prob.reindex(
        synthetic_data.index)

    hypothetical_exog_choice = pd.Series(
        boolean_choices(hypothetical_exog_prob.to_numpy()),
        index=synthetic_data.index,
    )
    # Keep the exogenous draw only where no initial infection exists; elsewhere
    # the (True) endogenous immunity wins.
    return hypothetical_exog_choice.where(~endog_immune, endog_immune)