def test_negative_test_and_symptoms(self):
        """Negative lab result followed by freshly reported severe symptoms.

        Registers a negative lab test for ``human1``, moves the clock three
        days forward, reports a SEVERE symptom and runs the heuristic with an
        empty mailbox.  The heuristic recommendation level must stay at 0 and
        all seven entries of the risk history must equal 0.20009698.
        """
        # Original author's note: should hit "recovered" and just default to
        # the lab results.
        household = Household(
            env=self.env,
            rng=self.rng,
            conf=self.conf,
            name="household:1",
            location_type="HOUSEHOLD",
            lat=0,
            lon=0,
            area=10,
            capacity=5,
        )
        household.residents = [self.human1]
        self.human1.household = household
        self.human1.set_test_info("lab", "negative")

        # Advance the clock by three days and hand the new env to the human.
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env

        self.human1.rolling_all_reported_symptoms.appendleft([SEVERE])

        empty_mailbox = {}
        risk_history = self.heuristic.compute_risk(
            self.human1, empty_mailbox, self.hd)

        assert self.human1._heuristic_rec_level == 0
        assert risk_history == [0.20009698] * 7
    def test_handle_tests_negative_8_days(self):
        """Negative lab test eight days back plus one recent moderate-risk message.

        Scenario from the original author: the negative lab test was received
        8 days ago, and a moderate risk message arrived two days ago.  The
        heuristic recommendation level must become 2 and the twelve-entry
        risk history must match the pinned values below.
        """
        household = Household(
            env=self.env,
            rng=self.rng,
            conf=self.conf,
            name="household:1",
            location_type="HOUSEHOLD",
            lat=0,
            lon=0,
            area=10,
            capacity=5,
        )
        household.residents = [self.human1]
        self.human1.household = household
        self.human1.set_test_info("lab", "negative")

        # Advance the clock by eight days.
        self.env = Env(self.start_time + datetime.timedelta(days=8))
        self.human1.env = self.env

        # Each cluster is (rel_encounter_day, new_risk_level, num_encounters).
        clusters = [(2, 8, 1)]

        risk_history = self.heuristic.compute_risk(
            self.human1, clusters, self.hd)

        expected_history = [0.42782824] + [0.01] * 3 + [0.20009698] * 8
        assert self.human1._heuristic_rec_level == 2
        assert risk_history == expected_history
    def setUp(self):
        """Build the fixture shared by the tests: config, city, heuristic, two app users.

        Creates an ``EmptyCity`` (given a ``TrackerMock`` when it has no
        tracker), two senior residences, one household, and two humans living
        in that household.  Both humans have the app and start with
        ``_rec_level`` and ``_heuristic_rec_level`` set to 0.  The humans are
        exposed as ``self.human1``, ``self.human2``, ``self.humans`` and the
        name-indexed dict ``self.hd``.
        """
        self.conf = get_test_conf("naive_local.yaml")
        self.start_time = datetime.datetime(2020, 2, 28, 0, 0)
        self.simulation_days = 40
        self.city_x_range = (0, 1000)
        self.city_y_range = (0, 1000)
        self.rng = np.random.RandomState(42)
        self.heuristic = Heuristic(version=1, conf=self.conf)
        self.env = Env(self.start_time)
        self.city = EmptyCity(self.env, self.rng, self.city_x_range,
                              self.city_y_range, self.conf)
        # The city may lack a ``tracker`` attribute entirely or have it unset;
        # in both cases install the mock.
        if getattr(self.city, "tracker", None) is None:
            self.city.tracker = TrackerMock()

        self.sr = _create_senior_residences(2, self.city, self.city.rng,
                                            self.conf)

        # Keyword order matters here: the three draws from ``self.city.rng``
        # (household seed, lat, lon) must happen in this sequence.
        house = Household(
            env=self.city.env,
            rng=np.random.RandomState(self.city.rng.randint(2**16)),
            conf=self.conf,
            name=f"HOUSEHOLD:{len(self.city.households)}",
            location_type="HOUSEHOLD",
            lat=self.city.rng.randint(*self.city.x_range),
            lon=self.city.rng.randint(*self.city.y_range),
            area=None,
            capacity=None,
        )

        # Both humans are built by the same code path so that each one gets
        # its *own* ``_rec_level`` reset (the second human's reset used to be
        # applied to the first human by mistake).
        self.humans = []
        for name, age in ((1, 42), (2, 6 * 9)):
            human = Human(env=self.city.env,
                          city=self.city,
                          name=name,
                          age=age,
                          rng=self.rng,
                          conf=self.conf)
            human.assign_household(house)
            human.has_app = True
            human._rec_level = 0
            human._heuristic_rec_level = 0
            self.humans.append(human)

        self.human1, self.human2 = self.humans
        self.hd = {h.name: h for h in self.humans}
    def test_recovered(self):
        """A user at recommendation level 1 with no new signal returns to level 0.

        Three days after the start, with an empty cluster list, the heuristic
        must reset the recommendation level to 0 and return seven risk
        entries of 0.01.
        """
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env

        # The user starts from recommendation level 1.
        self.human1._rec_level = 1
        self.human1._heuristic_rec_level = 1

        no_clusters = []
        risk_history = self.heuristic.compute_risk(
            self.human1, no_clusters, self.hd)

        assert self.human1._heuristic_rec_level == 0
        assert risk_history == [0.01] * 7
    def test_high_risk_history_no_new_signal(self):
        """A user at level 3 with a 0.9 risk history but no new signal returns to level 0.

        Three days after the start, with an empty cluster list and a stored
        risk history of 0.9 for days 1-3, the heuristic must reset the
        recommendation level to 0 and return seven risk entries of 0.01.
        """
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env

        # The user starts from recommendation level 3 with high recorded risk.
        self.human1._rec_level = 3
        self.human1._heuristic_rec_level = 3
        self.human1.risk_history_map = dict.fromkeys((1, 2, 3), 0.9)

        no_clusters = []
        risk_history = self.heuristic.compute_risk(
            self.human1, no_clusters, self.hd)

        assert self.human1._heuristic_rec_level == 0
        assert risk_history == [0.01] * 7
    def test_handle_tests_positive_lt_2_days(self):
        """A positive lab test registered only one day ago is not acted upon yet.

        ``handle_tests`` must return an empty risk history and a
        recommendation level of 0.
        """
        household = Household(
            env=self.env,
            rng=self.rng,
            conf=self.conf,
            name="household:1",
            location_type="HOUSEHOLD",
            lat=0,
            lon=0,
            area=10,
            capacity=5,
        )
        household.residents = [self.human1]
        self.human1.household = household
        self.human1.set_test_info("lab", "positive")

        # Advance the clock by a single day.
        self.env = Env(self.start_time + datetime.timedelta(days=1))
        self.human1.env = self.env

        risk_history, rec_level = self.heuristic.handle_tests(self.human1)

        assert rec_level == 0
        assert risk_history == []
    def test_low_risk_message_and_severe_symptoms(self):
        """Severe symptoms together with a risk-level-1 message.

        Three days after the start the user reports a SEVERE symptom and has
        one cluster at risk level 1.  The heuristic recommendation level must
        become 3 and all seven risk entries must equal 0.94996601.
        """
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env
        self.human1.rolling_all_reported_symptoms.appendleft([SEVERE])

        # Each cluster is (rel_encounter_day, new_risk_level, num_encounters).
        clusters = [(5, 1, 1)]

        risk_history = self.heuristic.compute_risk(
            self.human1, clusters, self.hd)

        assert self.human1._heuristic_rec_level == 3
        assert risk_history == [0.94996601] * 7
    def test_high_risk_message_and_mild_symptoms_diff_days(self):
        """Mild symptoms together with a risk-level-13 message.

        Three days after the start the user reports a "mild" symptom and has
        one cluster at risk level 13.  The heuristic recommendation level
        must become 3.
        """
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env
        self.human1.rolling_all_reported_symptoms.appendleft(["mild"])

        # Each cluster is (rel_encounter_day, new_risk_level, num_encounters).
        clusters = [(2, 13, 1)]

        risk_history = self.heuristic.compute_risk(
            self.human1, clusters, self.hd)

        assert self.human1._heuristic_rec_level == 3
        # The array is heterogeneous because of the mixed signals between
        # symptoms and risk messages.
        expected_history = [0.8408014] + [0.79687407] * 6
        assert risk_history == expected_history
    def test_high_risk_history_mild_symptom(self):
        """A user at level 3 with a 0.9 risk history who now reports a mild symptom.

        Three days after the start, with an empty cluster list, a stored risk
        history of 0.9 for days 1-3 and a "mild" symptom report, the heuristic
        recommendation level must remain 3.
        """
        self.env = Env(self.start_time + datetime.timedelta(days=3))
        self.human1.env = self.env

        # The user starts from recommendation level 3 with high recorded risk.
        self.human1._rec_level = 3
        self.human1._heuristic_rec_level = 3
        self.human1.risk_history_map = dict.fromkeys((1, 2, 3), 0.9)
        self.human1.rolling_all_reported_symptoms.appendleft(["mild"])

        no_clusters = []
        risk_history = self.heuristic.compute_risk(
            self.human1, no_clusters, self.hd)

        assert self.human1._heuristic_rec_level == 3
        # For the last three days the high risk signal is maintained, while
        # older entries are updated.  The original author was not sure this
        # would ever happen in the real algorithm, since signals higher than
        # 0.79 are only written for a positive test result (for 14 days).
        expected_history = [0.9] * 3 + [0.79687407] * 4
        assert risk_history == expected_history
Example #10
0
    def test_handle_tests_negative_8_days_but_high_risk_before(self):
        """Negative lab test with an old moderate-risk message and a 0.9 risk history.

        Scenario from the original author: a negative lab test 8 days ago and
        a moderate risk message 12 days ago.  This triggers the "recovery"
        mode, so seven baseline risks are returned instead of
        [0.9, 0.9, 0.9, 0.9, 0.20009698, 0.20009698, 0.20009698, 0.20009698, 0.9].
        """
        # Unlike the sibling tests, the clock is advanced *before* the
        # household is built and the test result is registered.
        self.env = Env(self.start_time + datetime.timedelta(days=8))
        household = Household(
            env=self.env,
            rng=self.rng,
            conf=self.conf,
            name="household:1",
            location_type="HOUSEHOLD",
            lat=0,
            lon=0,
            area=10,
            capacity=5,
        )
        household.residents = [self.human1]
        self.human1.household = household
        self.human1.env = self.env
        # Risk of 0.9 recorded for days 1 through 9.
        self.human1.risk_history_map = {day: 0.9 for day in range(1, 10)}
        self.human1.set_test_info("lab", "negative")

        # Each cluster is (rel_encounter_day, new_risk_level, num_encounters).
        clusters = [(12, 8, 1)]
        risk_history = self.heuristic.compute_risk(
            self.human1, clusters, self.hd)

        assert self.human1._heuristic_rec_level == 0
        assert risk_history == [0.01] * 7
Example #11
0
def test_basic_demographics(
        seed: int,
        test_conf_name: str,
        age_error_tol: float = 3.21,
        age_distribution_error_tol: float = 0.20,
        sex_diff_error_tol: float = 0.1,
        profession_error_tol: float = 0.03,
        fraction_over_100_error_tol: float = 0.01):
    """
    Check demographic statistics of a freshly generated city.

    The following properties of a 5000-person city are verified:
        - population size
        - average and median population age
        - minimum and maximum age
        - fraction difference between male and female
        - fraction of people with sex 'other'
        - age distribution w.r.t. the configured P_AGE_REGION

    Reference values are from Canada statistics - census profile 2016 (ref: https://tinyurl.com/qsf2q8d)

    Args:
        seed (int): seed of the random number generator used to build the city
        test_conf_name (str): the filename of the configuration file used for testing
        age_error_tol (float): tolerance about average and median age discrepancy w.r.t. official statistics
        age_distribution_error_tol (float): tolerance about the population fraction assigned to each age group
        sex_diff_error_tol (float): tolerance about the male/female fraction difference and the 'other' fraction
        profession_error_tol (float): tolerance about the population fraction assigned to each profession
            (not used by any check in this function)
        fraction_over_100_error_tol (float): not used by any check in this function
    """

    conf = get_test_conf(test_conf_name)

    n_people = 5000
    init_fraction_sick = 0.01
    rng = np.random.RandomState(seed=seed)
    start_time = datetime.datetime(2020, 2, 28, 0, 0)
    env = Env(start_time)
    city_x_range = (0, 1000)
    city_y_range = (0, 1000)
    conf['simulation_days'] = 1
    city = City(
        env=env,
        n_people=n_people,
        init_fraction_sick=init_fraction_sick,
        rng=rng,
        x_range=city_x_range,
        y_range=city_y_range,
        conf=conf,
        logfile="logfile.txt",
    )
    city.have_some_humans_download_the_app()

    # Check that the actual population size is the same as specified
    assert len(city.humans) == n_people

    df = pd.DataFrame.from_records(
        data=[[human.age, human.sex] for human in city.humans],
        columns=['age', 'sex']
    )

    # Check basic statistics about age
    canstat_avg_population_age = 41.
    assert math.fabs(df.age.mean() - canstat_avg_population_age) < age_error_tol, \
        f'The simulated average population age is {df.age.mean():.2f} ' \
        f'while the statistics for Canada is {canstat_avg_population_age:.2f}'

    # The failure message reports the median values (it used to print the
    # mean and the average reference, which made failures misleading).
    canstat_median_population_age = 41.2
    assert math.fabs(df.age.median() - canstat_median_population_age) < age_error_tol, \
        f'The simulated median population age is {df.age.median():.2f} ' \
        f'while the statistics for Canada is {canstat_median_population_age:.2f}'

    minimum_age = 0
    assert df.age.min() >= minimum_age, 'There is a person with negative age.'

    maximum_age = 117  # Canadian record: Marie-Louise Meilleur
    assert df.age.max() < maximum_age, 'There is a person older than the Canadian longevity record.'

    # Check basic statistics about sex
    canstat_sex_rel_diff = 0.018
    sex_grouped = df.groupby('sex').count()
    sex_grouped = sex_grouped.apply(lambda x: x / n_people)
    sex_rel_diff = math.fabs(sex_grouped.age['male'] - sex_grouped.age['female'])
    assert (math.fabs(sex_rel_diff - canstat_sex_rel_diff) < sex_diff_error_tol), \
        f'The relative difference between male and female in the population is {sex_rel_diff} ' \
        f'while the actual number for Canada is {canstat_sex_rel_diff}'

    fraction_other_sex = sex_grouped.age['other']
    assert math.fabs(fraction_other_sex - 0.1) < sex_diff_error_tol, \
        f'The relative difference between other and the one specified in config ' \
        f'is too high (diff={fraction_other_sex - 0.1})'

    # Check that the simulated age distribution is similar to the one
    # specified in the configuration (P_AGE_REGION rows are read as
    # (lower bound, upper bound, fraction)).
    age_histogram = {(x1, x2): p for x1, x2, p in conf.get('P_AGE_REGION')}
    intervals = pd.IntervalIndex.from_tuples(list(age_histogram), closed='both')
    age_grouped = df.groupby(pd.cut(df['age'], intervals))
    age_grouped = age_grouped.agg({'age': 'count'})

    # Every person must fall in exactly one age bin
    assert age_grouped.age.sum() == n_people
    age_grouped = age_grouped.age.apply(lambda x: x / n_people)
    assert np.allclose(age_grouped.to_numpy(), np.array(list(age_histogram.values())), atol=age_distribution_error_tol)
def test_functional_seniors_residence():
    """Run a simulation of 1 infection in a seniors residence, and perform some sanity checks.

    Ten humans aged 65-99 live in a single senior residence; none of them is
    allowed to recover and one of them is infected at the start.  After 100
    simulated days (with ``City.run_app`` patched by ``fake_run_app``) the
    test checks that dead humans were removed from the residence, that at
    least one human died and that nobody is infectious any more.
    """

    with tempfile.TemporaryDirectory() as output_dir:

        rng = np.random.RandomState(42)

        # Config
        start_time = datetime.datetime(2020, 2, 28, 0, 0)
        simulation_days = 100
        city_x_range = (0, 1000)
        city_y_range = (0, 1000)

        # Find the test_configs directory, and load the required config yaml
        conf = get_test_conf("naive_local.yaml")
        conf['simulation_days'] = simulation_days
        conf['COVID_SPREAD_START_TIME'] = start_time
        conf['INTERVENTION_START_TIME'] = start_time
        conf['_MEAN_DAILY_UNKNOWN_CONTACTS'] = 0.5

        env = Env(start_time)
        city = EmptyCity(env, rng, city_x_range, city_y_range, conf)
        # Keyword order matters: the three draws from ``rng`` (residence
        # seed, lat, lon) must happen in this sequence.
        sr = Household(
            env=env,
            rng=np.random.RandomState(rng.randint(2 ** 16)),
            conf=conf,
            name="SENIOR_RESIDENCE:0",
            location_type="SENIOR_RESIDENCE",
            lat=rng.randint(*city_x_range),
            lon=rng.randint(*city_y_range),
            area=1000,
            capacity=None,
        )
        city.senior_residences.append(sr)

        N = 10

        # Create humans
        ages = city.rng.randint(65, 100, size=N)

        # One initial infection
        city.n_init_infected = 1

        humans = [
            Human(
                env=city.env,
                city=city,
                name=i,
                age=ages[i],
                rng=rng,
                conf=conf,
            )
            for i in range(N)
        ]

        for human in humans:
            human.assign_household(sr)
            sr.residents.append(human)
            human.mobility_planner.initialize()

        # Make sure none of the humans can recover (for later checks)
        for human in humans:
            human.never_recovers = True

        # Infect one of the humans.  A dedicated seeded generator keeps the
        # run reproducible (the unseeded global ``np.random`` was used
        # before) without consuming draws from the shared ``rng`` stream.
        patient_zero_rng = np.random.RandomState(42)
        humans[patient_zero_rng.randint(N)]._get_infected(1)

        city.humans = humans
        city.hd = {h.name: h for h in humans}
        city.initWorld()

        outfile = os.path.join(output_dir, "test1")

        env.process(city.run(SECONDS_PER_HOUR, outfile))

        for human in city.humans:
            env.process(human.run())

        with unittest.mock.patch.object(City, "run_app", new=fake_run_app):
            env.run(until=env.ts_initial + simulation_days * SECONDS_PER_DAY)

        n_dead = sum(h.is_dead for h in city.humans)

        # Check dead humans are removed from the residence
        assert n_dead == N - len(sr.residents)

        # Check there are some dead
        assert n_dead > 0

        # Check there are no humans that are infectious
        assert not any(h.is_infectious for h in city.humans)
def test_human_compute_covid_properties():
    """
    Test the covid properties of the class Human over a population for 3 ages.

    ``compute_covid_properties`` is sampled ``n_people`` times on one human at
    age 40.  Each sample is checked against per-sample invariants of the
    viral-load curve, and the sample means are compared with reference
    values.  The per-sample invariants are then re-checked for ages 20 and 75.
    """
    conf = get_test_conf("test_covid_testing.yaml")

    n_people = 1000
    init_fraction_sick = 0
    start_time = datetime.datetime(2020, 2, 28, 0, 0)
    city_x_range = (0, 1000)
    city_y_range = (0, 1000)

    env = Env(start_time)

    city = City(
        env,
        10,  # A small city is enough: compute_covid_properties() is called directly on one Human
        init_fraction_sick,
        np.random.RandomState(42),
        city_x_range,
        city_y_range,
        conf,
    )

    def _get_human_covid_properties(human):
        """Resample the covid properties of `human`, assert the per-sample
        invariants and return the ten sampled values as a list."""
        compute_covid_properties(human)

        assert human.viral_load_peak_start >= 0.5 - 0.00001
        assert human.incubation_days >= 2.0

        assert human.infectiousness_onset_days < human.incubation_days

        # viral_load_peak_start, viral_load_plateau_start and viral_load_plateau_
        # end are relative to infectiousness_onset_days
        assert human.infectiousness_onset_days < human.viral_load_peak_start + human.infectiousness_onset_days
        assert human.viral_load_peak_start + human.infectiousness_onset_days < \
               human.incubation_days
        assert human.incubation_days < human.viral_load_plateau_start + human.infectiousness_onset_days
        assert human.viral_load_plateau_start < human.viral_load_plateau_end
        assert human.viral_load_plateau_end + human.infectiousness_onset_days < \
               human.recovery_days

        # &infectiousness-onset [He 2020 https://www.nature.com/articles/s41591-020-0869-5#ref-CR1]
        # infectiousness started from 2.3 days (95% CI, 0.8–3.0 days) before symptom
        # onset and peaked at 0.7 days (95% CI, −0.2–2.0 days) before symptom onset (Fig. 1c).
        assert human.incubation_days - human.infectiousness_onset_days >= 0.5
        # TODO: re-add this bound
        # assert human.incubation_days - human.infectiousness_onset_days <= 4.3

        # &infectiousness-onset [He 2020 https://www.nature.com/articles/s41591-020-0869-5#ref-CR1]
        # infectiousness peaked at 0.7 days (95% CI, −0.2–2.0 days) before symptom onset (Fig. 1c).
        peak_to_symptom_onset = human.incubation_days - \
            (human.viral_load_peak_start + human.infectiousness_onset_days)
        # The lower bound may only be violated when viral_load_peak_start was
        # forced to 0.5 day after infectiousness_onset_days.
        assert peak_to_symptom_onset >= 0.01 or \
            abs(human.viral_load_peak_start - 0.5) <= 0.00001
        assert peak_to_symptom_onset <= 2.2

        # Avg plateau duration
        # inferred from https://www.medrxiv.org/content/10.1101/2020.04.10.20061325v2.full.pdf (Figure 1 & 4).
        # 8 is inferred from Figure 4 by eye-balling.
        assert human.viral_load_plateau_end - human.viral_load_plateau_start >= 3.0
        assert human.viral_load_plateau_end - human.viral_load_plateau_start <= 9.0

        assert human.viral_load_peak_height >= conf[
            'MIN_VIRAL_LOAD_PEAK_HEIGHT']
        assert human.viral_load_peak_height <= conf[
            'MAX_VIRAL_LOAD_PEAK_HEIGHT']

        assert human.viral_load_plateau_height <= human.viral_load_peak_height

        # peak_plateau_slope must transit from viral_load_peak_height to
        # viral_load_plateau_height
        # NOTE(review): this check (and the next one) is one-sided, no abs();
        # confirm whether a two-sided tolerance was intended.
        assert (human.viral_load_peak_height -
                human.peak_plateau_slope * (human.viral_load_plateau_start -
                                            human.viral_load_peak_start)) - \
               human.viral_load_plateau_height < 0.00001

        # plateau_end_recovery_slope must transit from viral_load_plateau_height to 0.0
        assert human.viral_load_plateau_height - \
               human.plateau_end_recovery_slope * (human.recovery_days -
                                                   (human.viral_load_plateau_end +
                                                    human.infectiousness_onset_days)) < 0.00001

        return [
            human.infectiousness_onset_days, human.viral_load_peak_start,
            human.incubation_days, human.viral_load_plateau_start,
            human.viral_load_plateau_end, human.recovery_days,
            human.viral_load_peak_height, human.viral_load_plateau_height,
            human.peak_plateau_slope, human.plateau_end_recovery_slope
        ]

    human = city.humans[0]
    # Reset the rng
    human.rng = np.random.RandomState(42)
    # force is_asymptomatic to True since we are not testing the symptoms
    human.is_asymptomatic = True
    # force the age to a median
    human.age = 40
    covid_properties_samples = [
        _get_human_covid_properties(human) for _ in range(n_people)
    ]

    # Column-wise mean over all samples; built as a new list so that the
    # first sample is not overwritten while accumulating.
    covid_properties_samples_mean = [
        sum(values) / n_people for values in zip(*covid_properties_samples)
    ]

    infectiousness_onset_days_mean, viral_load_peak_start_mean, \
        incubation_days_mean, viral_load_plateau_start_mean, \
        viral_load_plateau_end_mean, recovery_days_mean, \
        viral_load_peak_height_mean, viral_load_plateau_height_mean, \
        peak_plateau_slope_mean, plateau_end_recovery_slope_mean = covid_properties_samples_mean

    # infectiousness_onset_days
    # &infectiousness-onset [He 2020 https://www.nature.com/articles/s41591-020-0869-5#ref-CR1]
    # infectiousness started from 2.3 days (95% CI, 0.8–3.0 days) before symptom
    # onset and peaked at 0.7 days (95% CI, −0.2–2.0 days) before symptom onset (Fig. 1c).
    # TODO: infectiousness_onset_days has a minimum of 1 which affects this mean. Validate this assert
    assert abs(infectiousness_onset_days_mean - 2.3) < 1.5, \
        "The average of infectiousness_onset_days should be about 2.3"

    # viral_load_peak_start
    # &infectiousness-onset [He 2020 https://www.nature.com/articles/s41591-020-0869-5#ref-CR1]
    # infectiousness peaked at 0.7 days (95% CI, −0.2–2.0 days) before symptom onset (Fig. 1c).
    assert abs(incubation_days_mean -
               (viral_load_peak_start_mean + infectiousness_onset_days_mean) - 0.7) < 0.5, \
        "The average of viral_load_peak_start should be about 0.7"

    # incubation_days
    # INCUBATION PERIOD
    # Refer Table 2 (Appendix) in https://www.acpjournals.org/doi/10.7326/M20-0504 for parameters of lognormal fit
    assert abs(incubation_days_mean - 5.807) < 0.5, \
        "The average of incubation_days should be about 5.807 days"

    # viral_load_plateau_start_mean, viral_load_plateau_end_mean
    # Avg plateau duration
    # inferred from https://www.medrxiv.org/content/10.1101/2020.04.10.20061325v2.full.pdf (Figure 1 & 4).
    # 8 is inferred from Figure 4 by eye-balling.
    # abs() wraps the whole deviation so that the bound is two-sided, like
    # the asserts above (it used to accept any duration below 5.0).
    assert abs(viral_load_plateau_end_mean - viral_load_plateau_start_mean - 4.5) < 0.5, \
        "The average of the plateau duration should be about 4.5 days"

    # (no-source) 14 is loosely defined.
    # Two-sided bound as well (it used to accept any value below 14.5).
    assert abs(recovery_days_mean - incubation_days_mean - 14) < 0.5, \
        "The average of the recovery time  should be about 14 days"

    # Test with a young and senior ages
    for age in (20, 75):
        human.age = age
        for _ in range(n_people):
            # Assert the covid properties
            _get_human_covid_properties(human)
def test_viral_load_for_day():
    """
    Sample the viral load curve at its key points and check the values.

    A human is given hand-picked covid properties (infected at the current
    env timestamp) so that the expected viral load at each key day, and at
    the midpoint between consecutive key days, is easy to state.
    """
    conf = get_test_conf("test_covid_testing.yaml")

    start_time = datetime.datetime(2020, 2, 28, 0, 0)
    init_fraction_sick = 0
    city_x_range = (0, 1000)
    city_y_range = (0, 1000)

    env = Env(start_time)

    city = City(
        env,
        10,  # This test force the call Human.compute_covid_properties()
        init_fraction_sick,
        np.random.RandomState(42),
        city_x_range,
        city_y_range,
        conf,
    )

    human = city.humans[0]
    # The symptoms are not under test, so force the human to be asymptomatic
    human.is_asymptomatic = True
    # Use a median age
    human.age = 40
    # The infection happens right now
    now = env.timestamp
    human.infection_timestamp = now

    # Key points of the curve, in days since the infection timestamp.
    # Plausible values chosen so that the computations are easy to validate.
    infectiousness_onset_days = 2.5
    viral_load_peak_start = 4.5
    incubation_days = 5
    viral_load_plateau_start = 5.5
    viral_load_plateau_end = 5.5 + 4.5
    recovery_days = 5 + 15

    human.infectiousness_onset_days = infectiousness_onset_days
    # On the human, viral_load_peak_start, viral_load_plateau_start and
    # viral_load_plateau_end are stored relative to infectiousness_onset_days
    human.viral_load_peak_start = viral_load_peak_start - infectiousness_onset_days
    human.incubation_days = incubation_days
    human.viral_load_plateau_start = viral_load_plateau_start - infectiousness_onset_days
    human.viral_load_plateau_end = viral_load_plateau_end - infectiousness_onset_days
    human.recovery_days = recovery_days

    human.viral_load_peak_height = 1.0
    human.viral_load_plateau_height = 0.75
    human.peak_plateau_slope = 0.25 / (viral_load_plateau_start -
                                       viral_load_peak_start)
    human.plateau_end_recovery_slope = 0.75 / (recovery_days -
                                               viral_load_plateau_end)

    # At the infection timestamp itself
    assert viral_load_for_day(human, now) == 0.0

    # (days since infection, expected viral load), in chronological order
    checkpoints = [
        # Between infection_timestamp and infectiousness_onset_days
        (infectiousness_onset_days / 2, 0.0),
        (infectiousness_onset_days, 0.0),
        # Between infectiousness_onset_days and viral_load_peak_start
        (infectiousness_onset_days +
         (viral_load_peak_start - infectiousness_onset_days) / 2, 1.0 / 2),
        (viral_load_peak_start, 1.0),
        (incubation_days, 0.75 + 0.25 / 2),
        (viral_load_plateau_start, 0.75),
        # Between viral_load_plateau_start and viral_load_plateau_end
        (viral_load_plateau_start +
         (viral_load_plateau_end - viral_load_plateau_start) / 2, 0.75),
        (viral_load_plateau_end, 0.75),
        # Between viral_load_plateau_end and recovery_days
        (viral_load_plateau_end +
         (recovery_days - viral_load_plateau_end) / 2, 0.75 / 2),
        (recovery_days, 0.0),
    ]
    for days_since_infection, expected_load in checkpoints:
        sampled_at = now + datetime.timedelta(days=days_since_infection)
        assert viral_load_for_day(human, sampled_at) == expected_load
Example #15
0
def simulate(
    n_people: int = 1000,
    init_fraction_sick: float = 0.01,
    start_time: datetime.datetime = datetime.datetime(2020, 2, 28, 0, 0),
    simulation_days: int = 30,
    outfile: typing.Optional[typing.AnyStr] = None,
    out_chunk_size: typing.Optional[int] = None,
    seed: int = 0,
    conf: typing.Optional[typing.Dict] = None,
    logfile: str = None,
):
    """
    Build a city, register all simulation processes and run them to completion.

    The call arguments are copied into `conf` (which is modified in place), the
    COVID / intervention / simulation start times are derived from `start_time`
    and the `COVID_START_DAY` / `INTERVENTION_DAY` entries of `conf`, and the
    environment is then run for `simulation_days + COVID_START_DAY` days.

    Args:
        n_people (int, optional): population size in simulation. Defaults to 1000.
        init_fraction_sick (float, optional): population fraction initialized with Covid-19. Defaults to 0.01.
        start_time (datetime, optional): calendar date at which COVID starts spreading. The simulation
            itself starts `COVID_START_DAY` days earlier. Defaults to February 28, 2020.
        simulation_days (int, optional): number of days to simulate after `start_time`. Defaults to 30.
        outfile (str, optional): location to write logs. Defaults to None.
        out_chunk_size (int, optional): size of chunks to write in logs. Defaults to None.
        seed (int, optional): seed of the `np.random.RandomState` handed to the city. Defaults to 0.
        conf (dict): yaml configuration of the experiment. It is read for (at least) the keys
            `MEAN_DAILY_UNKNOWN_CONTACTS`, `ENVIRONMENTAL_INFECTION_KNOB`,
            `BEGIN_PREFERENTIAL_ATTACHMENT_FACTOR`, `COVID_START_DAY`, `INTERVENTION_DAY`
            and `LOGGING_LEVEL`.
        logfile (str): filepath where the console output and final tracked metrics will be logged.
            Prints to the console only if None.

    Returns:
        city (covid19sim.locations.city.City): The city object referencing people, locations, and the tracker post-simulation.

    Raises:
        KeyError: if `conf` lacks one of the keys listed above (this includes `conf=None`,
            which is replaced by an empty dict before those keys are looked up).
    """
    conf = {} if conf is None else conf

    # mirror the call arguments into the configuration
    for key, value in (
        ("n_people", n_people),
        ("init_fraction_sick", init_fraction_sick),
        ("start_time", start_time),
        ("simulation_days", simulation_days),
        ("outfile", outfile),
        ("out_chunk_size", out_chunk_size),
        ("seed", seed),
        ("logfile", logfile),
    ):
        conf[key] = value

    # working copies (underscore-prefixed) of the mixing constants
    for target_key, source_key in (
        ('_MEAN_DAILY_UNKNOWN_CONTACTS', 'MEAN_DAILY_UNKNOWN_CONTACTS'),
        ('_ENVIRONMENTAL_INFECTION_KNOB', 'ENVIRONMENTAL_INFECTION_KNOB'),
        ('_CURRENT_PREFERENTIAL_ATTACHMENT_FACTOR', 'BEGIN_PREFERENTIAL_ATTACHMENT_FACTOR'),
    ):
        conf[target_key] = conf[source_key]
    covid_start_offset_days = conf['COVID_START_DAY']
    intervention_offset_days = conf['INTERVENTION_DAY']

    # COVID starts spreading at the requested start time
    conf['COVID_SPREAD_START_TIME'] = start_time

    # a negative intervention day means "no intervention"
    conf['INTERVENTION_START_TIME'] = (
        start_time + datetime.timedelta(days=intervention_offset_days)
        if intervention_offset_days >= 0
        else None
    )

    # the simulation begins COVID-free, `COVID_START_DAY` days before the spread
    simulation_start = start_time - datetime.timedelta(days=covid_start_offset_days)
    conf['SIMULATION_START_TIME'] = str(simulation_start)

    # the COVID-free warm-up days are simulated on top of the requested days
    conf['simulation_days'] += conf['COVID_START_DAY']
    total_days = conf['simulation_days']

    console_logger = ConsoleLogger(frequency=SECONDS_PER_DAY, logfile=logfile, conf=conf)
    logging.root.setLevel(getattr(logging, conf["LOGGING_LEVEL"].upper()))

    rng = np.random.RandomState(seed)
    env = Env(simulation_start)
    x_bounds = (0, 1000)
    y_bounds = (0, 1000)
    city = City(
        env, n_people, init_fraction_sick, rng, x_bounds, y_bounds, conf, logfile
    )

    # we might need to reset the state of the clusters held in shared memory (server or not)
    if conf.get("RESET_INFERENCE_SERVER", False):
        if conf.get("USE_INFERENCE_SERVER"):
            server_address = conf.get("INFERENCE_SERVER_ADDRESS", None)
            print("requesting cluster reset from inference server...")
            from covid19sim.inference.server_utils import InferenceClient

            InferenceClient(server_address=server_address).request_reset()
        else:
            from covid19sim.inference.heavy_jobs import DummyMemManager

            DummyMemManager.global_cluster_map = {}

    # the city process runs every hour
    env.process(city.run(SECONDS_PER_HOUR, outfile))

    # one process per human
    for person in city.humans:
        env.process(person.run())

    env.process(console_logger.run(env, city=city))

    # run until the last simulated day is over
    env.run(until=env.ts_initial + total_days * SECONDS_PER_DAY)

    return city
def test_incubation_days():
    """
    Initialize `Human`s, compute their covid properties and fit their distributions.

    For `N` randomly sized cities, the incubation days and infectiousness onset
    days of all humans are fitted with a gamma density and the recovery days
    with a normal density. The fitted parameters are averaged over the runs and
    printed with a 95% interval; the only hard check is that the average fitted
    gamma shape of the incubation days lies within 3.585 - 13.865.

    Historical note: this test was originally described as a lognormal check, see
    Table 2 (Appendix) in https://www.acpjournals.org/doi/10.7326/M20-0504
    (lognormal reference values: mu= 1.621 (1.504 - 1.755) sigma=0.418 (0.271 - 0.542)).
    The fit performed here is a gamma fit, compared against the gamma reference
    values quoted in the printed report.
    """
    conf = get_test_conf("test_covid_testing.yaml")

    def normal_func(x, mu, sigma):
        return norm.pdf(x, loc=mu, scale=sigma)

    def gamma_func(x, shape, scale):
        return gamma.pdf(x, a=shape, scale=scale)

    def fit_density(samples, pdf, **fit_kwargs):
        """Fit `pdf` to the empirical density of `samples` and return the fitted parameters."""
        # np.histogram (default: 10 equal-width bins) yields the same densities
        # and bin edges as matplotlib's `Axes.hist` with its stock settings,
        # without creating a figure that is never closed.
        density, edges = np.histogram(np.array(samples), density=True)
        bin_centers = (edges[:-1] + edges[1:]) / 2
        popt, _ = curve_fit(pdf, bin_centers, density, **fit_kwargs)
        return popt

    N = 2
    rng = np.random.RandomState(42)
    fitted_incubation_params = []
    fitted_infectiousness_onset_params = []
    fitted_recovery_params = []
    for _ in range(N):
        n_people = rng.randint(500, 1000)
        init_fraction_sick = rng.uniform(0.01, 0.05)
        start_time = datetime.datetime(2020, 2, 28, 0, 0)

        env = Env(start_time)
        city_x_range = (0, 1000)
        city_y_range = (0, 1000)
        city = City(
            env,
            n_people,
            init_fraction_sick,
            rng,
            city_x_range,
            city_y_range,
            conf,
        )

        incubation_data, infectiousness_onset_data, recovery_data = [], [], []
        for human in city.humans:
            human.initial_viral_load = human.rng.random()
            compute_covid_properties(human)
            assert human.incubation_days >= 0, "negative incubation days"
            assert human.infectiousness_onset_days >= 0, "negative infectiousness onset days"
            assert human.recovery_days >= 0, "negative recovery days"
            incubation_data.append(human.incubation_days)
            infectiousness_onset_data.append(human.infectiousness_onset_days)
            recovery_data.append(human.recovery_days)

        print(f"minimum incubation days: {min(incubation_data)}")

        fitted_incubation_params.append(
            fit_density(incubation_data, gamma_func))
        fitted_infectiousness_onset_params.append(
            fit_density(infectiousness_onset_data, gamma_func))
        # NOTE(review): scalar bounds apply to every fitted parameter, so sigma
        # is constrained to [14, 30] as well as mu -- confirm this is intended.
        fitted_recovery_params.append(
            fit_density(recovery_data, normal_func, bounds=(14, 30)))

    report = [
        ("incubation days", fitted_incubation_params),
        ("infectiousness onset days", fitted_infectiousness_onset_params),
        ("recovery days", fitted_recovery_params),
    ]
    for param_name, fitted_params in report:
        all_params = np.array(fitted_params)

        # first fitted parameter (gamma shape / normal mu)
        avg_mu, std_mu = all_params[:, 0].mean(), all_params[:, 0].std()
        ci_mu = norm.interval(0.95, loc=avg_mu, scale=std_mu)

        # second fitted parameter (gamma scale / normal sigma)
        avg_sigma, std_sigma = all_params[:, 1].mean(), all_params[:, 1].std()
        ci_sigma = norm.interval(0.95, loc=avg_sigma, scale=std_sigma)

        if param_name == "incubation days":
            print(
                f"**** Fitted Gamma distribution over {N} runs ... 95% CI ****"
            )
            print(f"{param_name}")
            print(
                f"shape: {avg_mu: 3.3f} ({ci_mu[0]: 3.3f} - {ci_mu[1]: 3.3f}) refernce value: 5.807 (3.585 - 13.865)"
            )
            print(
                f"scale: {avg_sigma: 3.3f} ({ci_sigma[0]: 3.3f} - {ci_sigma[1]: 3.3f}) refernce value: 0.948 (0.368 - 1.696)"
            )
            assert 3.585 <= avg_mu <= 13.865, "not a fitted gamma distribution"

        elif param_name == "infectiousness onset days":
            print(
                f"**** Fitted Gamma distribution over {N} runs ... 95% CI ****"
            )
            print(f"{param_name}")
            print(
                f"shape: {avg_mu: 3.3f} ({ci_mu[0]: 3.3f} - {ci_mu[1]: 3.3f}) refernce value: mean is 5.807-2.3 = 3.507 days (refer paramters in core.yaml)"
            )
            print(
                f"scale: {avg_sigma: 3.3f} ({ci_sigma[0]: 3.3f} - {ci_sigma[1]: 3.3f}) refernce value: no-source"
            )

        elif param_name == "recovery days":
            print(
                f"**** Fitted Normal distribution over {N} runs ... 95% CI ****"
            )
            print(f"{param_name}")
            print(
                f"mu: {avg_mu: 3.3f} ({ci_mu[0]: 3.3f} - {ci_mu[1]: 3.3f}) refernce value: mean is 14 + 5.807 = 19.807 days (refer paramters in core.yaml)"
            )
            print(
                f"sigma: {avg_sigma: 3.3f} ({ci_sigma[0]: 3.3f} - {ci_sigma[1]: 3.3f}) refernce value: no-source"
            )
Example #17
0
def test_household_distribution(
        seed: int,
        test_conf_name: str,
        avg_household_size_error_tol: float = 0.22, #TODO: change this back to 0.1. I had to bump it up otherwise the tests fail for inscrutable reasons...
        fraction_in_households_error_tol: float = 0.1,
        household_size_distribution_error_tol: float = 0.1):
    """
        Checks the household-related demographic statistics of a freshly built city:
            - the configured household size probabilities are consistent
            - each human is associated to a household
            - there is no empty household
            - average number of people per household
            - fraction of people in household
            - distribution of the number of people per household

    Reference values are from Canada statistics - census profile 2016 (ref: https://tinyurl.com/qsf2q8d)

    Args:
        seed (int): seed of the random state used to build the city
        test_conf_name (str): the filename of the configuration file used for testing
        avg_household_size_error_tol (float): tolerance to the average household size discrepancy
        fraction_in_households_error_tol (float): tolerance to the population fraction in households discrepancy
        household_size_distribution_error_tol (float): tolerance to the distribution of household size discrepancy
    """

    conf = get_test_conf(test_conf_name)

    # Fetch every probability table first, so that a missing key is reported
    # before any consistency assertion is evaluated.
    p_household_size = conf['P_HOUSEHOLD_SIZE']
    family_type_tables = [
        (table_name, size_idx, conf[table_name])
        for table_name, size_idx in (
            ('P_FAMILY_TYPE_SIZE_2', 1),
            ('P_FAMILY_TYPE_SIZE_3', 2),
            ('P_FAMILY_TYPE_SIZE_4', 3),
            ('P_FAMILY_TYPE_SIZE_MORE_THAN_5', 4),
        )
    ]

    # the household size probabilities form a distribution
    total = np.sum(p_household_size)
    assert math.fabs(total - 1.) < 1e-6, \
        f'The P_HOUSEHOLD_SIZE does not sum to 1. (actual value= {total})'

    # each family-type table sums to the probability of its household size
    for table_name, size_idx, table in family_type_tables:
        table_sum = np.sum(table)
        expected = p_household_size[size_idx]
        assert math.fabs(table_sum - expected) < 1e-6, \
            f'The {table_name} does not sum to P_HOUSEHOLD_SIZE[{size_idx}]. (actual value= {table_sum}, expected value={expected})'

    n_people = 5000
    env = Env(datetime.datetime(2020, 2, 28, 0, 0))
    conf['simulation_days'] = 1
    city = City(
        env=env,
        n_people=n_people,
        init_fraction_sick=0.01,
        rng=np.random.RandomState(seed=seed),
        x_range=(0, 1000),
        y_range=(0, 1000),
        conf=conf,
        logfile="logfile.txt"
    )

    # nobody is homeless
    for person in city.humans:
        assert person.household, 'There is at least one individual without household.'

    # count households of size 1, 2, 3, 4 and "5 or more"
    n_households = len(city.households)
    size_counts = np.zeros(5)
    n_resident_in_households = 0
    for home in city.households:
        occupancy = len(home.residents)
        assert occupancy > 0, 'There is an empty household.'
        n_resident_in_households += occupancy
        size_counts[min(occupancy, 5) - 1] += 1
    sim_household_size_distribution = size_counts / n_households
    sim_avg_household_size = n_resident_in_households / n_households

    # Average number of resident per household
    avg_household_size = conf['AVG_HOUSEHOLD_SIZE']  # Value from CanStats
    avg_size_gap = math.fabs(sim_avg_household_size - avg_household_size)
    assert avg_size_gap < avg_household_size_error_tol, \
        f'The empirical average household size is {sim_avg_household_size:.2f}' \
        f' while the statistics for Canada is {avg_household_size:.2f}'

    # Fraction of the population living in a private household
    fraction_in_households = 0.98  # Value from CanStats
    sim_fraction_in_households = n_resident_in_households / n_people
    fraction_gap = math.fabs(sim_fraction_in_households - fraction_in_households)
    assert fraction_gap < fraction_in_households_error_tol, \
        f'The empirical fraction of people in households is {sim_fraction_in_households:.2f}' \
        f' while the statistics for Canada is {fraction_in_households:.2f}'

    # Household size distribution (re-read after the city was built)
    household_size_distribution = conf['P_HOUSEHOLD_SIZE']
    distributions_match = np.allclose(
        sim_household_size_distribution,
        household_size_distribution,
        atol=household_size_distribution_error_tol)
    assert distributions_match, \
        'the discrepancy between simulated and estimated household size distribution is too important.'
Example #18
0
def test_track_serial_interval():
    """
    Exercise `Tracker.track_serial_interval` on a hand-built infection chain.

    Ten seniors share one senior residence; the infections 0->1, 0->2, 1->3 and
    2->5 are registered in the tracker books, then symptom onsets are set one
    human at a time to check when (and with which value) intervals are recorded.
    """

    with tempfile.TemporaryDirectory():

        rng = np.random.RandomState(42)

        # Config
        day0 = datetime.datetime(2020, 2, 28, 0, 0)
        x_bounds = (0, 1000)
        y_bounds = (0, 1000)

        # Find the test_configs directory, and load the required config yaml
        conf = get_test_conf("naive_local.yaml")

        env = Env(day0)
        city = EmptyCity(env, rng, x_bounds, y_bounds, conf)

        # draw from `rng` in this exact order: residence seed, latitude, longitude
        residence_rng = np.random.RandomState(rng.randint(2 ** 16))
        residence_lat = rng.randint(*x_bounds)
        residence_lon = rng.randint(*y_bounds)
        sr = Household(
            env=env,
            rng=residence_rng,
            conf=conf,
            name="SENIOR_RESIDENCE:0",
            location_type="SENIOR_RESIDENCE",
            lat=residence_lat,
            lon=residence_lon,
            area=1000,
            capacity=None,
        )
        city.senior_residences.append(sr)

        N = 10

        # Create humans
        ages = city.rng.randint(65, 100, size=N)

        humans = []
        for idx in range(N):
            humans.append(
                Human(
                    env=city.env,
                    city=city,
                    name=idx,
                    age=ages[idx],
                    rng=rng,
                    conf=conf,
                )
            )
        city.n_init_infected = 0

        for resident in humans:
            resident.assign_household(sr)
            sr.residents.append(resident)

        city.humans = humans
        city.initWorld()

        t = Tracker(env, city, conf, None)
        t.start_tracking = True

        # Register the (infector, infectee) pairs in both books
        for infector_idx, infectee_idx in [(0, 1), (0, 2), (1, 3), (2, 5)]:
            from_human = humans[infector_idx]
            to_human = humans[infectee_idx]
            pair = (to_human, from_human)
            t.serial_interval_book_to[to_human.name][from_human.name] = pair
            t.serial_interval_book_from[from_human.name][to_human.name] = pair

        def symptoms_on(day):
            # symptom onset `day` days after the start of the scenario
            return day0 + datetime.timedelta(days=day)

        # no interval is registered while only the infectee (1) ...
        humans[1].covid_symptom_start_time = day0
        t.track_serial_interval(humans[1].name)
        assert len(t.serial_intervals) == 0

        # ... or only another infectee (5) has symptoms
        humans[5].covid_symptom_start_time = symptoms_on(4)
        t.track_serial_interval(humans[5].name)
        assert len(t.serial_intervals) == 0

        # infector 0 shows symptoms after infectee 1: a negative interval is registered
        humans[0].covid_symptom_start_time = symptoms_on(1)
        t.track_serial_interval(humans[0].name)
        assert len(t.serial_intervals) == 1
        assert t.serial_intervals[0] == -1.0

        # human 2 is both an infectee (of 0) and an infector (of 5)
        humans[2].covid_symptom_start_time = symptoms_on(2)
        t.track_serial_interval(humans[2].name)
        assert len(t.serial_intervals) == 3
        # Intervals (2,5) and (0,2) should be registered
        assert sorted(t.serial_intervals[-2:]) == [1, 2]

        # a second call for the same human changes nothing
        t.track_serial_interval(humans[2].name)
        assert len(t.serial_intervals) == 3
        assert sorted(t.serial_intervals[-2:]) == [1, 2]

        # only the 1->3 pair is left in the books
        pending_to = t.serial_interval_book_to[humans[3].name]
        assert humans[1].name in pending_to
        assert len(pending_to) == 1

        pending_from = t.serial_interval_book_from[humans[1].name]
        assert humans[3].name in pending_from
        assert len(pending_from) == 1

        # every other entry has been consumed
        for idx in (5, 0, 2):
            assert len(t.serial_interval_book_from[humans[idx].name]) == 0
            assert len(t.serial_interval_book_to[humans[idx].name]) == 0
Example #19
0
def test_app_distribution(
        test_conf_name: str,
        app_uptake: float
):
    """
        Checks the demographic statistics of the app users in a freshly built city:
            - with a negative `APP_UPTAKE`, the per-age-group share of app users
              must equal the per-age-group share of the population
            - otherwise, the number of app users per age group must match the count
              derived from the age histogram, the smartphone ownership fraction
              and the uptake

    Args:
        test_conf_name (str): the filename of the configuration file used for testing
        app_uptake (float): probability that an individual with a smartphone has the app.
            A falsy value leaves the `APP_UPTAKE` of the configuration file untouched.
    """

    conf = get_test_conf(test_conf_name)

    if app_uptake:
        conf['APP_UPTAKE'] = app_uptake

    n_people = 1000
    rng = np.random.RandomState(seed=0)
    env = Env(datetime.datetime(2020, 2, 28, 0, 0))
    conf['simulation_days'] = 1
    city = City(
        env=env,
        n_people=n_people,
        init_fraction_sick=0.01,
        rng=rng,
        x_range=(0, 1000),
        y_range=(0, 1000),
        conf=conf,
        logfile="logfile.txt",
    )
    city.have_some_humans_download_the_app()

    # one record per human
    records = [[person.age, person.sex, person.has_app] for person in city.humans]
    df = pd.DataFrame.from_records(
        data=records,
        columns=['age', 'sex', 'has_app']
    )

    def count_by_age_group(age_intervals):
        # population size and number of app users in each age interval
        by_age = df.groupby(pd.cut(df['age'], age_intervals))
        return by_age.agg({'age': 'count', 'has_app': 'sum'})

    # Check the age distribution of the app users
    if conf.get('APP_UPTAKE') < 0:
        smartphone_histogram = conf.get('SMARTPHONE_OWNER_FRACTION_BY_AGE')
        age_bounds = [(low, up) for low, up, _ in smartphone_histogram]
        intervals = pd.IntervalIndex.from_tuples(age_bounds, closed='both')
        age_grouped = count_by_age_group(intervals)
        assert age_grouped.age.sum() == n_people
        age_stats = age_grouped.age.apply(lambda count: count / n_people)
        app_stats = age_grouped.has_app.apply(lambda count: count / n_people)
        assert np.allclose(age_stats.to_numpy(), app_stats.to_numpy())
    else:
        abs_age_histogram = utils.relativefreq2absolutefreq(
            bins_fractions={(low, up): p for low, up, p in conf.get('P_AGE_REGION')},
            n_elements=n_people,
            rng=city.rng
        )
        age_histogram_bin_10s = utils._convert_bin_5s_to_bin_10s(abs_age_histogram)

        # expected number of app users per age group
        uptake = conf.get('APP_UPTAKE')
        n_apps_per_age = {}
        for low, up, owner_fraction in conf.get("SMARTPHONE_OWNER_FRACTION_BY_AGE"):
            n_apps_per_age[(low, up)] = math.ceil(
                age_histogram_bin_10s[(low, up)] * owner_fraction * uptake)
        expected_counts = list(n_apps_per_age.values())
        n_apps = np.sum(expected_counts)

        intervals = pd.IntervalIndex.from_tuples(n_apps_per_age.keys(), closed='both')
        age_grouped = count_by_age_group(intervals)
        assert age_grouped.age.sum() == n_people
        assert age_grouped.has_app.sum() == n_apps
        app_share = age_grouped.has_app.apply(lambda count: count / n_apps)

        assert np.allclose(app_share.to_numpy(), np.array(expected_counts) / n_apps)
Example #20
0
    def load_human_data(self, start_idx=None, end_idx=None, ids=None):
        """
        Load human backups and event data for the specified humans.

        Ex : Calling with start_idx=1 and end_idx=4 will load data for
        the second, third and fourth humans.

        Args:
            start_idx (int, optional): Index (starting at 0) of the first human to load.
                If unspecified, loading will start at first human.
            end_idx (int, optional): Index (starting at 0) of the last human to load plus one.
                If unspecified, humans up until the last one will be loaded.
            ids (iterable of str, optional): Names of the humans to load. The integer after
                the last ":" of each name is taken as a 1-based human number. When given
                (and non-empty), it replaces the `start_idx`/`end_idx` range for selecting
                humans; the range is still validated.

        Returns:
            tuple: `(human_backups, humans_events)` where `human_backups` maps each
            timestamp to a dict `{human name: rebuilt Human object}` and
            `humans_events` maps each human name to its list of events sorted
            by their "time" entry.
        """
        if start_idx is None:
            start_idx = 0
        if end_idx is None:
            end_idx = self.get_nb_humans()
        assert start_idx < end_idx

        # If we pass in a specific set of human ids, go get those ones, otherwise batch
        if ids:
            idxs = [int(i.split(":")[-1]) - 1 for i in ids]
        else:
            idxs = range(start_idx, end_idx)

        human_backups = {}
        humans_events = {}
        latest_human_buffers = [None] * self.get_nb_humans()
        print("loading humans from delta buffer...")
        # first, quickly load all the raw data, we'll rebuild the full objects afterwards
        for day_idx in tqdm.tqdm(range(self.get_nb_days())):
            for hour_idx in range(24):
                for idx in idxs:
                    # here, we assume the data is always encoded in a delta-since-last format
                    if latest_human_buffers[idx] is None or not self.is_delta[day_idx, hour_idx, idx]:
                        # the very first buffer of a human can never be a delta
                        assert not self.is_delta[day_idx, hour_idx, idx]
                        latest_human_buffers[idx] = self.dataset[day_idx, hour_idx, idx]
                        human_buffer = latest_human_buffers[idx]
                    else:
                        # apply the delta on top of the previous full buffer
                        human_delta = self.dataset[day_idx, hour_idx, idx]
                        human_buffer = xdelta3.decode(latest_human_buffers[idx], human_delta)
                        latest_human_buffers[idx] = human_buffer
                    # SECURITY: unpickling executes arbitrary code; only load
                    # datasets that come from a trusted source.
                    human_data = pickle.loads(human_buffer)
                    timestamp = human_data.env.timestamp
                    human_data.conf = self.conf
                    human_backups.setdefault(timestamp, {})[human_data.name] = human_data

        human_constr_args = [k for k in inspect.getfullargspec(Human.__init__).args if k != "self"]
        # attributes that must keep the value given by the fresh constructor
        never_overridden = ("env", "known_connections", "intervened_behavior")
        for timestamp, humans in human_backups.items():
            # time to recreate the (approx) full object w/ its member functions
            for human_name, human_dump in humans.items():
                new_env = Env(human_dump.env.initial_timestamp)
                new_env._now = human_dump.env.now
                human_dump.env = new_env
                human_dump.rng = human_dump.init_seed  # to keep same init construction state
                human_obj = Human(*[getattr(human_dump, k) for k in human_constr_args])
                # override all attributes except the blacklist/dummy ones
                for attr_name in human_obj.__dict__.keys():
                    if attr_name in never_overridden or attr_name in human_dump.blacklisted_attribs:
                        continue
                    setattr(human_obj, attr_name, getattr(human_dump, attr_name))
                human_obj.name = human_dump.name
                humans[human_name] = human_obj
            # now, extract human event data
            # NOTE(review): the per-human event dict is reset for every timestamp,
            # so only the events carried by the last processed backup of a human
            # are returned. If backups do not each carry the full event history,
            # earlier events are dropped -- confirm this is intended.
            for human_name, human in humans.items():
                humans_events[human.name] = {}
                for event in human._events:
                    humans_events[human.name][(event["time"], event["event_type"])] = event
                human._events = []
        # finally, ensure events are sorted by timestamp for each human
        for human_id, human_events in humans_events.items():
            events = list(human_events.values())
            events.sort(key=lambda e: e["time"])
            humans_events[human_id] = events
        return human_backups, humans_events