Ejemplo n.º 1
0
    def test_generate_household_sizes_from_fixed_pop_size(self):
        """
        Exercise generate_household_sizes_from_fixed_pop_size with inputs
        crafted to reach every conditional branch of the method.

        For each case the number of people implied by the returned household
        counts must equal the requested population size, and the counts are
        handed to verify_buckets together with the input distribution.

        Returns:
            None
        """
        def people_in(household_counts):
            # Position k is weighted by k + 1, i.e. it is treated as the
            # number of households of size k + 1.
            return sum([(k + 1) * household_counts[k]
                        for k in range(0, len(household_counts))])

        uniform_distr = {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.2, 5: 0.2}
        # 900 is divisible by the expected household size (3.0) while 901 is
        # not, which yields the N_gen = N and N_gen < N cases respectively.
        for pop_size in [900, 901]:
            counts = sp.generate_household_sizes_from_fixed_pop_size(
                N=pop_size, hh_size_distr=uniform_distr)
            # the generated households must hold exactly pop_size people
            self.assertEqual(pop_size, people_in(counts))
            # the counts must follow the requested distribution
            self.verify_buckets(uniform_distr.values(), counts)

        # A slightly skewed distribution has expected value 2.91, which will
        # round down to 2.9 and produce the N_gen > N case.
        skewed_distr = {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.29, 5: 0.11}
        skewed_counts = sp.generate_household_sizes_from_fixed_pop_size(
            N=900, hh_size_distr=skewed_distr)
        self.assertEqual(900, people_in(skewed_counts))
        self.verify_buckets(skewed_distr.values(), skewed_counts)
Ejemplo n.º 2
0
def test_generate_all_households(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa'):
    """
    Smoke-test sp.generate_all_households for a population of 1000 people.

    Household sizes, head-of-household age data, census age brackets and the
    contact matrices are loaded through the sp helpers from datadir; the
    single-year age distribution is uniform over ages 0 through 100. The test
    passes when both values returned by generate_all_households are not None.

    Args:
        location: Location name passed to get_household_size_distr.
        state_location: State name passed to the data-loading helpers.
        country_location: Country name passed to the data-loading helpers.

    Returns:
        None
    """
    N = 1000
    household_size_distr = sp.get_household_size_distr(datadir, location,
                                                       state_location,
                                                       country_location)

    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    hha_brackets = sp.get_head_age_brackets(datadir,
                                            country_location=country_location)
    hha_by_size_counts = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    age_brackets_filepath = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # Every age from 0 to 100 inclusive gets the same probability.
    single_year_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    homes_dic, homes = sp.generate_all_households(
        N, hh_sizes, hha_by_size_counts, hha_brackets, age_brackets,
        age_by_brackets_dic, contact_matrix_dic, single_year_age_distr)

    # Two separate assertions: `assert a, b is not None` would treat
    # `b is not None` as the failure message and never check `b`.
    assert homes_dic is not None
    assert homes is not None
Ejemplo n.º 3
0
def test_generate_household_sizes_from_fixed_pop_size(location='seattle_metro', state_location='Washington',
                                                      country_location='usa'):
    """
    Check the number of household-size buckets generated for a fixed size.

    Loads the household size distribution for the given location from datadir,
    generates household sizes for a fixed size of 1000, and asserts that the
    result has exactly 7 entries.

    Args:
        location: Location name passed to get_household_size_distr.
        state_location: State name passed to get_household_size_distr.
        country_location: Country name passed to get_household_size_distr.

    Returns:
        None
    """
    fixed_size = 1000
    expected_bucket_count = 7

    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    generated = sp.generate_household_sizes_from_fixed_pop_size(fixed_size, size_distr)

    assert len(generated) == expected_bucket_count
Ejemplo n.º 4
0
    def test_custom_household_size_distro_honored(self):
        """
        Check that generate_household_sizes_from_fixed_pop_size honors a
        customized household size distribution.

        The household size with the most generated households must be a size
        that carries the highest probability in the custom distribution. The
        generated counts are additionally validated with
        verify_portion_honored for the portions 0.25, 0.2 and 0.1.

        Returns:
            None
        """
        self.is_debugging = False
        custom_distro = {
            1: 0.25,
            2: 0.075,
            3: 0.10,
            4: 0.30,
            5: 0.05,
            6: 0.05,
            7: 0.175
        }
        hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
            500, custom_distro)

        hh_size_list = list(hh_sizes)  # Comes as np.ndarray

        # Position of the bucket holding the most households.
        most_index = hh_size_list.index(max(hh_size_list))

        highest_probability = max(custom_distro.values())
        # custom_distro is keyed from 1, the count list is indexed from 0.
        most_houses_probability = custom_distro[most_index + 1]

        self.assertEqual(
            highest_probability,
            most_houses_probability,
            msg=
            "The most common household size should be the size with the highest probability"
        )

        prob_bucket_list = list(custom_distro.values())
        for portion in (0.25, 0.2, 0.1):
            self.verify_portion_honored(probability_buckets=prob_bucket_list,
                                        count_buckets=hh_size_list,
                                        portion=portion)
    print(household_size_distr)

    Nhomes_to_sample_smooth = 100000
    hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth,
                                           household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    # hh_sizes = sp.generate_household_sizes(Nhomes,household_size_distr)

    syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location,
                                               totalpop)
    syn_age_count = Counter(syn_ages)
    syn_age_distr = sp.norm_dic(Counter(syn_ages))

    N = Nhomes
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    print(totalpop, 'pop')

    hha_df = sp.get_household_head_age_by_size_df(datadir, country_location,
                                                  use_bayesian)
    hha_brackets = sp.get_head_age_brackets(datadir, country_location,
                                            use_bayesian)
    hha_by_size = sp.get_head_age_by_size_distr(datadir, country_location,
                                                use_bayesian)

    homes_dic, homes = sp.generate_all_households(hh_sizes, hha_by_size,
                                                  hha_brackets, age_brackets,
                                                  age_by_brackets_dic,
                                                  contact_matrix_dic,