Example #1
0
def test_generate_larger_households(location='seattle_metro',
                                    state_location='Washington',
                                    country_location='usa'):
    """Smoke-test ``sp.generate_larger_households`` for households of size 3.

    Gathers the inputs the generator takes (household-size counts, head-age
    brackets and head-age-by-size counts, census age brackets, a contact
    matrix dictionary) through the corresponding ``sp`` loaders, pairs them
    with a flat age distribution over ages 0-100, and checks that the call
    returns something other than ``None``. The result is printed.
    """
    n_homes = 1000
    size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes(n_homes, size_distr)

    head_age_brackets = sp.get_head_age_brackets(
        datadir, country_location=country_location)
    head_age_by_size = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    brackets_path = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # Flat distribution: each single-year age 0..100 gets the same weight.
    n_ages = 101
    flat_age_distr = dict.fromkeys(range(n_ages), 1.0 / n_ages)

    # The first argument is the household size to build. Per the original
    # author's note, hh_sizes is an array (not a dict), so the number of
    # size-3 households to create is read from index 3 - 1.
    target_size = 3
    households = sp.generate_larger_households(
        target_size, hh_sizes, head_age_by_size, head_age_brackets,
        age_brackets, age_by_brackets_dic, contact_matrix_dic,
        flat_age_distr)
    assert households is not None
    print(households)
def test_generate_household_sizes(location='seattle_metro', state_location='Washington', country_location='usa'):
    """Check that ``sp.generate_household_sizes`` returns 7 entries.

    The household-size distribution is loaded for the given location and
    1000 homes are requested; the result must have length 7.
    """
    sc.heading('Generate household sizes')

    n_homes = 1000
    size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)
    sizes = sp.generate_household_sizes(n_homes, size_distr)
    n_entries = len(sizes)
    assert n_entries == 7
def test_get_totalpopsizes_from_household_sizes(location='seattle_metro', state_location='Washington',
                                                country_location='usa'):
    """Check that ``sp.get_totalpopsize_from_household_sizes`` returns a value.

    Household sizes are generated for 1000 homes from the distribution loaded
    for the given location, then passed to the function under test; the only
    requirement is that the result is not ``None``.
    """
    size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)

    n_homes = 1000
    sizes = sp.generate_household_sizes(n_homes, size_distr)
    total_pop = sp.get_totalpopsize_from_household_sizes(sizes)
    assert total_pop is not None
    def test_generate_household_sizes(self):
        """
        Exercise ``sp.generate_household_sizes`` through its ``hh_sizes``
        keyword.

        An empty ``hh_sizes`` list must produce a result of length 0. Then,
        for each array length from 2 through 9, a random integer array with
        values from 1 up to (but not including) 50 is passed in, and the
        length of the result must equal the sum of that array.

        Returns:
            None
        """
        empty_result = sp.generate_household_sizes(hh_sizes=[])
        self.assertEqual(len(empty_result), 0)
        for n_entries in range(2, 10):
            sizes = np.random.randint(low=1, high=50, size=n_entries)
            with self.subTest(size=sizes):
                print(f"hh_size:{sizes}")
                generated = sp.generate_household_sizes(hh_sizes=sizes)
                print(f"actual hh_size:{collections.Counter(sizes)}")
                expected_length = sum(sizes)
                self.assertEqual(expected_length, len(generated))
Example #5
0
    def test_custom_household_size_distro_honored(self):
        """
        Check that generated household counts follow a custom distribution.

        Generates household sizes for 500 homes from a hand-written
        size -> probability mapping, then verifies that:

        * the size with the largest generated count is one whose probability
          equals the highest probability in the mapping, and
        * ``self.verify_portion_honored`` accepts the counts for portions
          0.25, 0.2 and 0.1.

        Only the most common size is asserted; the least common size is not
        checked.

        Returns:
            None
        """
        self.is_debugging = False
        custom_distro = {
            1: 0.25,
            2: 0.075,
            3: 0.10,
            4: 0.30,
            5: 0.05,
            6: 0.05,
            7: 0.175,
        }
        hh_sizes = sp.generate_household_sizes(500, custom_distro)

        hh_size_list = list(hh_sizes)  # Comes as np.ndarray
        most_index = hh_size_list.index(max(hh_size_list))

        highest_probability = max(custom_distro.values())
        # custom_distro is keyed from 1 while the count list is 0-indexed.
        most_houses_probability = custom_distro[most_index + 1]

        self.assertEqual(
            highest_probability,
            most_houses_probability,
            msg=
            "The most common household size should be the size with the highest probability"
        )

        prob_bucket_list = list(custom_distro.values())
        for portion in (0.25, 0.2, 0.1):
            self.verify_portion_honored(probability_buckets=prob_bucket_list,
                                        count_buckets=hh_size_list,
                                        portion=portion)
# Show the household-size distribution defined earlier in the script.
print(household_size_distr)

# Number of homes used as N for the fixed-population sampling below.
# Nhomes = 20000
Nhomes = 10000

# Toggle for the household-generation branch; currently disabled, so the
# block below is skipped.
# create_homes = True
create_homes = False
if create_homes:
    # Reload the distribution, this time passing use_bayesian through.
    household_size_distr = sp.get_household_size_distr(datadir, location,
                                                       state_location,
                                                       country_location,
                                                       use_bayesian)
    print(household_size_distr)

    # First pass: sample a large number of homes and derive a total
    # population size from them, which is used to request ages and sexes.
    Nhomes_to_sample_smooth = 100000
    hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth,
                                           household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    # hh_sizes = sp.generate_household_sizes(Nhomes,household_size_distr)

    syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location,
                                               totalpop)
    # NOTE(review): syn_age_count is not read again in the visible part of
    # the script; the Counter is rebuilt on the next line for norm_dic.
    syn_age_count = Counter(syn_ages)
    syn_age_distr = sp.norm_dic(Counter(syn_ages))

    # Second pass: hh_sizes and totalpop from the first pass are overwritten
    # here with values generated from N = Nhomes.
    N = Nhomes
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    print(totalpop, 'pop')