def test_generate_larger_households(location='seattle_metro', state_location='Washington', country_location='usa'):
    """Smoke-test ``sp.generate_larger_households`` for households of size 3.

    The inputs are assembled through the ``sp`` getter functions, the
    single-year age distribution is uniform over ages 0 through 100, and the
    only check is that the call returns something other than ``None``.
    """
    n_homes = 1000
    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes(n_homes, size_distr)

    head_age_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
    head_age_by_size = sp.get_head_age_by_size_distr(datadir, country_location=country_location)

    brackets_path = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Every age from 0 to 100 gets the same weight of 1/101.
    uniform_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    # The first argument is the household size to create. hh_sizes says how
    # many households of that size to make, stored at index size - 1 because
    # hh_sizes is an array rather than a dictionary.
    target_size = 3
    households = sp.generate_larger_households(
        target_size,
        hh_sizes,
        head_age_by_size,
        head_age_brackets,
        age_brackets,
        age_by_brackets_dic,
        contact_matrix_dic,
        uniform_age_distr,
    )
    assert households is not None
    print(households)
def test_generate_household_sizes(location='seattle_metro', state_location='Washington', country_location='usa'):
    """Check that ``sp.generate_household_sizes`` returns seven entries.

    The household size distribution is loaded for the given location and
    sizes are generated for 1000 households.
    """
    sc.heading('Generate household sizes')

    n_homes = 1000
    expected_entries = 7

    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    generated = sp.generate_household_sizes(n_homes, size_distr)

    assert len(generated) == expected_entries
def test_get_totalpopsizes_from_household_sizes(location='seattle_metro', state_location='Washington', country_location='usa'):
    """Smoke-test ``sp.get_totalpopsize_from_household_sizes``.

    Household sizes are generated for 1000 households and the only check is
    that the resulting population total is not ``None``.
    """
    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)

    n_homes = 1000
    generated_sizes = sp.generate_household_sizes(n_homes, size_distr)

    total_population = sp.get_totalpopsize_from_household_sizes(generated_sizes)
    assert total_population is not None
def test_generate_household_sizes(self):
    """
    Check ``sp.generate_household_sizes`` when called with only ``hh_sizes``.

    An empty ``hh_sizes`` must produce an empty result. For random count
    arrays of length 2 through 9, with entries drawn from [1, 50), the result
    must contain as many entries as the sum of the counts.

    Returns:
        None
    """
    empty_result = sp.generate_household_sizes(hh_sizes=[])
    self.assertEqual(len(empty_result), 0)

    for n_entries in range(2, 10):
        counts = np.random.randint(low=1, high=50, size=n_entries)
        with self.subTest(size=counts):
            print(f"hh_size:{counts}")
            generated = sp.generate_household_sizes(hh_sizes=counts)
            print(f"actual hh_size:{collections.Counter(counts)}")
            expected_total = sum(counts)
            self.assertEqual(expected_total, len(generated))
def test_custom_household_size_distro_honored(self):
    """Check that generated household sizes follow a custom distribution.

    Sizes are generated for 500 households from a hand-written size
    distribution. The test asserts that the bucket holding the most
    households carries the highest probability in that distribution, then
    delegates to ``self.verify_portion_honored`` for the portions 0.25, 0.2
    and 0.1.
    """
    self.is_debugging = False
    custom_distro = {
        1: 0.25,
        2: 0.075,
        3: 0.10,
        4: 0.30,
        5: 0.05,
        6: 0.05,
        7: 0.175,
    }
    hh_sizes = sp.generate_household_sizes(500, custom_distro)
    hh_size_list = list(hh_sizes)  # Comes as np.ndarray

    # Position of the fullest bucket; list.index returns the first one on ties.
    most_index = hh_size_list.index(max(hh_size_list))
    highest_probability = max(custom_distro.values())
    most_houses_probability = custom_distro[most_index + 1]  # hh_distro is 1 indexed
    self.assertEqual(
        highest_probability,
        most_houses_probability,
        msg="The most common household size should be the size with the highest probability",
    )

    prob_bucket_list = list(custom_distro.values())
    # Same check at each portion, in the original order.
    for portion in (0.25, 0.2, 0.1):
        self.verify_portion_honored(
            probability_buckets=prob_bucket_list,
            count_buckets=hh_size_list,
            portion=portion,
        )
print(household_size_distr) # Nhomes = 20000 Nhomes = 10000 # create_homes = True create_homes = False if create_homes: household_size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location, use_bayesian) print(household_size_distr) Nhomes_to_sample_smooth = 100000 hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth, household_size_distr) totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes) # hh_sizes = sp.generate_household_sizes(Nhomes,household_size_distr) syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location, totalpop) syn_age_count = Counter(syn_ages) syn_age_distr = sp.norm_dic(Counter(syn_ages)) N = Nhomes hh_sizes = sp.generate_household_sizes_from_fixed_pop_size( N, household_size_distr) totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes) print(totalpop, 'pop')