def test_generate_larger_households(location='seattle_metro', state_location='Washington', country_location='usa'):
    """
    Smoke-test ``sp.generate_larger_households`` for households of size 3.

    Household sizes are sampled for 1000 homes, the head-of-household and
    age-bracket inputs are loaded through ``sp`` from ``datadir``, and a flat
    single-year age distribution over ages 0-100 is supplied. The test only
    checks that the call returns something other than ``None``.

    Args:
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
    """
    n_homes = 1000

    size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes(n_homes, size_distr)

    head_age_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
    head_age_by_size = sp.get_head_age_by_size_distr(datadir, country_location=country_location)

    brackets_path = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(brackets_path)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Every age from 0 through 100 gets the same probability mass.
    uniform_mass = 1.0 / 101.0
    flat_age_distr = {age: uniform_mass for age in range(101)}

    # The first argument is the household size to build. hh_sizes is an array,
    # not a dictionary, so the number of size-3 households to create is stored
    # at index 3 - 1.
    target_size = 3
    households = sp.generate_larger_households(
        target_size,
        hh_sizes,
        head_age_by_size,
        head_age_brackets,
        age_brackets,
        age_by_brackets_dic,
        contact_matrix_dic,
        flat_age_distr,
    )

    assert households is not None
    print(households)
def test_generate_all_households(location='seattle_metro', state_location='Washington', country_location='usa'):
    """
    Smoke-test ``sp.generate_all_households`` for a population of size 1000.

    Household sizes are generated from a fixed population size, the
    head-of-household and age-bracket inputs are loaded through ``sp`` from
    ``datadir``, and a flat single-year age distribution over ages 0-100 is
    supplied. The test checks both values returned by the call.

    Args:
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
    """
    N = 1000

    household_size_distr = sp.get_household_size_distr(datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(N, household_size_distr)

    hha_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
    hha_by_size_counts = sp.get_head_age_by_size_distr(datadir, country_location=country_location)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Every age from 0 through 100 gets the same probability mass.
    single_year_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    homes_dic, homes = sp.generate_all_households(
        N,
        hh_sizes,
        hha_by_size_counts,
        hha_brackets,
        age_brackets,
        age_by_brackets_dic,
        contact_matrix_dic,
        single_year_age_distr,
    )

    # `assert a, b` treats `b` as the failure message, so a combined
    # `assert homes_dic, homes is not None` never checks `homes`.
    # Each returned value therefore gets its own assertion.
    assert homes_dic, "generate_all_households returned an empty or missing homes_dic"
    assert homes is not None, "generate_all_households returned None for homes"
# NOTE(review): this span looks like the interior of a routine whose header is
# outside this view. `location`, `state_location`, `totalpop`, `Nhomes`,
# `household_size_distr`, `datadir`, `country_location`, `use_bayesian`,
# `age_brackets`, `age_by_brackets_dic` and `contact_matrix_dic` are read here
# but bound elsewhere. Confirm against the enclosing definition.

# Sample ages and sexes for `totalpop` people, then turn the ages into a
# count table and a distribution via sp.norm_dic.
syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location, totalpop)
syn_age_count = Counter(syn_ages)
syn_age_distr = sp.norm_dic(Counter(syn_ages))

# Generate household sizes for a fixed population size N (taken from Nhomes).
N = Nhomes
hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
    N, household_size_distr)
# `totalpop` is rebound here: from this point on it is the population size
# implied by the generated household sizes, not the value used above.
totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)
print(totalpop, 'pop')

# Head-of-household age inputs.
hha_df = sp.get_household_head_age_by_size_df(datadir, country_location, use_bayesian)
hha_brackets = sp.get_head_age_brackets(datadir, country_location, use_bayesian)
hha_by_size = sp.get_head_age_by_size_distr(datadir, country_location, use_bayesian)

homes_dic, homes = sp.generate_all_households(hh_sizes, hha_by_size, hha_brackets, age_brackets, age_by_brackets_dic, contact_matrix_dic, syn_age_distr)

# Sum key * number-of-entries over homes_dic. Presumably keys are household
# sizes, making `c` the total number of people placed. Verify against
# sp.generate_all_households.
# NOTE(review): the loop extent was reconstructed from flattened source.
# Confirm that the print is meant to run once, after the loop.
c = 0
for s in homes_dic:
    c += s * len(homes_dic[s])
print('c', c)

homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)
def check_household_head(pop, n, datadir, figdir, state_location=None, country_location=None, file_path=None,
                         use_default=False, test_prefix="", do_close=True):
    """
    Plot the head-of-household age by household size distribution of a
    generated population next to the reference distribution.

    Args:
        pop              : population dictionary
        n                : population size (accepted but not read by this function)
        datadir          : root data directory (accepted but not read by this function)
        figdir           : base directory; the "household_head" subdirectory of it is
                           handed to the plotting helper
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : accepted but not read by this function
        use_default      : accepted but not read by this function
        test_prefix      : string appended to the plot's test prefix
        do_close         : forwarded to the plotting helper

    Returns:
        None. The comparison is handed to ``utilities.plot_heatmap``.
    """
    heatmap_dir = os.path.join(figdir, "household_head")

    reference_counts = sp.get_head_age_by_size_distr(
        state_location=state_location, country_location=country_location)
    brackets = sp.get_head_age_brackets(
        state_location=state_location, country_location=country_location)

    # Inverted bracket mapping, passed as `index` to the utilities helper below.
    bracket_lookup = sp.get_index_by_brackets_dic(brackets)

    # Drop the first entry of the reference table, then divide each column by
    # its own column sum.
    reference_frame = pandas.DataFrame(reference_counts[1:])
    column_totals = reference_frame.sum(axis=0)
    reference_share = reference_frame.div(column_totals, axis=1)

    observed_share = utilities.get_household_head_age_size(pop, index=bracket_lookup)

    # The first row of the reference shares is skipped as well.
    expected_values = reference_share.values[1:, :]
    actual_values = observed_share.values

    # One "<youngest>-<oldest>" label per head-age bracket.
    bracket_labels = [f'{min(ages)}-{max(ages)}' for ages in brackets.values()]

    # Sizes start at 2, with one entry per row of the reference shares.
    household_sizes = list(range(2, len(reference_share) + 2))

    utilities.plot_heatmap(
        expected_values,
        actual_values,
        bracket_labels,
        household_sizes,
        'Head of Household Age',
        'Household Size',
        testprefix="household_head_age_family_size " + test_prefix,
        figdir=heatmap_dir,
        do_close=do_close,
    )
# NOTE(review): this span looks like the interior of a routine whose header is
# outside this view, and it continues past the last visible line. `datadir`,
# `location`, `state_location`, `country_location`, `totalpop`, `Nhomes`,
# `household_size_distr`, `age_brackets`, `age_by_brackets_dic` and
# `contact_matrix_dic` are read here but bound elsewhere. Confirm against the
# enclosing definition.

# Sample ages and sexes for `totalpop` people, then turn the ages into a
# count table and a distribution via sp.norm_dic.
syn_ages, syn_sexes = sp.get_usa_age_sex_n(datadir, location, state_location, country_location, totalpop)
syn_age_count = Counter(syn_ages)
syn_age_distr = sp.norm_dic(Counter(syn_ages))

# Generate household sizes for a fixed population size N (taken from Nhomes).
N = Nhomes
hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
    N, household_size_distr)
# `totalpop` is rebound here: from this point on it is the population size
# implied by the generated household sizes, not the value used above.
totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)
print(totalpop, 'pop')

# Head-of-household age inputs.
hha_df = sp.get_household_head_age_by_size_df(datadir, state_location, country_location)
hha_brackets = sp.get_head_age_brackets(datadir, country_location=country_location)
hha_by_size = sp.get_head_age_by_size_distr(
    datadir, country_location=country_location)

# A deep copy of the age distribution is passed, so `syn_age_distr` itself is
# not the object handed to the generator. Presumably the callee mutates its
# argument. Confirm against sp.generate_all_households.
homes_dic, homes = sp.generate_all_households(N, hh_sizes, hha_by_size, hha_brackets, age_brackets, age_by_brackets_dic, contact_matrix_dic, deepcopy(syn_age_distr))
homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)

# Count how often each value occurs in age_by_uid_dic after household assignment.
new_ages_count = Counter(age_by_uid_dic.values())

# Single-axes 6x4 figure; `x` holds the integers 0..99.
# NOTE(review): the plotting code that uses `fig`, `ax` and `x` is outside this view.
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(111)
x = np.arange(100)