Esempio n. 1
0
def test_generate_larger_households(location='seattle_metro',
                                    state_location='Washington',
                                    country_location='usa'):
    """
    Smoke test for sp.generate_larger_households with households of size 3.

    Loads the household size distribution, head-of-household age data,
    census age brackets and contact matrices through the sp helpers, builds
    a uniform single-year age distribution over ages 0-100, and then checks
    that sp.generate_larger_households returns something other than None.
    The result is printed for manual inspection.

    Args:
        location         : location name passed to sp.get_household_size_distr
        state_location   : state name used for the household size and census
                           age bracket lookups
        country_location : country name used for all data lookups
    """
    n_homes = 1000
    size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)
    size_counts = sp.generate_household_sizes(n_homes, size_distr)

    head_age_brackets = sp.get_head_age_brackets(
        datadir, country_location=country_location)
    head_age_by_size = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    brackets_path = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    census_brackets = sp.get_age_brackets_from_df(brackets_path)
    bracket_lookup = sp.get_age_by_brackets_dic(census_brackets)

    contact_matrices = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # Every age from 0 to 100 inclusive gets the same probability mass.
    uniform_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    # The first argument is the household size to build. Per the original
    # author's note, size_counts is an array, so the number of size-3
    # households to create is read from index 3 - 1.
    target_size = 3
    result = sp.generate_larger_households(
        target_size, size_counts, head_age_by_size, head_age_brackets,
        census_brackets, bracket_lookup, contact_matrices, uniform_age_distr)

    assert result is not None
    print(result)
Esempio n. 2
0
def test_generate_all_households(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa'):
    """
    Exercise sp.generate_all_households for a fixed population size.

    The first part loads the household size distribution, head-of-household
    age data, census age brackets and contact matrices through the sp
    helpers, builds a uniform single-year age distribution over ages 0-100,
    generates all households for N = 1000 and asserts on both return values.

    The second part (from the sp.get_usa_age_sex_n call onward) repeats the
    generation using an age distribution derived from sampled ages. See the
    NOTE(review) comment there: as written it relies on names that are not
    defined in this function.

    Args:
        location         : location name passed to the sp data helpers
        state_location   : state name used for the data lookups
        country_location : country name used for the data lookups
    """
    N = 1000
    household_size_distr = sp.get_household_size_distr(datadir, location,
                                                       state_location,
                                                       country_location)

    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    hha_brackets = sp.get_head_age_brackets(datadir,
                                            country_location=country_location)
    hha_by_size_counts = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    age_brackets_filepath = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # Every age from 0 to 100 inclusive gets the same probability mass.
    single_year_age_distr = {age: 1.0 / 101.0 for age in range(101)}

    homes_dic, homes = sp.generate_all_households(
        N, hh_sizes, hha_by_size_counts, hha_brackets, age_brackets,
        age_by_brackets_dic, contact_matrix_dic, single_year_age_distr)
    # `assert a, b` treats `b` as the failure message, so the previous
    # single-statement form never checked `homes`. Check each value on its
    # own; homes_dic keeps the truthiness check it effectively had before.
    assert homes_dic
    assert homes is not None

    # NOTE(review): everything below looks like a fragment of a different
    # example that was fused into this function - confirm and either remove
    # it or supply the missing definitions:
    #   * `totalpop` is read here but only assigned further down, so it is a
    #     local that is unbound at this point (UnboundLocalError).
    #   * `Nhomes` and `use_bayesian` are not defined in this function; they
    #     would have to exist at module level.
    #   * sp.get_usa_age_sex_n and sp.generate_all_households are called with
    #     different argument lists than elsewhere in this file.
    syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location,
                                               totalpop)
    syn_age_count = Counter(syn_ages)
    syn_age_distr = sp.norm_dic(Counter(syn_ages))

    N = Nhomes
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    print(totalpop, 'pop')

    hha_df = sp.get_household_head_age_by_size_df(datadir, country_location,
                                                  use_bayesian)
    hha_brackets = sp.get_head_age_brackets(datadir, country_location,
                                            use_bayesian)
    hha_by_size = sp.get_head_age_by_size_distr(datadir, country_location,
                                                use_bayesian)

    homes_dic, homes = sp.generate_all_households(hh_sizes, hha_by_size,
                                                  hha_brackets, age_brackets,
                                                  age_by_brackets_dic,
                                                  contact_matrix_dic,
                                                  syn_age_distr)

    # Total number of people: household size times the number of households
    # recorded under that size key.
    c = 0
    for s in homes_dic:
        c += s * len(homes_dic[s])
    print('c', c)

    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)
Esempio n. 4
0
def check_household_head(pop,
                         n,
                         datadir,
                         figdir,
                         state_location=None,
                         country_location=None,
                         file_path=None,
                         use_default=False,
                         test_prefix="",
                         do_close=True):
    """
    Plot the household-head age distribution of a population against reference data.

    The reference head-age-by-household-size table and the head age brackets
    are fetched through sp for the given state and country. The first entry
    of the reference table is dropped, each column of the remainder is
    divided by its column total, and the result is handed to
    utilities.plot_heatmap together with the values computed from pop by
    utilities.get_household_head_age_size.

    Args:
        pop              : population dictionary
        n                : population size (not used by this function)
        datadir          : root data directory where the reference data resides
                           (not used by this function)
        figdir           : directory under which result files are saved; a
                           "household_head" subdirectory path is passed on
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : file path to user specified data (not used by this
                           function)
        use_default      : flag for falling back to default data (not used by
                           this function)
        test_prefix      : appended to the plot's test prefix
        do_close         : forwarded to utilities.plot_heatmap; close the
                           image immediately if set to True

    Returns:
        None.
    """
    figdir = os.path.join(figdir, "household_head")

    reference_table = sp.get_head_age_by_size_distr(
        state_location=state_location, country_location=country_location)
    head_age_brackets = sp.get_head_age_brackets(
        state_location=state_location, country_location=country_location)
    # Index built from the brackets; passed to the utilities helper below.
    hha_index = sp.get_index_by_brackets_dic(head_age_brackets)

    # Drop the first entry of the reference table before normalizing.
    reference_table = reference_table[1:]
    expected_df = pandas.DataFrame(reference_table)
    column_totals = expected_df.sum(axis=0)
    expected_share = expected_df.div(column_totals, axis=1)
    actual_share = utilities.get_household_head_age_size(pop, index=hha_index)

    # The first row of the normalized table is skipped as well.
    expected_values = expected_share.values[1:, :]
    actual_values = actual_share.values

    # One "min-max" label per head age bracket.
    xlabels = []
    for bracket_index in head_age_brackets.keys():
        bracket = head_age_brackets[bracket_index]
        xlabels.append(f'{min(bracket)}-{max(bracket)}')

    # Household size labels start at 2, one per row of the normalized table.
    family_sizes = list(range(2, len(expected_share) + 2))

    utilities.plot_heatmap(
        expected_values,
        actual_values,
        xlabels,
        family_sizes,
        'Head of Household Age',
        'Household Size',
        testprefix="household_head_age_family_size " + test_prefix,
        figdir=figdir,
        do_close=do_close)
Esempio n. 5
0
    syn_ages, syn_sexes = sp.get_usa_age_sex_n(datadir, location,
                                               state_location,
                                               country_location, totalpop)
    syn_age_count = Counter(syn_ages)
    syn_age_distr = sp.norm_dic(Counter(syn_ages))

    N = Nhomes
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    print(totalpop, 'pop')

    hha_df = sp.get_household_head_age_by_size_df(datadir, state_location,
                                                  country_location)
    hha_brackets = sp.get_head_age_brackets(datadir,
                                            country_location=country_location)
    hha_by_size = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    homes_dic, homes = sp.generate_all_households(N, hh_sizes, hha_by_size,
                                                  hha_brackets, age_brackets,
                                                  age_by_brackets_dic,
                                                  contact_matrix_dic,
                                                  deepcopy(syn_age_distr))
    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)
    new_ages_count = Counter(age_by_uid_dic.values())

    fig = plt.figure(figsize=(6, 4))
    ax = fig.add_subplot(111)

    x = np.arange(100)