Python norm_dic Examples, synthpops.norm_dic Python Examples

Example #1

0

Show file

def test_employment_age_distribution(do_show, do_save, create_sample_pop_e2e,
                                     get_fig_dir_by_module):
    """
    Compare the generated employment-by-age counts with the expected rates.

    The expected employment rates are normalized into a distribution and
    scaled to the number of employed people actually generated. Both sets of
    counts are expanded into flat lists of ages and handed to
    ``sp.statistic_test`` with the Kolmogorov-Smirnov test; finally the
    employment rates by age are plotted.

    Args:
        do_show               : forwarded to the plotting method
        do_save               : forwarded to the plotting method
        create_sample_pop_e2e : population object providing
                                ``count_employment_by_age``, ``loc_pars`` and
                                ``plot_employment_rates_by_age``
        get_fig_dir_by_module : forwarded to the plotting method as ``figdir``
    """
    sp.logger.info(
        "Test employment age distribution vs the employment_rates_by_age.dat")

    plotting_kwargs = sc.objdict(do_show=do_show,
                                 do_save=do_save,
                                 figdir=get_fig_dir_by_module)
    actual_employment_age_count = create_sample_pop_e2e.count_employment_by_age(
    )
    total_employee = sum(actual_employment_age_count.values())
    expected_employment_age_dist = sp.norm_dic(
        sp.get_employment_rates(**create_sample_pop_e2e.loc_pars))

    # scale the expected distribution to the actual number of employed people
    expected_employment_age_count = {
        age: round(share * total_employee)
        for age, share in expected_employment_age_dist.items()
    }

    # Expand each {age: count} mapping into a flat list with `count` copies of
    # every age. A nested comprehension is linear in the output size, whereas
    # sum(list_of_lists, []) re-copies the accumulated list at every step.
    generated_actual = [
        age for age, count in actual_employment_age_count.items()
        for _ in range(count)
    ]
    generated_expected = [
        age for age, count in expected_employment_age_count.items()
        for _ in range(count)
    ]
    # run statistical tests for employment by age distribution
    # TODO: Need to refine the data for fair comparison
    sp.statistic_test(expected=generated_expected,
                      actual=generated_actual,
                      test=st.kstest,
                      verbose=True)
    # plot employment rates by age
    create_sample_pop_e2e.plot_employment_rates_by_age(**plotting_kwargs)

Example #2

0

Show file

def test_work_size_distribution(do_show, do_save, create_sample_pop_e2e,
                                get_fig_dir_by_module):
    """
    Compare generated workplace sizes, grouped by size bracket, with the
    expected workplace size distribution.

    Args:
        do_show               : forwarded to the plotting method
        do_save               : forwarded to the plotting method
        create_sample_pop_e2e : population object providing
                                ``count_workplace_sizes``, ``loc_pars`` and
                                ``plot_workplace_sizes``
        get_fig_dir_by_module : forwarded to the plotting method as ``figdir``
    """
    sp.logger.info(
        "Test workplace size distribution vs the work_size_count.dat")

    plotting_kwargs = sc.objdict(do_show=do_show,
                                 do_save=do_save,
                                 figdir=get_fig_dir_by_module)

    location_pars = create_sample_pop_e2e.loc_pars
    size_to_bracket = sp.get_index_by_brackets(
        sp.get_workplace_size_brackets(**location_pars))

    size_counts = create_sample_pop_e2e.count_workplace_sizes()

    # tally the workplaces per size bracket; brackets without any workplace
    # keep a count of zero
    actual_count = dict.fromkeys(set(size_to_bracket.values()), 0)
    for size, bracket in size_to_bracket.items():
        actual_count[bracket] += size_counts.get(size, 0)

    expected_distr = sp.norm_dic(
        sp.get_workplace_size_distr_by_brackets(**location_pars))

    # scale the expected distribution to the actual number of workplaces
    n_workplaces = sum(actual_count.values())
    expected_count = {
        bracket: share * n_workplaces
        for bracket, share in expected_distr.items()
    }

    # statistical check on bracket-ordered counts
    expected_series = [expected_count[b] for b in sorted(expected_count)]
    actual_series = [actual_count[b] for b in sorted(actual_count)]
    sp.statistic_test(expected_series, actual_series)

    create_sample_pop_e2e.plot_workplace_sizes(**plotting_kwargs)

Example #3

0

Show file

def plot_age_dist(datadir, pop, pars, do_show, testprefix):
    """
    Plot the expected single-year age distribution next to the generated one.

    Args:
        datadir    : data directory passed to the synthpops data readers
        pop        : population dictionary; each value is a person with an 'age' entry
        pars       : dictionary read for 'country_location', 'state_location',
                     'location', 'smooth_ages', 'household_method' and, when
                     'smooth_ages' is truthy, 'window_length'
        do_show    : not used by this function (the plot is made with do_show=False)
        testprefix : label for the plot; underscores are replaced by spaces

    Returns:
        The (fig, ax) pair returned by sppl.plot_array after the axes are styled.
    """
    sp.logger.info(
        "Plot the expected age distribution and the generated age distribution."
    )

    census_brackets = sp.get_census_age_brackets(
        datadir,
        country_location=pars['country_location'],
        state_location=pars['state_location'],
        location=pars['location'])
    age_by_brackets_dic = sp.get_age_by_brackets_dic(census_brackets)

    # a window length of 1 is requested when smoothing is switched off
    window = pars['window_length'] if pars['smooth_ages'] else 1
    expected_age_distr = sp.get_smoothed_single_year_age_distr(
        datadir,
        location=pars['location'],
        state_location=pars['state_location'],
        country_location=pars['country_location'],
        window_length=window)

    # count the generated people per age, starting from zero for every expected age
    gen_age_count = {age: 0 for age in expected_age_distr}
    for person in pop.values():
        gen_age_count[person['age']] += 1

    gen_age_distr = sp.norm_dic(gen_age_count)

    # both series are plotted as percentages
    expected_pct = [share * 100 for share in expected_age_distr.values()]
    generated_pct = [share * 100 for share in gen_age_distr.values()]
    fig, ax = sppl.plot_array(
        expected_pct,
        generated=generated_pct,
        do_show=False,
        binned=True,
        testprefix=testprefix.replace('_', ' '))

    ax.set_xlabel('Ages')
    ax.set_ylabel('Distribution (%)')
    ax.set_ylim(bottom=0)
    ax.set_xlim(-1.5, max(age_by_brackets_dic.keys()) + 1.5)
    ax.set_title(
        f"Age Distribution of {pars['location'].replace('_', ' ')}: {pars['household_method'].replace('_', ' ')} method"
    )

    # reset the figure size
    fig.set_figheight(4)
    fig.set_figwidth(7)

    return fig, ax

Example #4

0

Show file

File: utilities.py Project: RCarter-IDM/synthpops

def get_age_distribution_from_pop(pop, brackets, normalized=True):
    """
    Count the people of a population per age bracket.

    Args:
        pop: population dictionary; each value is a person with an 'age' entry
        brackets: age brackets
        normalized: whether the counts are normalized with sp.norm_dic,
            defaults to True

    Returns:
        A numpy array with one entry per bracket index (0 .. len(brackets) - 1):
        the normalized distribution if ``normalized`` is True, otherwise the
        raw counts.
    """
    bracket_of_age = sp.get_age_by_brackets_dic(brackets)

    # start every bracket index at zero so empty brackets are still reported
    bracket_counts = {index: 0 for index in range(len(brackets))}
    for person in pop.values():
        bracket_counts[bracket_of_age[person['age']]] += 1

    distribution = sp.norm_dic(bracket_counts) if normalized else bracket_counts
    return np.array(list(distribution.values()))

Example #5

0

Show file

File: example_processing_us_ltcf_state_data.py Project: MeWu-IDM/synthpops-1

# Load the long term care facility (LTCF) use rates for the state/country.
# `datadir`, `state_location`, `country_location`, `acs_period` and
# `age_by_brackets_dic` are not defined in this excerpt; they come from
# earlier in the script.
ltcf_rates_by_age = sp.get_long_term_care_facility_use_rates(
    datadir, state_location=state_location, country_location=country_location)

# use the data to estimate the number of long term care facility users for a local region and a given population size
local_population_size = 225e3
# NOTE(review): the first `location` value is overwritten by the next line,
# so only 'Washington' is used below.
location = 'Seattle-Tacoma-Bellevue-WA-Metro-Area'
location = 'Washington'

# Age counts and age brackets for the chosen location, normalized into a
# distribution over brackets.
local_age_distr, local_age_brackets = sp.process_us_census_age_counts(
    datadir,
    location,
    state_location,
    country_location,
    year=2018,
    acs_period=acs_period)
local_age_distr = sp.norm_dic(local_age_distr)

# estimated number of LTCF users, accumulated per bracket index
local_users = {}

# For every key `a` of the rates (presumably a single age -- confirm), look up
# its bracket `b`, spread the bracket's share of the population evenly over
# the entries of that bracket, and apply the rate for `a`.
for a in sorted(ltcf_rates_by_age.keys()):
    b = age_by_brackets_dic[a]
    local_users.setdefault(b, 0)
    local_users[b] += local_population_size * local_age_distr[b] / len(
        local_age_brackets[b]) * ltcf_rates_by_age[a]

print(
    f'Total long term care facility users for {location} with population size {local_population_size:.0f} is: {sum(local_users.values()):.0f}.'
)

# use the Kaiser Health News data to estimate ratios
# location_alias = 'seattle_metro'  # saving data only for Seattle

Example #6

0

Show file

File: scratch_generate_contact_networks.py Project: MeWu-IDM/synthpops-1

                                                       state_location,
                                                       country_location,
                                                       use_bayesian)
    print(household_size_distr)

    Nhomes_to_sample_smooth = 100000
    hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth,
                                           household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    # hh_sizes = sp.generate_household_sizes(Nhomes,household_size_distr)

    syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location,
                                               totalpop)
    syn_age_count = Counter(syn_ages)
    syn_age_distr = sp.norm_dic(Counter(syn_ages))

    N = Nhomes
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)
    totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes)

    print(totalpop, 'pop')

    hha_df = sp.get_household_head_age_by_size_df(datadir, country_location,
                                                  use_bayesian)
    hha_brackets = sp.get_head_age_brackets(datadir, country_location,
                                            use_bayesian)
    hha_by_size = sp.get_head_age_by_size_distr(datadir, country_location,
                                                use_bayesian)

Example #7

0

Show file

File: test_summary_methods.py Project: TTI-modelling/synthpops

def test_information_in_generation():
    """
    Basic tests that summaries are produced when synthpops generates
    populations. Summaries are stored and accessed via sp.Pop().information.

    The population is built from the module-level ``pars``. Each check prints
    a confirmation message when it passes and raises an AssertionError with an
    explanatory message when it fails.
    """
    sp.logger.info("Test summaries are produced when populations are generated.")
    sp.logger.info("Temporary basic tests. To be reorganized and converted to plotting based tests.\n")

    pop = sp.Pop(**pars)

    # check age_count information
    assert isinstance(pop.information.age_count, dict), "Check failed"
    print(f"Check passed. Age count information exists and is a dictionary. The age range is from {min(pop.information.age_count.keys())} to {max(pop.information.age_count.keys())} years old.")

    assert sum(pop.information.age_count.values()) == pop.n, f"Check failed. The sum of pop.age_count ({sum(pop.information.age_count.values())}) does not equal the population size ({pop.n})."
    print(f"Check passed. Age count information of the generated population matches the expected size ({pop.n}).\n")

    # check household size information
    assert sum(pop.information.household_size_count.values()) > 0, "Check failed. No people placed into unique households."
    print("Check passed. Household sizes exist in pop object and is a dictionary by household id (hhid).")

    # total people by household id must equal sum(size * number of households of that size)
    assert sum(pop.information.household_sizes.values()) == sum(pop.information.household_size_count[k] * k for k in pop.information.household_size_count), "Household sizes information check failed."
    print("Check passed. Household sizes information check passed.\n")

    # check household size distribution
    household_size_dist = sp.get_generated_household_size_distribution(pop.information.household_sizes)
    expected_household_size_dist = sp.get_household_size_distr(**pop.loc_pars)
    if expected_household_size_dist[1] > 0:
        assert household_size_dist[1] > 0, "Check failed. No one lives alone even though the expected household size distribution says otherwise."
        print("Check passed. At least some people live alone as expected from the household size distribution.")

    # check household head information
    assert min(pop.information.household_head_ages.values()) >= 18, "Check failed. Min head of household age is younger than 18 years old."
    print("Check passed. All heads of households are at least 18 years old.")

    # check household head age count information
    assert sum(pop.information.household_head_age_count.values()) == len(pop.information.household_sizes), "Check on count of household head ages failed."
    print("Check passed. The count of household head ages matches the number of households created.\n")

    # check ltcf information
    assert sum(pop.information.ltcf_sizes.values()) > 0, "Check failed. No people placed in ltcfs."
    print("Check passed. Ltcfs created.")

    # count only LTCF residents
    ltcf_sizes_res = pop.get_ltcf_sizes(keys_to_exclude=['ltcf_staff'])
    assert sum(ltcf_sizes_res.values()) < sum(pop.information.ltcf_sizes.values()), "Check failed. Ltcf residents is greater than or equal to all people in ltcfs."
    print("Check passed. Ltcf residents created separately.")

    # check that those living in households or LTCFs account for the entire expected population
    assert sum(pop.information.household_sizes.values()) + sum(ltcf_sizes_res.values()) == pop.n, f"Check failed. Population size is {pop.n} and the sum of people generated living in households and ltcfs is {sum(pop.information.household_sizes.values()) + sum(ltcf_sizes_res.values())}."
    print("Check passed. Everyone lives either in a household or ltcf.")

    # count only LTCF staff
    ltcf_sizes_staff = pop.get_ltcf_sizes(keys_to_exclude=['ltcf_res'])
    assert sum(ltcf_sizes_res.values()) + sum(ltcf_sizes_staff.values()) == sum(pop.information.ltcf_sizes.values()), "Check failed. The sum of ltcf residents and staff counted separately does not equal the count of them together."
    print("Check passed. Ltcf staff created separately.\n")

    # check enrollment count by age
    # The failure message reads the same attribute as the check itself, so
    # building the message cannot fail on a different attribute lookup.
    assert sum(pop.information.enrollment_by_age.values()) > 0, f"Check failed. Student enrollment is less than or equal to 0 ({sum(pop.information.enrollment_by_age.values())})."
    print("Check passed. Student enrollment count by age exists and is greater than 0.")

    # check enrollment rates by age
    enrollment_rates_by_age = pop.enrollment_rates_by_age  # a property rather than stored data so make a copy here
    assert 0 < enrollment_rates_by_age[10] <= 1., f"Check failed. Enrollment rate for age 10 is less than or equal to 0 ({enrollment_rates_by_age[10]})."
    print(f"Check passed. Enrollment rate for age 10 is {enrollment_rates_by_age[10] * 100:.2f}%.\n")

    # check employment rates by age
    employment_rates_by_age = pop.employment_rates_by_age  # a property rather than stored data so make a copy here
    assert 0 < employment_rates_by_age[25] <= 1., f"Check failed. Employment rate for age 25 is less than or equal to 0 ({employment_rates_by_age[25]})."
    print(f"Check passed. Employment rate for age 25 is {employment_rates_by_age[25] * 100:.2f}%.")

    # check workplace sizes
    assert sum(pop.information.workplace_sizes.values()) > 0, "Check failed. Sum of workplace sizes is less than or equal to 0."
    print("Workplace sizes exists in pop object and is a dictionary by workplace id (wpid).")

    workplace_size_brackets = sp.get_workplace_size_brackets(**pop.loc_pars)

    # check that bins and bin labels can be made
    workplace_size_bins = sp.get_bin_edges(workplace_size_brackets)
    assert len(workplace_size_bins) >= 2, "Check failed. workplace size bins contains the limits for less than one bin."
    print(f"Check passed. There are {len(workplace_size_bins) - 1} workplace size bins.")

    # check that bin labels are all strings
    workplace_size_bin_labels = sp.get_bin_labels(workplace_size_brackets)
    label_types = {type(bl) for bl in workplace_size_bin_labels}

    assert len(label_types) == 1, "Check failed. There is more than one type for the workplace size bin labels generated."
    print("Check passed. There is only one type for workplace size bin labels generated.")

    assert isinstance(workplace_size_bin_labels[0], str), "Check failed. Bin labels are not strings."
    print("Check passed. Bin labels are strings.")

    workplace_size_dist = sp.get_generated_workplace_size_distribution(pop.information.workplace_sizes, workplace_size_bins)
    expected_workplace_size_dist = sp.norm_dic(sp.get_workplace_size_distr_by_brackets(sp.settings.datadir, state_location=pop.state_location, country_location=pop.country_location))
    if expected_workplace_size_dist[0] > 0:
        assert workplace_size_dist[0] > 0, "Check failed. Expected some workplaces to be created in the smallest bin size but there are none in this bin."
        print("Check passed for workplaces in the smallest bin.")

Example #8

0

Show file

# Map each student uid to that student's age. `syn_school_uids` and
# `syn_school_ages` are parallel sequences defined outside this excerpt.
uids_in_school = {syn_school_uids[n]: syn_school_ages[n] for n in range(len(syn_school_uids))}

# print(uids_in_school)

# Group the student uids by age, with an (initially empty) list for every age 0-99.
uids_in_school_by_age = {}
for a in range(100):
    uids_in_school_by_age[a] = []
for uid in uids_in_school:
    a = uids_in_school[uid]
    uids_in_school_by_age[a].append(uid)
# Number of students per age; ages 0-99 that have no students are added with a count of 0.
ages_in_school_count = dict(Counter(syn_school_ages))
for a in range(100):
    if a not in ages_in_school_count:
        ages_in_school_count[a] = 0
ages_in_school_distr = sp.norm_dic(ages_in_school_count)


# Draw one sample from a multinomial whose probabilities follow the iteration
# order of ages_in_school_distr; aindex is the position of the drawn entry in
# that list.
# NOTE(review): the zero-count ages are appended after the observed ones, so
# unless sp.norm_dic sorts its keys this position need not equal the age --
# confirm before using aindex as an age.
achoice = np.random.multinomial(1, [ages_in_school_distr[a] for a in ages_in_school_distr])
aindex = np.where(achoice)[0][0]


# Assign the students to schools by school type. The three names on the left
# rebind `syn_school_uids` (read above) along with the schools and their types.
syn_schools, syn_school_uids, syn_school_types = sp.send_students_to_school_with_school_types(school_size_distr_by_type,
                                                                                              school_size_brackets,
                                                                                              uids_in_school,
                                                                                              uids_in_school_by_age,
                                                                                              ages_in_school_count,
                                                                                              school_types_by_age,
                                                                                              school_type_age_ranges,
                                                                                              verbose=False)

Example #9

0

Show file

File: utilities_dist.py Project: TTI-modelling/synthpops

def check_work_size_distribution(pop,
                                 n,
                                 datadir,
                                 figdir,
                                 location=None,
                                 state_location=None,
                                 country_location=None,
                                 file_path=None,
                                 use_default=False,
                                 test_prefix="",
                                 skip_stat_check=False,
                                 do_close=True):
    """
    Compare the workplace size distribution of a population with the
    reference data, plot both and optionally run a statistical check.

    Args:
        pop              : population dictionary
        n                : population size (not used by this function)
        datadir          : root data directory holding the reference data
        figdir           : directory under which the plots are saved, in a
                           "work_size" subdirectory
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : file path to user specified data, forwarded to the
                           synthpops data readers
        use_default      : forwarded to the synthpops data readers
        test_prefix      : appended to the plot title prefix
        skip_stat_check  : skip the statistics check for the distribution
        do_close         : forwarded to the plotting helper

    Returns:
        None.
    """
    figdir = os.path.join(figdir, "work_size")

    # both readers take exactly the same location arguments
    reader_kwargs = dict(datadir=datadir,
                         location=location,
                         state_location=state_location,
                         country_location=country_location,
                         file_path=file_path,
                         use_default=use_default)
    wb = sp.get_workplace_size_brackets(**reader_kwargs)
    ws = sp.norm_dic(sp.get_workplace_size_distr_by_brackets(**reader_kwargs))
    ws_index = sp.get_index_by_brackets_dic(wb)
    upper_bound = max(ws_index.keys())

    actual_work_dist, actual_work_dist_none = utilities.get_ids_count_by_param(
        pop, "wpid")

    # tally workplaces per bracket; sizes above the largest indexed size are
    # counted in that largest size's bracket
    bracket_tally = {}
    for size in actual_work_dist.values():
        bracket = ws_index[min(size, upper_bound)]
        bracket_tally[bracket] = bracket_tally.get(bracket, 0) + 1

    # one entry per expected bracket, zero where no workplace was generated
    actual_values = np.array(
        [bracket_tally.get(index, 0) for index in range(len(ws))],
        dtype=float)
    actual_values = actual_values / np.nansum(actual_values)
    expected_values = np.array(list(ws.values()))

    # label every bracket by its first and last size
    xlabels = [f"{wb[b][0]!s}-{wb[b][-1]!s}" for b in sorted(wb.keys())]
    utilities.plot_array(expected_values,
                         actual_values,
                         names=xlabels,
                         datadir=figdir,
                         testprefix="work size distribution " + test_prefix,
                         do_close=do_close,
                         xlabel_rotation=50)
    if skip_stat_check:
        return
    utilities.statistic_test(expected_values,
                             actual_values,
                             test="x",
                             comments="work size distribution check")

Example #10

0

Show file

File: utilities_dist.py Project: TTI-modelling/synthpops

def check_school_size_distribution(pop,
                                   n,
                                   datadir,
                                   figdir,
                                   location=None,
                                   state_location=None,
                                   country_location=None,
                                   file_path=None,
                                   use_default=False,
                                   test_prefix="",
                                   skip_stat_check=True,
                                   do_close=True,
                                   school_type=None):
    """
    Check the school size distribution against the reference data

    For all schools together ("all") and for every entry of ``school_type``,
    the generated school sizes are bucketed by size bracket, plotted against
    the expected distribution, and the average school size is compared with
    the expected average.

    Args:
        pop              : population dictionary
        n                : population size, forwarded to the error percentage check
        datadir          : root data directory holding the reference data
        figdir           : directory under which the plots are saved, in a
                           "school_size" subdirectory
        location         : name of the location
        state_location   : name of the state
        country_location : name of the country the state_location is in
        file_path        : file path to user specified data, forwarded to the
                           school size bracket reader
        use_default      : forwarded to the school size bracket reader
        test_prefix      : used for prefix of the plot title
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True
        school_type      : list of school types e.g. ['pk', 'es', 'ms', 'hs', 'uv']

    Returns:
        None.
    """
    figdir = os.path.join(figdir, "school_size")
    sb = sp.get_school_size_brackets(datadir=datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     file_path=file_path,
                                     use_default=use_default)
    sb_index = sp.get_index_by_brackets_dic(sb)

    expected_school_size_by_brackets = sp.get_school_size_distr_by_brackets(
        datadir=datadir,
        location=location,
        state_location=state_location,
        country_location=country_location)
    expected_values = np.array(
        list(expected_school_size_by_brackets.values()))

    # The expected average school size does not depend on the school type, so
    # it is computed once here rather than for every type.
    # NOTE(review): these brackets are fetched without file_path/use_default,
    # unlike `sb` above -- kept as in the original; confirm whether intended.
    school_size_brackets = sp.get_school_size_brackets(
        datadir=datadir,
        location=location,
        country_location=country_location,
        state_location=state_location)
    # average school size per bracket, in the brackets' iteration order
    average_school_size_in_bracket = [
        sum(sizes) / len(sizes) for sizes in school_size_brackets.values()
    ]
    # expected value: sum(distribution * size); the distribution keys are used
    # as positions in the per-bracket average list
    expected_average_school_size = sum(
        share * average_school_size_in_bracket[bracket]
        for bracket, share in expected_school_size_by_brackets.items())

    # per-school counts for everyone in a school and for students only
    # (the second return value of the helper is not needed here)
    actual_school, _ = utilities.get_ids_count_by_param(pop, "scid")
    actual_school_student_only, _ = utilities.get_ids_count_by_param(
        pop, "sc_student", "scid")
    actual_per_school_type_dict = {"all": actual_school}
    actual_per_school_type_dict_student_only = {
        "all": actual_school_student_only
    }
    if school_type is not None:
        for sc in school_type:
            actual_per_school_type_dict[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc)[0]
            actual_per_school_type_dict_student_only[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc, filter_expression={'sc_student':'1'})[0]

    # get individual school type size distribution
    for k in actual_per_school_type_dict:
        # number of schools per school size
        actual_scount = dict(Counter(actual_per_school_type_dict[k].values()))
        actual_school_size_by_brackets = sp.norm_dic(
            utilities.get_bucket_count(sb_index, sb, actual_scount))
        actual_values = np.array(list(actual_school_size_by_brackets.values()))
        utilities.plot_array(expected_values,
                             actual_values,
                             names=sb.keys(),
                             datadir=figdir,
                             testprefix="school size " + test_prefix + " " + k,
                             do_close=do_close)
        utilities.plot_array(
            actual_per_school_type_dict[k].values(),
            datadir=figdir,
            expect_label=
            f"school count: total {len(actual_per_school_type_dict[k])}",
            testprefix="school size total\n" + test_prefix + " " + k,
            binned=False,
            do_close=do_close)
        utilities.plot_array(
            actual_per_school_type_dict_student_only[k].values(),
            datadir=figdir,
            expect_label=
            f"school count: total {len(actual_per_school_type_dict[k])}",
            testprefix="school size total (student only)\n" + test_prefix +
            " " + k,
            binned=False,
            do_close=do_close)
        # statistic_test is not working yet because school sizes are now available by school type. Also depends strongly on population size.
        if not skip_stat_check:
            utilities.statistic_test(expected_values,
                                     actual_values,
                                     test="x",
                                     comments="school size check")
        # check average school size: sum(size * number of schools) / number of schools
        actual_average_school_size = sum(
            size * count
            for size, count in actual_scount.items()) / sum(
                actual_scount.values())
        utilities.check_error_percentage(n,
                                         expected_average_school_size,
                                         actual_average_school_size,
                                         name=f"average school size:'{k}'")
    # check school count distribution
    utilities.plot_array([
        len(schools) for schools in actual_per_school_type_dict.values()
    ],
                         names=list(actual_per_school_type_dict.keys()),
                         datadir=figdir,
                         expect_label="school count",
                         testprefix="school count " + test_prefix,
                         value_text=True)

Example #11

0

Show file

File: utilities_dist.py Project: TTI-modelling/synthpops

def check_household_distribution(pop,
                                 n,
                                 datadir,
                                 figdir,
                                 location=None,
                                 state_location=None,
                                 country_location=None,
                                 file_path=None,
                                 use_default=False,
                                 test_prefix="",
                                 skip_stat_check=False,
                                 do_close=True):
    """
    Check the household size distribution against the reference data

    The generated household sizes are counted, normalized and plotted against
    the expected household size distribution; optionally a statistical check
    is run. Finally the expected and actual average household sizes are
    printed and compared.

    Args:
        pop              : population dictionary
        n                : population size, forwarded to the error percentage check
        datadir          : root data directory holding the reference data
        figdir           : directory under which the plots are saved, in a
                           "household" subdirectory
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : file path to user specified data, forwarded to the
                           household size reader
        use_default      : forwarded to the household size reader
        test_prefix      : used for prefix of the plot title
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True

    Returns:
        None.

    Raises:
        AssertionError: if some entries of the population have no household id.
    """
    figdir = os.path.join(figdir, "household")
    hs = sp.get_household_size_distr(datadir=datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     file_path=file_path,
                                     use_default=use_default)
    actual_households, actual_households_none = utilities.get_ids_count_by_param(
        pop, "hhid")
    assert actual_households_none == {}, "all entries must have household ids"

    # number of households per household size, ordered by size
    actual_household_count = Counter(actual_households.values())
    sorted_actual_household_count = {
        size: actual_household_count[size]
        for size in sorted(actual_household_count)
    }
    actual_distr = sp.norm_dic(sorted_actual_household_count)
    actual_values = np.array(list(actual_distr.values()))
    expected_values = np.array(list(hs.values()))
    utilities.plot_array(expected_values,
                         actual_values,
                         names=list(hs.keys()),
                         datadir=figdir,
                         testprefix="household count percentage " +
                         test_prefix,
                         do_close=do_close,
                         value_text=True)

    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="household count percentage check")

    # check average household size: mean = sum(size * share of households).
    # Each share is paired with its own dictionary key, so a size missing from
    # the generated population cannot shift the weights of the other sizes.
    # NOTE(review): assumes the keys of `hs` are household sizes (they are the
    # x-axis labels of the plot above) -- confirm against the data reader.
    expected_average_household_size = round(
        sum(size * share for size, share in hs.items()), 3)
    actual_average_household_size = round(
        sum(size * share for size, share in actual_distr.items()), 3)
    print(
        f"expected average household size: {expected_average_household_size}\n"
        f"actual average household size: {actual_average_household_size}")
    utilities.check_error_percentage(n,
                                     expected_average_household_size,
                                     actual_average_household_size,
                                     name="average household size")