def test_webapp_synthpops_calls(n=default_n,
                                location='seattle_metro',
                                state_location='Washington',
                                country_location='usa',
                                sheet_name='United States of America'):
    """
    Smoke test that runs a sequence of synthpops data-loading calls end to end.

    The calls are made for their side effects only (they must not raise); no
    return values are asserted.

    Args:
        n                : number of ages to draw via sp.get_age_n; cast to int
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the state is in
        sheet_name       : sheet name used to look up the contact matrices

    Returns:
        None.
    """
    datadir = sp.datadir

    sp.read_age_bracket_distr(datadir,
                              location=location,
                              state_location=state_location,
                              country_location=country_location)
    sp.get_census_age_brackets(datadir,
                               state_location=state_location,
                               country_location=country_location)
    num_agebrackets = 16

    n_contacts_dic = {'H': 4.11, 'S': 11.41, 'W': 8.07, 'C': 7}

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir,
                                                   sheet_name=sheet_name)
    contact_matrix_dic['M'] = sp.combine_matrices(contact_matrix_dic,
                                                  n_contacts_dic,
                                                  num_agebrackets)

    for matrix in contact_matrix_dic.values():
        print(matrix.shape)

    # Use the caller-supplied n (previously the module default was always
    # passed here, so the argument had no effect).
    n = int(n)
    sp.get_age_n(datadir,
                 n=n,
                 location=location,
                 state_location=state_location,
                 country_location=country_location)
Exemple #2
0
def get_average_contact_by_age(pop,
                               datadir,
                               state_location="Washington",
                               country_location="usa",
                               setting_code="H",
                               decimal=3):
    """
    Compute the mean number of contacts per person for each census age bracket.

    Args:
        pop              : population dictionary; each person provides an "age"
                           and a "contacts" entry indexed by setting code
        datadir          : data directory to look up reference data
        state_location   : state location
        country_location : country location
        setting_code     : contact layer code, can be "H", "W", "S"
        decimal          : digits for rounding, default to 3

    Returns:
        numpy.ndarray: average contacts per age bracket, rounded to ``decimal``
        digits. A bracket with no people results in a 0/0 division (nan).

    """
    age_brackets = sp.get_census_age_brackets(datadir, state_location,
                                              country_location)
    bracket_of_age = sp.get_age_by_brackets_dic(age_brackets)
    n_brackets = len(age_brackets)
    people_per_bracket = np.zeros(n_brackets)
    contacts_per_bracket = np.zeros(n_brackets)
    for person in pop.values():
        bracket = bracket_of_age[person["age"]]
        people_per_bracket[bracket] += 1
        contacts_per_bracket[bracket] += len(person["contacts"][setting_code])
    return np.round(contacts_per_bracket / people_per_bracket,
                    decimals=decimal)
Exemple #3
0
def test_age_brackets_used_with_contact_matrix():
    """
    Check that the age brackets loaded for a population have as many bins as
    the contact matrices used for it.

    Note:
        sp.get_census_age_brackets() is called with nbrackets set to the size
        of the contact matrix; without nbrackets the number of age brackets
        may not match the granularity of the contact matrix.

    """
    sp.logger.info(
        "Test that the age brackets used in sp.Pop.generate() with the contact matrices have the same number of bins as the contact matrices."
    )

    pop_obj = sp.Pop(**pars)
    sheet_name = pop_obj.sheet_name
    # this is basically what sp.make_population does...
    pop = pop_obj.to_dict()

    matrices = sp.get_contact_matrix_dic(sp.datadir, sheet_name=sheet_name)
    first_layer = list(matrices.keys())[0]
    contact_matrix_nbrackets = matrices[first_layer].shape[0]

    bracket_kwargs = dict(country_location=pop_obj.country_location,
                          state_location=pop_obj.state_location,
                          location=pop_obj.location,
                          nbrackets=contact_matrix_nbrackets)
    cm_age_brackets = sp.get_census_age_brackets(sp.datadir, **bracket_kwargs)

    assert contact_matrix_nbrackets == len(cm_age_brackets), \
        f'Check failed, len(contact_matrix_nbrackets): {contact_matrix_nbrackets} does not match len(cm_age_brackets): {len(cm_age_brackets)}.'
    print(
        f'Check passed. The age brackets loaded match the number of age brackets for the contact matrices used for the location.'
    )
Exemple #4
0
def rebin_matrix_by_age(matrix,
                        datadir,
                        location="seattle_metro",
                        state_location="Washington",
                        country_location="usa"):
    """
    Aggregate a contact matrix into census age brackets and average each cell.

    Each aggregated cell (i, j) is divided by the number of single ages in
    bracket i times the number of single ages in bracket j.

    @TODO: should we merge the functionalities with sp.get_aggregate_matrix
    or remove as this operation may not be scientifically meaningful (?)

    Args:
        matrix           : raw matrix with single age bracket
        datadir          : data directory
        location         : location
        state_location   : state location
        country_location : country location

    Returns:
        numpy.ndarray: A matrix with desired age bracket with average values for all cells.

    """
    age_brackets = sp.get_census_age_brackets(datadir, location,
                                              state_location,
                                              country_location)
    bracket_of_age = sp.get_age_by_brackets_dic(age_brackets)
    averaged = sp.get_aggregate_matrix(matrix, bracket_of_age)
    # how many single ages fall into each bracket index
    ages_per_bracket = Counter(bracket_of_age.values())
    n_brackets = len(ages_per_bracket)
    for row in range(n_brackets):
        row_size = ages_per_bracket[row]
        for col in range(n_brackets):
            averaged[row, col] /= (row_size * ages_per_bracket[col])
    return averaged
def test_plot_generated_trimmed_contact_matrix(setting_code='H', n=5000, aggregate_flag=True, logcolors_flag=True,
                                               density_or_frequency='density'):
    """
    Generate contacts, trim them, and plot the resulting contact frequency.

    Args:
        setting_code         : contact setting code forwarded to the plot call
        n                    : population size, passed as 'Npop' (cast to int)
        aggregate_flag       : forwarded to sp.plot_contact_frequency
        logcolors_flag       : forwarded to sp.plot_contact_frequency
        density_or_frequency : forwarded to sp.calculate_contact_matrix and the plot call

    Returns:
        The figure returned by sp.plot_contact_frequency.
    """
    datadir = sp.datadir

    country_location = 'usa'
    state_location = 'Washington'
    location = 'seattle_metro'

    contacts = sp.make_contacts({},
                                state_location=state_location,
                                location=location,
                                options_args={'use_microstructure': True},
                                network_distr_args={'Npop': int(n)})
    contacts = sp.trim_contacts(contacts, trimmed_size_dic=None, use_clusters=False)

    age_brackets = sp.get_census_age_brackets(datadir,
                                              state_location=state_location,
                                              country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # tally single-year ages, then roll them up into the census brackets
    age_count = Counter([contacts[uid]['age'] for uid in contacts])
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    freq_matrix_dic = sp.calculate_contact_matrix(contacts, density_or_frequency)

    return sp.plot_contact_frequency(freq_matrix_dic, age_count, aggregate_age_count, age_brackets,
                                     age_by_brackets_dic, setting_code, density_or_frequency,
                                     logcolors_flag, aggregate_flag)
Exemple #6
0
def test_age_brackets_used_with_contact_matrix():
    """
    Check that the age brackets loaded for a population have as many bins as
    the contact matrices used for it.

    Note:
        sp.get_census_age_brackets() is called with the population's location
        parameters plus nbrackets set to the size of the contact matrix;
        without nbrackets the number of age brackets may not match the
        granularity of the contact matrix.

    """
    sp.logger.info(
        "Test that the age brackets used in sp.Pop.generate() with the contact matrices have the same number of bins as the contact matrices."
    )

    pop = sp.Pop(**pars)
    sheet_name = pop.sheet_name
    loc_pars = pop.loc_pars

    contact_matrices = sp.get_contact_matrices(sp.settings.datadir,
                                               sheet_name=sheet_name)
    first_layer = list(contact_matrices.keys())[0]
    contact_matrix_nbrackets = contact_matrices[first_layer].shape[0]

    bracket_pars = sc.mergedicts(loc_pars,
                                 {'nbrackets': contact_matrix_nbrackets})
    cm_age_brackets = sp.get_census_age_brackets(**bracket_pars)

    assert contact_matrix_nbrackets == len(cm_age_brackets), \
        f'Check failed, len(contact_matrix_nbrackets): {contact_matrix_nbrackets} does not match len(cm_age_brackets): {len(cm_age_brackets)}.'
    print(
        f'Check passed. The age brackets loaded match the number of age brackets for the contact matrices used for the location.'
    )
Exemple #7
0
 def get_census_age_brackets(self):
     """
     Load the census age brackets for this object's default state and country.

     Returns:
         dict: bracket key -> list of the ages in that bracket.
     """
     raw_brackets = sp.get_census_age_brackets(
         sp.datadir,
         state_location=self.d_state_location,
         country_location=self.d_country_location)
     # convert each bracket's sequence of ages into a plain list
     return {key: list(raw_brackets[key]) for key in raw_brackets}
Exemple #8
0
def plot_generated_trimmed_contact_matrix(datadir,
                                          n,
                                          location='seattle_metro',
                                          state_location='Washington',
                                          country_location='usa',
                                          setting_code='H',
                                          aggregate_flag=True,
                                          logcolors_flag=True,
                                          density_or_frequency='density',
                                          trimmed_size_dic=None):
    """
    Generate contacts, trim them, and plot the contact matrix for one setting.

    Args:
        datadir              : data directory used to look up the census age brackets
        n                    : population size, passed as 'Npop' (cast to int)
        location             : name of the location
        state_location       : name of the state the location is in
        country_location     : name of the country the state is in
        setting_code         : contact setting code for the matrix and the plot
        aggregate_flag       : forwarded to plot_contact_matrix
        logcolors_flag       : forwarded to plot_contact_matrix
        density_or_frequency : forwarded to calculate_contact_matrix and plot_contact_matrix
        trimmed_size_dic     : forwarded to sp.trim_contacts

    Returns:
        The figure returned by plot_contact_matrix.
    """
    popdict = {}

    options_args = {'use_microstructure': True}
    network_distr_args = {'Npop': int(n)}
    contacts = sp.make_contacts(popdict,
                                country_location=country_location,
                                state_location=state_location,
                                location=location,
                                options_args=options_args,
                                network_distr_args=network_distr_args)
    contacts = sp.trim_contacts(contacts,
                                trimmed_size_dic=trimmed_size_dic,
                                use_clusters=False)

    age_brackets = sp.get_census_age_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # tally single-year ages, then roll them up into the census brackets
    age_count = Counter(contacts[uid]['age'] for uid in contacts)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    symmetric_matrix = calculate_contact_matrix(contacts, density_or_frequency,
                                                setting_code)

    fig = plot_contact_matrix(symmetric_matrix,
                              age_count,
                              aggregate_age_count,
                              age_brackets,
                              age_by_brackets_dic,
                              setting_code=setting_code,
                              density_or_frequency=density_or_frequency,
                              logcolors_flag=logcolors_flag,
                              aggregate_flag=aggregate_flag)
    return fig
    def get_pop_details(self,
                        pop,
                        dir,
                        title_prefix,
                        location,
                        state_location,
                        country_location,
                        decimal=3):
        """
        Write per-setting contact summaries (JSON, CSV and figures) for a population.

        For each of the settings 'H', 'W' and 'S' this saves the average
        contacts by age bracket as JSON in ``dir`` and plots them, then for
        both 'density' and 'frequency' saves the age-bracket aggregated
        contact matrix as CSV in ``dir`` and its figure in ``self.figDir``.

        Args:
            pop              : population dictionary
            dir              : output directory for the JSON/CSV files (created if missing)
            title_prefix     : not used by this method
            location         : not used by this method
            state_location   : state location used for the age bracket lookup
            country_location : country location used for the age bracket lookup
            decimal          : digits used for rounding and for the CSV number format

        Returns:
            None.
        """
        os.makedirs(dir, exist_ok=True)
        fmt = f'%.{decimal}f'
        run_tag = f"{self.n}_seed_{self.seed}"
        label = 'Expected' if self.generateBaseline else 'Test'

        # The age brackets depend only on the location, so look them up once
        # instead of once per (setting, matrix type) combination.
        brackets = sp.get_census_age_brackets(self.datadir,
                                              state_location,
                                              country_location)
        ageindex = sp.get_age_by_brackets_dic(brackets)

        for setting_code in ['H', 'W', 'S']:
            # Forward the requested location so the averages use the same age
            # brackets as the matrices below, rather than the helper defaults.
            average_contacts = utilities.get_average_contact_by_age(
                pop,
                self.datadir,
                state_location=state_location,
                country_location=country_location,
                setting_code=setting_code,
                decimal=decimal)
            utilities.plot_array(
                average_contacts,
                datadir=self.figDir,
                testprefix=f"{run_tag}_{setting_code}_average_contacts",
                expect_label=label)
            sc.savejson(os.path.join(
                dir, f"{run_tag}_{setting_code}_average_contact.json"),
                        dict(enumerate(average_contacts.tolist())),
                        indent=2)

            for matrix_type in ['density', 'frequency']:
                matrix = sp.calculate_contact_matrix(pop, matrix_type,
                                                     setting_code)
                agg_matrix = sp.get_aggregate_matrix(matrix, ageindex)
                file_stem = f"{run_tag}_{setting_code}_{matrix_type}_contact_matrix"
                np.savetxt(os.path.join(dir, f"{file_stem}.csv"),
                           agg_matrix,
                           delimiter=",",
                           fmt=fmt)
                fig = plot_age_mixing_matrices.test_plot_generated_contact_matrix(
                    setting_code=setting_code,
                    population=pop,
                    title_prefix=f" {label} ",
                    density_or_frequency=matrix_type)
                fig.savefig(os.path.join(self.figDir, f"{file_stem}.png"))
Exemple #10
0
def test_plot_generated_trimmed_contact_matrix(setting_code='H', n=5000, aggregate_flag=True, logcolors_flag=True,
                                               density_or_frequency='density', with_facilities=False, cmap='cmr.freeze_r', fontsize=16, rotation=50):
    """
    Plot the age mixing matrix for a specific setting of a generated population.

    Note:
        The population comes from sp.make_population(); no separate edge
        trimming step is performed in this function.

    Args:
        setting_code (str)               : name of the physical contact setting: H for households, S for schools, W for workplaces, C for community or other
        n (int)                          : number of people in the population
        aggregate_flag (bool)            : If True, plot the contact matrix for aggregate age brackets, else single year age contact matrix.
        logcolors_flag (bool)            : If True, plot heatmap in logscale
        density_or_frequency (str)       : If 'density', then each contact counts for 1/(group size -1) of a person's contact in a group, elif 'frequency' then count each contact. This means that more people in a group leads to higher rates of contact/exposure.
        with_facilities (bool)           : If True, create long term care facilities
        cmap(str or matplotlib colormap) : colormap
        fontsize (int)                   : base font size
        rotation (int)                   : rotation for x axis labels

    Returns:
        A fig object.

    """
    datadir = sp.datadir

    state_location = 'Washington'
    country_location = 'usa'

    population = sp.make_population(n, generate=True, with_facilities=with_facilities)

    age_brackets = sp.get_census_age_brackets(datadir, state_location=state_location, country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # tally single-year ages, then roll them up into the census brackets
    age_count = Counter(population[uid]['age'] for uid in population)
    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    matrix = sp.calculate_contact_matrix(population, density_or_frequency, setting_code)

    fig = sp.plot_contact_matrix(matrix, age_count, aggregate_age_count, age_brackets, age_by_brackets_dic,
                                 setting_code, density_or_frequency, logcolors_flag, aggregate_flag, cmap, fontsize, rotation)

    return fig
Exemple #11
0
def plot_age_dist(datadir, pop, pars, do_show, testprefix):
    """
    Plot the expected age distribution against the generated one.

    Args:
        datadir    : data directory used to look up the reference data
        pop        : population dictionary; each person provides an 'age'
        pars       : parameters; reads 'country_location', 'state_location',
                     'location', 'smooth_ages', 'household_method' and, when
                     'smooth_ages' is truthy, 'window_length'
        do_show    : not used here; the plot is always created with do_show=False
        testprefix : plot prefix; underscores are replaced by spaces

    Returns:
        tuple: (fig, ax) of the plot.
    """
    sp.logger.info(
        "Plot the expected age distribution and the generated age distribution."
    )

    loc_kwargs = dict(country_location=pars['country_location'],
                      state_location=pars['state_location'],
                      location=pars['location'])

    age_brackets = sp.get_census_age_brackets(datadir, **loc_kwargs)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # a window length of 1 is used when smoothing is not requested
    window_length = pars['window_length'] if pars['smooth_ages'] else 1
    expected_age_distr = sp.get_smoothed_single_year_age_distr(
        datadir, window_length=window_length, **loc_kwargs)

    gen_age_count = {age: 0 for age in expected_age_distr.keys()}
    for person in pop.values():
        gen_age_count[person['age']] += 1

    gen_age_distr = sp.norm_dic(gen_age_count)

    expected_pct = [v * 100 for v in expected_age_distr.values()]
    generated_pct = [v * 100 for v in gen_age_distr.values()]
    fig, ax = sppl.plot_array(expected_pct,
                              generated=generated_pct,
                              do_show=False,
                              binned=True,
                              testprefix=testprefix.replace('_', ' '))

    ax.set_xlabel('Ages')
    ax.set_ylabel('Distribution (%)')
    ax.set_ylim(bottom=0)
    ax.set_xlim(-1.5, max(age_by_brackets_dic.keys()) + 1.5)
    ax.set_title(
        f"Age Distribution of {pars['location'].replace('_', ' ')}: {pars['household_method'].replace('_', ' ')} method"
    )

    # reset the figure size
    fig.set_figheight(4)
    fig.set_figwidth(7)

    return fig, ax
Exemple #12
0
def plot_contact_matrix_after_intervention(n,
                                           n_days,
                                           interventions,
                                           intervention_name,
                                           location='seattle_metro',
                                           state_location='Washington',
                                           country_location='usa',
                                           aggregate_flag=True,
                                           logcolors_flag=True,
                                           density_or_frequency='density',
                                           setting_code='H',
                                           cmap='cmr.freeze_r',
                                           fontsize=16,
                                           rotation=50):
    """
    Run a simulation with the given interventions and plot its contact matrix.

    Args:
        n                    : population size (sim parameter 'pop_size')
        n_days               : number of days to simulate (sim parameter 'n_days')
        interventions        : passed to cv.Sim as ``interventions``
        intervention_name    : not used by this function
        location             : not used by this function
        state_location       : state location for the age bracket lookup
        country_location     : country location for the age bracket lookup
        aggregate_flag       : forwarded to sp.plot_contact_matrix
        logcolors_flag       : forwarded to sp.plot_contact_matrix
        density_or_frequency : forwarded to calculate_contact_matrix and the plot call
        setting_code         : contact setting code for the matrix and the plot
        cmap                 : forwarded to sp.plot_contact_matrix
        fontsize             : forwarded to sp.plot_contact_matrix
        rotation             : forwarded to sp.plot_contact_matrix

    Returns:
        The figure returned by sp.plot_contact_matrix.
    """
    sim_pars = sc.objdict(pop_size=n, n_days=n_days, pop_type='synthpops')
    sim = cv.Sim(pars=sim_pars, interventions=interventions)
    sim.run()

    age_brackets = sp.get_census_age_brackets(
        sp.datadir,
        state_location=state_location,
        country_location=country_location)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    # round to one decimal, then truncate to whole-year ages
    ages = np.round(sim.people.age, 1).astype(int)
    max_age = max(ages)
    age_count = dict(Counter(ages))
    # make sure every age up to the oldest one has an entry
    for age in range(max_age + 1):
        age_count.setdefault(age, 0)

    aggregate_age_count = sp.get_aggregate_ages(age_count, age_by_brackets_dic)

    matrix = calculate_contact_matrix(sim, density_or_frequency, setting_code)

    return sp.plot_contact_matrix(matrix, age_count, aggregate_age_count,
                                  age_brackets, age_by_brackets_dic,
                                  setting_code, density_or_frequency,
                                  logcolors_flag, aggregate_flag, cmap,
                                  fontsize, rotation)
Exemple #13
0
 def test_seattle_age_brackets(self):
     """
     Load the Washington/usa census age brackets (use_default=False) and
     verify the age-to-bracket dictionary built from them.

     When ``self.is_debugging`` is set, the brackets are also dumped to a
     DEBUG_*.json file; this test sets the flag to False first.
     """
     self.is_debugging = False
     age_brackets = sp.get_census_age_brackets(datadir=sp.datadir,
                                               state_location="Washington",
                                               country_location="usa",
                                               use_default=False)
     # .tolist() turns each bracket into a JSON-serializable list
     age_brackets_json = {k: age_brackets[k].tolist() for k in age_brackets}
     if self.is_debugging:
         debug_path = f"DEBUG_{self._testMethodName}_age_brackets.json"
         with open(debug_path, "w") as outfile:
             json.dump(age_brackets_json, outfile, indent=4)
     self.verify_age_bracket_dictionary_correct(
         sp.get_age_by_brackets_dic(age_brackets=age_brackets))
Exemple #14
0
def process_age_tables():
    """
    Preprocess the age tables into single-year and 16-bracket distributions.

    Reads sheet 'A5' of 'Series A. Population Tables.xlsx' in ``dir_path``,
    normalizes the 'National' counts into an age distribution, and writes:

    - 'Malawi_national_ages.csv': one row per single-year age
    - 'Malawi_national_ages_16.csv': the distribution aggregated into the
      16 census age brackets

    Returns:
        None.
    """
    file_path = os.path.join(dir_path, 'Series A. Population Tables.xlsx')
    df = pd.read_excel(file_path,
                       sheet_name='A5',
                       header=1,
                       skiprows=[2, 3],
                       skipfooter=303)

    # the first data row is skipped in both columns
    ages = df['Age in single Years'].values[1:]
    age_count = np.array(df['National'].values[1:])
    age_range = np.arange(len(ages))

    age_dist = age_count / age_count.sum()
    age_dist_mapping = dict(zip(age_range, age_dist))

    data = dict(age_min=sc.dcp(age_range),
                age_max=sc.dcp(age_range),
                age_dist=age_dist)
    # the last single-year bin is given an upper bound of 100
    data['age_max'][-1] = 100
    new_df = pd.DataFrame.from_dict(data)

    new_file_path = os.path.join(dir_path, 'Malawi_national_ages.csv')
    new_df.to_csv(new_file_path, index=False)

    # NOTE(review): 'seattle-metro' uses a hyphen; confirm this is the
    # intended location name for the bracket lookup.
    census_age_brackets = sp.get_census_age_brackets(
        sp.settings.datadir,
        location='seattle-metro',
        state_location='Washington',
        country_location='usa',
        nbrackets=16)
    census_age_by_brackets = sp.get_age_by_brackets(census_age_brackets)

    agg_ages = sp.get_aggregate_ages(age_dist_mapping, census_age_by_brackets)

    # Use one bracket order for all three columns so each row's age_min,
    # age_max and age_dist always describe the same bracket.
    bracket_keys = sorted(census_age_brackets.keys())
    agg_data = dict()
    agg_data['age_min'] = np.array(
        [census_age_brackets[b][0] for b in bracket_keys])
    agg_data['age_max'] = np.array(
        [census_age_brackets[b][-1] for b in bracket_keys])
    agg_data['age_dist'] = np.array([agg_ages[b] for b in bracket_keys])
    agg_df = pd.DataFrame.from_dict(agg_data)
    print(agg_df)
    agg_path = os.path.join(dir_path, 'Malawi_national_ages_16.csv')
    agg_df.to_csv(agg_path, index=False)
Exemple #15
0
    def get_census_age_brackets(self):
        """
        Load the census age brackets for this object's default state and
        country from the synthpops settings data directory.

        Returns:
            dict: bracket index -> list of the ages in that bracket.
        """
        raw_brackets = sp.get_census_age_brackets(
            sp.settings.datadir,
            state_location=self.d_state_location,
            country_location=self.d_country_location)
        # convert each bracket's sequence of ages into a plain list
        return {key: list(raw_brackets[key]) for key in raw_brackets}
Exemple #16
0
def test_older_ages_have_household_contacts():
    """
    Check that people aged 85+ have household contacts with each other exactly
    when the household contact matrix expects them to.

    If sp.Pop.generate() used the wrong number of age brackets with the
    contact matrices, the oldest ages would not be generated as household
    contacts for each other: the 85+ block of the generated household matrix
    would be 0 for relatively large populations even though the reference
    household contact matrix says otherwise.
    """
    test_pars = sc.dcp(pars)
    test_pars['n'] = 20e3

    pop = sp.Pop(**test_pars)
    pop_dict = pop.to_dict()

    contact_matrix_dic = sp.get_contact_matrix_dic(sp.datadir,
                                                   sheet_name=pop.sheet_name)
    first_layer = list(contact_matrix_dic.keys())[0]
    contact_matrix_nbrackets = contact_matrix_dic[first_layer].shape[0]

    cm_age_brackets = sp.get_census_age_brackets(
        sp.datadir,
        country_location=pop.country_location,
        state_location=pop.state_location,
        location=pop.location,
        nbrackets=contact_matrix_nbrackets)
    cm_age_by_brackets_dic = sp.get_age_by_brackets_dic(cm_age_brackets)

    age_threshold = 85
    age_threshold_bracket = cm_age_by_brackets_dic[age_threshold]

    # reference matrix is indexed by bracket, generated matrix by single age
    household_matrix = contact_matrix_dic['H']
    expected_older_contact = np.sum(
        household_matrix[age_threshold_bracket:, age_threshold_bracket:])

    matrix = sp.calculate_contact_matrix(pop_dict, setting_code='H')
    gen_older_age_contacts = np.sum(matrix[age_threshold:, age_threshold:])

    if expected_older_contact == 0:
        assert gen_older_age_contacts == 0, f'Check failed, individuals over {age_threshold} years old have {gen_older_age_contacts} household contacts with each other even though the household contact matrix expects them to have none.'
    else:
        assert gen_older_age_contacts != 0, f'Check failed, individuals over {age_threshold} years old have no contacts with each other in households even though the household contact matrix expects them to.'
    print('Check passed.')
Exemple #17
0
def test_older_ages_have_household_contacts():
    """
    Check that people aged 80+ have household contacts with each other exactly
    when the household contact matrix expects them to.

    If sp.Pop.generate() used the wrong number of age brackets with the
    contact matrices, the oldest ages would not be generated as household
    contacts for each other: the 80+ block of the generated household matrix
    would be 0 for relatively large populations even though the reference
    household contact matrix says otherwise.
    """
    test_pars = sc.dcp(pars)

    # decent size to check older populations in households
    test_pars.n = settings.pop_sizes.medium_large

    pop = sp.Pop(**test_pars)
    pop_dict = pop.to_dict()
    loc_pars = pop.loc_pars

    contact_matrices = sp.get_contact_matrices(sp.settings.datadir,
                                               sheet_name=pop.sheet_name)
    first_layer = list(contact_matrices.keys())[0]
    contact_matrix_nbrackets = contact_matrices[first_layer].shape[0]

    bracket_pars = sc.mergedicts(loc_pars,
                                 {'nbrackets': contact_matrix_nbrackets})
    cm_age_brackets = sp.get_census_age_brackets(**bracket_pars)
    cm_age_by_brackets = sp.get_age_by_brackets(cm_age_brackets)

    age_threshold = 80
    age_threshold_bracket = cm_age_by_brackets[age_threshold]

    # reference matrix is indexed by bracket, generated matrix by single age
    household_matrix = contact_matrices['H']
    expected_older_contact = np.sum(
        household_matrix[age_threshold_bracket:, age_threshold_bracket:])

    matrix = sp.calculate_contact_matrix(pop_dict, layer='H')
    gen_older_age_contacts = np.sum(matrix[age_threshold:, age_threshold:])

    if expected_older_contact == 0:
        assert gen_older_age_contacts == 0, f'Check failed, individuals over {age_threshold} years old have {gen_older_age_contacts} household contacts with each other even though the household contact matrix expects them to have none.'
    else:
        assert gen_older_age_contacts != 0, f'Check failed, individuals over {age_threshold} years old have no contacts with each other in households even though the household contact matrix expects them to.'
    print('Check passed.')
Exemple #18
0
def test_assign_rest_of_workers(state_location='Washington',
                                country_location='usa'):
    """
    Exercise sp.assign_rest_of_workers using the outputs of
    test_generate_workplace_sizes() and test_get_uids_potential_workers(),
    then sanity-check the returned worker bookkeeping structures.

    Args:
        state_location   : state location for the age bracket lookup
        country_location : country location for the age bracket lookup
    """
    workers_by_age_to_assign_count, workplace_size_brackets, workplace_size_distr_by_brackets, \
    workplace_sizes = test_generate_workplace_sizes()

    potential_worker_uids, potential_worker_uids_by_age, employment_rates, age_by_uid_dic = test_get_uids_potential_workers(
    )

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    age_brackets_16 = sp.get_census_age_brackets(datadir, state_location,
                                                 country_location)
    age_by_brackets_dic_16 = sp.get_age_by_brackets_dic(age_brackets_16)

    syn_workplaces, syn_workplace_uids, potential_worker_uids, potential_worker_uids_by_age, workers_by_age_to_assign_count = sp.assign_rest_of_workers(
        workplace_sizes, potential_worker_uids,
        potential_worker_uids_by_age, workers_by_age_to_assign_count,
        dict(age_by_uid_dic), age_brackets_16, age_by_brackets_dic_16,
        contact_matrix_dic)

    # TODO: Issue #116 assign_rest_of_workers returns empty syn_workplaces and syn_workplace_uids
    # syn_workplaces should return a list of lists where each sublist is a workplace with the ages of workers, not empty
    # for workplace in syn_workplaces:
    #     assert workplace is not None
    # assert syn_workplaces != []

    # syn_worplace_uids should be a list of workers ids, not empty
    # assert syn_workplace_uids != []

    # potential_worker_uids should return a list of potential worker ids
    for worker_id in potential_worker_uids:
        assert worker_id is not None

    # potential_worker_uids_by_age should return a list of potential worker ids mapped by age
    for worker_by_age in potential_worker_uids_by_age:
        # int() raises if the entry is not integer-like; compare explicitly
        # instead of relying on truthiness so that a value of 0 is accepted
        assert int(worker_by_age) >= 0

    # workers_by_age_to_assign_count should be a dictionary mapping age to the count of workers left to assign
    for worker in workers_by_age_to_assign_count.items():
        assert tuple(worker)
def check_enrollment_distribution(pop,
                                  n,
                                  datadir,
                                  figdir,
                                  location=None,
                                  state_location=None,
                                  country_location=None,
                                  file_path=None,
                                  use_default=False,
                                  test_prefix="test",
                                  skip_stat_check=False,
                                  do_close=True,
                                  plot_only=False,
                                  school_type=None):
    """
    Compare the enrollment-by-age distribution of a generated population with
    the distribution expected from the reference data, and check that ages
    with zero expected enrollment have no students generated.

    Args:
        pop              : population dictionary
        n                : population size
        datadir          : root data directory where the reference data resides
        figdir           : directory where the result files are saved
        location         : name of the location
        state_location   : name of the state
        country_location : name of the country the state_location is in
        file_path        : file path to user specified data; passed to both the enrollment
                           rate reader and the age bracket distribution reader
        use_default      : if True, try to first use the other parameters to find data specific to the location
                           under study, otherwise returns default data drawing from Seattle, Washington.
        test_prefix      : used for prefix of the plot title and of the csv file names
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True
        plot_only        : plot only without doing any data checks
        school_type      : list of school types e.g. ['pk', 'es', 'ms', 'hs', 'uv']

    Returns:
        None.

    Raises:
        AssertionError: if students exist at an age whose expected enrollment is 0,
                        if no student exists at an age whose expected enrollment exceeds
                        the threshold, or if a low-count age falls outside the allowed range.

    Plots are saved under figdir/enrollment. Unless plot_only is set, the expected
    and actual per-age counts are also written as csv files to the parent directory
    of datadir.
    """
    expected_dist = sp.get_school_enrollment_rates(
        datadir=datadir,
        location=location,
        state_location=state_location,
        country_location=country_location,
        file_path=file_path,
        use_default=use_default)
    age_dist = sp.read_age_bracket_distr(datadir=datadir,
                                         location=location,
                                         state_location=state_location,
                                         country_location=country_location,
                                         file_path=file_path,
                                         use_default=use_default)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)

    figdir = os.path.join(figdir, "enrollment")

    # count the actual school enrollment by single year of age (0 to 100),
    # overall and, when school types are requested, per school type
    if school_type is not None:
        actual_per_school_type_dict = {
            sc: dict.fromkeys(range(0, 101), 0)
            for sc in school_type
        }
    else:
        actual_per_school_type_dict = {}
    actual_pool = []
    actual_dist = dict.fromkeys(range(0, 101), 0)
    for p in pop.values():
        if p["scid"] is not None and p["sc_student"] is not None:
            for sc in actual_per_school_type_dict.keys():
                if p["sc_type"] == sc:
                    actual_per_school_type_dict[sc][p["age"]] += 1
            actual_dist[p["age"]] += 1
            actual_pool.append(p["age"])

    # plot total school enrollment and individual age distribution;
    # the "all" entry is added only after counting so it is never matched
    # against a person's school type
    actual_per_school_type_dict["all"] = actual_dist
    if school_type is not None:
        utilities.plot_array([
            sum(actual_per_school_type_dict[i].values())
            for i in actual_per_school_type_dict.keys()
        ],
                             names=actual_per_school_type_dict.keys(),
                             datadir=figdir,
                             testprefix="enrollment_by_school_type\n" +
                             test_prefix,
                             expect_label="enrollment",
                             value_text=True,
                             do_close=do_close)
    for k in actual_per_school_type_dict:
        utilities.plot_array(actual_per_school_type_dict[k].values(),
                             datadir=figdir,
                             testprefix=f"enrollment_by_age {k}\n" +
                             test_prefix,
                             expect_label="enrollment by age bucket",
                             do_close=do_close)

    actual_age_dist = utilities.get_age_distribution_from_pop(pop, brackets)

    # expected enrollment per single age, scaled two ways: by the reference
    # age distribution and by the age distribution actually generated
    # ("adjusted"); both are also summed up per age bracket
    expected_combined_dist = dict.fromkeys(range(0, len(brackets)), 0)
    adjusted_expected_combined_dist = dict.fromkeys(
        range(0, len(brackets)), 0)
    actual_combined_dist = dict.fromkeys(range(0, len(brackets)), 0)

    scaled_dist = dict.fromkeys(range(0, 101), 0)
    adjusted_scaled_dist = dict.fromkeys(range(0, 101), 0)
    for i in age_dist:
        bracket_size = len(brackets[i])
        for j in brackets[i]:
            # the bracket's population share is spread evenly over its ages
            scaled_dist[j] = (expected_dist[j] * n *
                              age_dist[i]) / bracket_size
            adjusted_scaled_dist[j] = (expected_dist[j] * n *
                                       actual_age_dist[i]) / bracket_size
            expected_combined_dist[i] += scaled_dist[j]
            adjusted_expected_combined_dist[i] += adjusted_scaled_dist[j]
            actual_combined_dist[i] += actual_dist[j]

    # construct expected pool adjusted based on the actual age distribution:
    # each age is repeated as many times as its (truncated) expected count
    adjusted_expected_pool = [
        age for age, count in adjusted_scaled_dist.items()
        for _ in range(0, int(count))
    ]

    print(f"total enrollment expected :{int(sum(scaled_dist.values()))}")
    print(
        f"total enrollment expected (adjusted) :{int(sum(adjusted_scaled_dist.values()))}"
    )
    print(f"total enrollment actual :{sum(actual_dist.values())}")

    # make sure results are sorted by key
    actual_dist = dict(sorted(actual_dist.items()))

    expected_values = np.array(list(scaled_dist.values()))
    adjusted_expected_values = np.array(list(adjusted_scaled_dist.values()))
    actual_values = np.array(list(actual_dist.values()))

    expected_combined_values = np.array(list(expected_combined_dist.values()))
    adjusted_expected_combined_values = np.array(
        list(adjusted_expected_combined_dist.values()))
    actual_combined_values = np.array(list(actual_combined_dist.values()))

    utilities.plot_array(expected_values,
                         actual_values,
                         None,
                         figdir,
                         "enrollment_" + test_prefix,
                         do_close=do_close)
    utilities.plot_array(adjusted_expected_values,
                         actual_values,
                         None,
                         figdir,
                         "adjusted enrollment_" + test_prefix,
                         do_close=do_close)

    # per-bracket plots are labelled with the first age of each bracket
    bracket_starts = [i[0] for i in brackets.values()]
    utilities.plot_array(expected_combined_values,
                         actual_combined_values,
                         np.array(bracket_starts),
                         figdir,
                         "enrollment by age bin" + test_prefix,
                         do_close=do_close)
    utilities.plot_array(adjusted_expected_combined_values,
                         actual_combined_values,
                         np.array(bracket_starts),
                         figdir,
                         "adjusted enrollment by age bin" + test_prefix,
                         do_close=do_close)
    if plot_only:
        return

    csv_dir = os.path.dirname(datadir)
    np.savetxt(os.path.join(csv_dir, f"{test_prefix}_expected.csv"),
               expected_values,
               delimiter=",")
    np.savetxt(os.path.join(csv_dir, f"{test_prefix}_actual.csv"),
               actual_values,
               delimiter=",")

    # check for expected 0 count bins
    # if expected enrollment is 0, actual enrollment must be 0
    # if the expected enrollment is greater than threshold, actual enrollment should not be zero
    # here we use tentative threshold 9 meaning if we expected 10+ enrollment and actually
    # generate 0, we should investigate why
    threshold = 9
    assert np.sum(actual_values[expected_values == 0]) == 0, \
        "expected enrollment should be 0 for these age bins: " \
        f"{str(np.where((expected_values == 0) & (actual_values != 0)))}"

    assert len(actual_values[np.where((expected_values > threshold) & (actual_values == 0))]) == 0, \
        "actual enrollment should not be 0 for these age bins: " \
        f"{str(np.where((expected_values > threshold) & (actual_values == 0)))}"

    # if expected bin count is less than threshold, use range check to allow some buffer
    # this is usually observed in smaller population in that expected count is small
    # so we allow actual observations to be 0 and up to the expected value plus threshold
    i = np.where((expected_values <= threshold) & (expected_values > 0))
    upper = expected_values[i] + threshold  # upper bound
    lower = np.zeros(len(expected_values[i]))  # lower bound can be 0
    assert np.all(lower <= actual_values[i]) and np.all(actual_values[i] <= upper), \
        "results show too much difference:\n" \
        f"expected:{expected_values[i]} \n actual:{actual_values[i]} \n" \
        f"please check these age bins: {i}"

    if not skip_stat_check:
        utilities.statistic_test(adjusted_expected_pool,
                                 actual_pool,
                                 test="ks",
                                 comments="enrollment distribution check")
raise Exception(
    'You must download the Census data (see above) before running this script')

import synthpops as sp

# Root data directory shipped with synthpops; every call below reads from it.
datadir = sp.datadir

# Region whose long term care facility (LTCF) data is processed.
state_location = 'Washington'
country_location = 'usa'
# NOTE(review): acs_period is not referenced in the visible part of this script.
acs_period = 1

# Flip to True to also call the writer for the processed rates (see `if save` below).
# save = True
save = False

# Census age brackets (18 brackets requested) and the lookup derived from them.
age_brackets = sp.get_census_age_brackets(datadir,
                                          state_location=state_location,
                                          country_location=country_location,
                                          nbrackets=18)
age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

# Process the LTCF rates by age; they are only handed to the writer when `save` is on.
ltcf_rates_by_age = sp.process_long_term_care_facility_rates_by_age(
    datadir, state_location, country_location)
if save:
    sp.write_long_term_care_facility_use_rates(datadir, state_location,
                                               country_location,
                                               ltcf_rates_by_age)
# Fetch the rates through the getter; this rebinds the name and discards the
# value computed by the processing call above.
ltcf_rates_by_age = sp.get_long_term_care_facility_use_rates(
    datadir, state_location=state_location, country_location=country_location)

# use the data to estimate the number of long term care facility users for a local region and a given population size
# (the estimate itself is presumably computed further down, outside this excerpt)
local_population_size = 225e3
location = 'Seattle-Tacoma-Bellevue-WA-Metro-Area'
def check_employment_age_distribution(pop,
                                      n,
                                      datadir,
                                      figdir,
                                      location=None,
                                      state_location=None,
                                      country_location=None,
                                      file_path=None,
                                      use_default=False,
                                      test_prefix="",
                                      skip_stat_check=False,
                                      do_close=True):
    """
    Check the population employment by age distribution against the reference data

    Employment counts include anyone with a workplace id as well as school
    teachers and staff. The function plots employed/unemployed counts by age,
    compares employment rates by age with the reference rates, and finally
    checks the total number of employed people against the expected total.

    Args:
        pop              : population dictionary
        n                : population size
        datadir          : root data directory where the reference data resides
        figdir           : directory where the result files are saved
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : file path to user specified data; passed to both the employment
                           rate reader and the age bracket distribution reader
        use_default      : if True, try to first use the other parameters to find data specific to the location
                           under study, otherwise returns default data drawing from Seattle, Washington.
        test_prefix      : used for prefix of the plot title
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True; applied to every plot

    Returns:
        None.

    Plots are saved under figdir/employment.
    """
    figdir = os.path.join(figdir, "employment")
    er = sp.get_employment_rates(datadir=datadir,
                                 location=location,
                                 state_location=state_location,
                                 country_location=country_location,
                                 file_path=file_path,
                                 use_default=use_default)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)
    ageindex = sp.get_age_by_brackets_dic(brackets)
    age_dist = sp.read_age_bracket_distr(datadir=datadir,
                                         location=location,
                                         state_location=state_location,
                                         country_location=country_location,
                                         file_path=file_path,
                                         use_default=use_default)
    # counting the actual population by age with employment including teachers and staffs
    actual_employed_age_dist, actual_unemployed_age_dist = \
        utilities.get_ids_count_by_param(pop,
                                         condition_name=['wpid', 'sc_teacher', 'sc_staff'],
                                         param='age')

    # do_close is forwarded to these two plots as well, so that a caller
    # asking to keep figures open gets a consistent result for all plots
    employed_ages = sorted(actual_employed_age_dist)
    utilities.plot_array([actual_employed_age_dist[k] for k in employed_ages],
                         datadir=figdir,
                         names=employed_ages,
                         expect_label='employed by age count',
                         xlabel_rotation=90,
                         testprefix="employeed count by age " + test_prefix,
                         do_close=do_close)
    unemployed_ages = sorted(actual_unemployed_age_dist)
    utilities.plot_array(
        [actual_unemployed_age_dist[k] for k in unemployed_ages],
        datadir=figdir,
        names=unemployed_ages,
        expect_label='unemployed by age count',
        xlabel_rotation=90,
        testprefix="unemployed count by age " + test_prefix,
        do_close=do_close)

    # line the actual rates up with the ages of the reference rates;
    # ages with no actual rate count as 0
    actual_employed_rate = utilities.calc_rate(actual_employed_age_dist,
                                               actual_unemployed_age_dist)
    sorted_actual_employed_rate = {
        i: (actual_employed_rate[i] if i in actual_employed_rate else 0)
        for i in er.keys()
    }
    actual_values = np.array(list(sorted_actual_employed_rate.values()))
    expected_values = np.array(list(er.values()))
    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="employment rate distribution check")

    # plotting fill 0 to under age 16 for better display: zeros are prepended
    # for every age below the youngest age in the reference rates, so the
    # array position matches the age
    filled_count = min(er.keys())
    expected_values = np.insert(expected_values, 0, np.zeros(filled_count))
    actual_values = np.insert(actual_values, 0, np.zeros(filled_count))
    names = list(range(0, max(er.keys()) + 1))
    # names is deliberately not passed here: somehow double stacks for age 100
    utilities.plot_array(
        expected_values,
        actual_values,
        names=None,
        datadir=figdir,
        testprefix="employment rate distribution " + test_prefix,
        do_close=do_close,
    )

    # check if total employment match
    expected_employed_brackets = {k: 0 for k in brackets}
    actual_employed_brackets = {k: 0 for k in brackets}
    for i in names:
        expected_employed_brackets[ageindex[i]] += expected_values[i]
        if i in actual_employed_age_dist:
            actual_employed_brackets[
                ageindex[i]] += actual_employed_age_dist[i]
    # turn the summed rates into expected head counts: mean rate of the
    # bracket times the bracket's share of the population times n
    for i in expected_employed_brackets:
        expected_employed_brackets[i] = expected_employed_brackets[i] / len(
            brackets[i]) * age_dist[i] * n

    expected_total = np.array(list(expected_employed_brackets.values()))
    actual_total = np.array(list(actual_employed_brackets.values()))
    utilities.plot_array(expected_total,
                         actual_total,
                         names=brackets.keys(),
                         datadir=figdir,
                         testprefix="employment total " + test_prefix,
                         do_close=do_close)
    expected_etotal = np.round(np.sum(expected_total))
    actual_etotal = np.round(np.sum(actual_total))
    utilities.check_error_percentage(n,
                                     expected_etotal,
                                     actual_etotal,
                                     name="employee")
def smooth_binned_age_distribution(pars, do_show=False):
    """
    Plot the raw single-year age distribution of a location next to versions
    smoothed with window lengths 3, 5 and 7.

    Args:
        pars (dict)    : must provide the keys 'location', 'state_location' and 'country_location'
        do_show (bool) : if True, show the figure before returning

    Returns:
        The matplotlib figure and axes of the plot.
    """
    sp.logger.info("Smoothing out age distributions with moving averages.")

    place = {
        'location': pars['location'],
        'state_location': pars['state_location'],
        'country_location': pars['country_location'],
    }

    # a window length of 1 is what gets plotted as the 'Raw' curve
    raw_age_distr = sp.get_smoothed_single_year_age_distr(sp.datadir,
                                                          window_length=1,
                                                          **place)
    age_brackets = sp.get_census_age_brackets(sp.datadir, **place)

    # oldest age = last age listed in the bracket with the largest key
    last_bracket = max(age_brackets.keys())
    max_age = age_brackets[last_bracket][-1]
    all_ages = np.arange(max_age + 1)

    smoothed_by_window = {}
    for window in np.arange(3, 8, 2):
        smoothed_age_distr = sp.get_smoothed_single_year_age_distr(
            sp.datadir, window_length=window, **place)
        smoothed_by_window[window] = np.array(
            [smoothed_age_distr[a] for a in all_ages])

    fig, ax = plt.subplots(1, 1, figsize=(10, 6))

    raw_cmap = mplt.cm.get_cmap('cmr.ember')

    # slices of the rainforest colormap: one for lines, one for marker faces
    if len(smoothed_by_window) > 3:
        line_span, face_span = (0.13, 0.85), (0.20, 0.92)
    else:
        line_span, face_span = (0.18, 0.68), (0.25, 0.75)
    line_cmap = cmr.get_sub_cmap('cmr.rainforest', *line_span)
    face_cmap = cmr.get_sub_cmap('cmr.rainforest', *face_span)

    # step through the colormap slice so the curves are evenly spaced in colour
    color_step = 1 / (len(smoothed_by_window) - 1)

    # x axis: the ages present in the last smoothed distribution
    plot_ages = np.array(sorted(smoothed_age_distr.keys()))
    raw_values = np.array([raw_age_distr[a] for a in plot_ages])

    ax.plot(plot_ages,
            raw_values,
            color=raw_cmap(0.55),
            marker='o',
            markerfacecolor=raw_cmap(0.65),
            markersize=3,
            markeredgewidth=1,
            alpha=0.65,
            label='Raw')

    for position, window in enumerate(sorted(smoothed_by_window.keys())):
        shade = position * color_step
        ax.plot(plot_ages,
                smoothed_by_window[window],
                color=line_cmap(shade),
                marker='o',
                markerfacecolor=face_cmap(shade),
                markeredgewidth=1,
                markersize=3,
                alpha=.75,
                label=f'Smoothing window = {window}')

    legend = ax.legend(loc=3)
    legend.draw_frame(False)
    ax.set_xlim(plot_ages[0], plot_ages[-1])
    ax.set_ylim(bottom=0.)
    ax.set_xlabel('Age')
    ax.set_ylabel('Distribution (%)')
    ax.set_title(
        f"Smoothing Binned Age Distribution: {pars['location'].replace('_', ' ').replace('-', ' ')}"
    )

    if do_show:
        plt.show()

    return fig, ax
    mplt.rcParams['font.family'] = prop.get_name()
except:
    print("You don't have access to the nice fonts folder mate.")

# Colormaps taken from cmocean (deep_r, curl_r and matter).
cmap = mplt.cm.get_cmap(cmocean.cm.deep_r)
cmap2 = mplt.cm.get_cmap(cmocean.cm.curl_r)
cmap3 = mplt.cm.get_cmap(cmocean.cm.matter)

# Root data directory shipped with synthpops.
datadir = sp.datadir

# Region used for every data lookup below.
state_location = 'Washington'
location = 'seattle_metro'
country_location = 'usa'
use_bayesian = False

# NOTE(review): the arguments here are positional (datadir, country_location,
# use_bayesian) — confirm they line up with the signature of
# get_census_age_brackets in the synthpops version this script targets.
age_brackets = sp.get_census_age_brackets(datadir, country_location,
                                          use_bayesian)
age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

num_agebrackets = 18
# NOTE(review): state_location and num_agebrackets are passed positionally —
# confirm against the signature of get_contact_matrix_dic.
contact_matrix_dic = sp.get_contact_matrix_dic(datadir, state_location,
                                               num_agebrackets)

# Household size distribution for the chosen region, printed for inspection.
household_size_distr = sp.get_household_size_distr(datadir, location,
                                                   state_location,
                                                   country_location,
                                                   use_bayesian)
print(household_size_distr)

# Number of homes; presumably consumed further down, outside this excerpt.
# Nhomes = 20000
Nhomes = 10000
# Example #24
# 0
        symmetric_matrix = np.zeros((max_age + 1, max_age + 1))

        for p in range(len(sim.people)):

            for layer in ['h', 's', 'w']:
                contacts = sim.people.contacts[layer]['p2'][
                    sim.people.contacts[layer]['p1'] == p]
                n_contacts = (sim.people.contacts[layer]['p1'] == p).sum()
                contact_ages = ages[contacts]

                for ca in contact_ages:
                    symmetric_matrix[ages[p]][ca] += 1
                n_contacts_count[ages[p]] += n_contacts

        age_brackets = sp.get_census_age_brackets(sp.datadir,
                                                  state_location='Washington',
                                                  country_location='usa')
        age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

        aggregate_age_count = sp.get_aggregate_ages(age_count,
                                                    age_by_brackets_dic)
        aggregate_matrix = symmetric_matrix.copy()
        aggregate_matrix = sp.get_aggregate_matrix(aggregate_matrix,
                                                   age_by_brackets_dic)

        asymmetric_matrix = sp.get_asymmetric_matrix(aggregate_matrix,
                                                     aggregate_age_count)

        fig = plt.figure(figsize=(8, 8))
        ax = fig.add_subplot(111)
        im = ax.imshow(asymmetric_matrix.T,
# Example #25
# 0
def plot_data_contact_matrix(datadir,
                             location='seattle_metro',
                             state_location='Washington',
                             country_location='usa',
                             sheet_name='United States of America',
                             setting_code='H',
                             logcolors_flag=True):
    """
    Draw the reference contact matrix of one setting as a heatmap.

    Args:
        datadir          : data directory the contact matrix and age brackets are read from
        location         : accepted but not used inside this function
        state_location   : state used to look up the census age brackets
        country_location : country used to look up the census age brackets
        sheet_name       : sheet name passed to the contact matrix reader
        setting_code     : 'H', 'S' or 'W' (household, school, work)
        logcolors_flag   : if True, colour the matrix on a log scale with preset bounds

    Returns:
        The matplotlib figure.

    Note:
        density_or_frequency and aggregate_flag are not parameters; they are
        read from the enclosing (module) scope and must be defined there.
    """
    asymmetric_M = sp.get_contact_matrix(datadir,
                                         setting_code,
                                         sheet_name=sheet_name)

    age_brackets = sp.get_census_age_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location)
    # result is not used below; the call is kept as in the original
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    cmap = mplt.cm.get_cmap(cmocean.cm.matter_r)

    fig = plt.figure(figsize=(9, 9))
    ax = fig.add_subplot(111)

    titles = {'H': 'Household', 'S': 'School', 'W': 'Work'}

    if not logcolors_flag:
        im = ax.imshow(asymmetric_M.T,
                       origin='lower',
                       interpolation='nearest',
                       cmap=cmap)
    else:
        # preset colour bounds per setting; which set applies depends on
        # whether densities or frequencies are being shown
        if density_or_frequency == 'density':
            vbounds = {
                'H': {'vmin': 1e-2, 'vmax': 1e1},
                'S': {'vmin': 1e-3, 'vmax': 1e-0},
                'W': {'vmin': 1e-3, 'vmax': 1e-0},
            }
        elif density_or_frequency == 'frequency':
            vbounds = {
                'H': {'vmin': 1e-2, 'vmax': 1e0},
                'S': {'vmin': 1e-2, 'vmax': 1e1},
                'W': {'vmin': 1e-2, 'vmax': 1e0},
            }
        else:
            vbounds = {}

        im = ax.imshow(asymmetric_M.T,
                       origin='lower',
                       interpolation='nearest',
                       cmap=cmap,
                       norm=LogNorm(vmin=vbounds[setting_code]['vmin'],
                                    vmax=vbounds[setting_code]['vmax']))

    # colorbar in its own axes to the right of the matrix
    cax = make_axes_locatable(ax).new_horizontal(size="4%", pad=0.15)
    fig.add_axes(cax)
    cbar = fig.colorbar(im, cax=cax)
    cbar.ax.tick_params(axis='y', labelsize=20)
    cbar_label = ('Frequency of Contacts'
                  if density_or_frequency == 'frequency' else
                  'Density of Contacts')
    cbar.ax.set_ylabel(cbar_label, fontsize=20)

    ax.tick_params(labelsize=20)
    ax.set_xlabel('Age', fontsize=24)
    ax.set_ylabel('Age of Contacts', fontsize=24)
    ax.set_title(titles[setting_code] + ' Contact Patterns', fontsize=28)

    if aggregate_flag:
        # one tick per age bracket, labelled "first-last" age of the bracket
        tick_labels = [
            f"{age_brackets[b][0]!s}-{age_brackets[b][-1]!s}"
            for b in age_brackets
        ]
        tick_positions = np.arange(len(tick_labels))
        ax.set_xticks(tick_positions)
        # NOTE(review): the font size set by this first call is overridden
        # by the call right after it
        ax.set_xticklabels(tick_labels, fontsize=1)
        ax.set_xticklabels(tick_labels, fontsize=18, rotation=50)
        ax.set_yticks(np.arange(len(tick_labels)))
        ax.set_yticklabels(tick_labels, fontsize=18)

    return fig
def check_age_distribution(pop,
                           n,
                           datadir,
                           figdir,
                           location=None,
                           state_location=None,
                           country_location=None,
                           file_path=None,
                           use_default=False,
                           test_prefix="test",
                           skip_stat_check=False,
                           do_close=True):
    """
    Plot the expected age bracket distribution against the one found in the
    generated population and, unless skipped, run a statistical test on them.

    Args:
        pop              : population dictionary
        n                : population size (not used by the normalized comparison done here)
        datadir          : root data directory where the reference data resides
        figdir           : directory where the result files are saved
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : file path to user specified age bracket distribution data
        use_default      : if True, try to first use the other parameters to find data specific to the location
                           under study, otherwise returns default data drawing from Seattle, Washington.
        test_prefix      : used for prefix of the plot title
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True

    Returns:
        None.

    Plots are saved under figdir/age_distribution.
    """
    figdir = os.path.join(figdir, "age_distribution")

    reference_distr = sp.read_age_bracket_distr(
        datadir=datadir,
        location=location,
        state_location=state_location,
        country_location=country_location,
        file_path=file_path,
        use_default=use_default)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)

    # both sides are compared as normalized distributions, not head counts
    expected_values = np.array(list(reference_distr.values()))
    actual_values = utilities.get_age_distribution_from_pop(pop, brackets)

    # each bracket is labelled with its first age
    names = np.array([ages[0] for ages in brackets.values()])

    plot_title = "age_distribution_" + test_prefix
    utilities.plot_array(expected_values,
                         actual_values,
                         names,
                         figdir,
                         plot_title,
                         do_close=do_close)

    if skip_stat_check:
        return
    utilities.statistic_test(expected_values,
                             actual_values,
                             test="x",
                             comments="age distribution check")