Python get_school_size_bracketsの例、synthpops.get_school_size_brackets Pythonの例

コード例 #1

0

ファイルを表示

def test_send_students_to_school(n=10000,
                                 location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa',
                                 folder_name='contact_networks'):
    """
    Assign students to schools starting from saved household data.

    The 'households' setting groups are read with ages, uids are assigned per
    home, the school-going uids are selected, school sizes are generated and
    finally ``sp.send_students_to_school`` places the students.

    Args:
        n (int)                : population size passed to the readers
        location (str)         : name of the location
        state_location (str)   : name of the state
        country_location (str) : name of the country
        folder_name (str)      : folder name passed to ``sprw.read_setting_groups``

    Returns:
        tuple: ``(syn_schools, syn_school_uids)`` as produced by
        ``sp.send_students_to_school``.
    """

    homes = sprw.read_setting_groups(datadir,
                                     location,
                                     state_location,
                                     country_location,
                                     folder_name,
                                     'households',
                                     n,
                                     with_ages=True)

    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir,
        n,
        location,
        state_location,
        country_location,
        age_by_uid_dic,
        homes_by_uids,
        use_default=False)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(
        datadir, location, state_location, country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location,
                                                       state_location,
                                                       country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket,
                                            school_size_brackets,
                                            uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # The third return value (school types) is not checked by this test.
    syn_schools, syn_school_uids, syn_school_types = sp.send_students_to_school(
        school_sizes,
        uids_in_school,
        uids_in_school_by_age,
        ages_in_school_count,
        age_brackets,
        age_by_brackets_dic,
        contact_matrix_dic,
        verbose=False)

    # ``assert a, b is not None`` only tests ``a`` and uses the second
    # expression as the failure message, so ``syn_school_uids`` was never
    # checked. Keep the existing truthiness check and add the missing one.
    assert syn_schools, "no schools were generated"
    assert syn_school_uids is not None

    return syn_schools, syn_school_uids

コード例 #2

0

ファイルを表示

ファイル: test_synthpops.py プロジェクト: pristineVedansh/synthpops

def test_generate_school_sizes(location='seattle_metro', state_location='Washington',
                               country_location='usa', folder_name='contact_networks'):
    """
    Generate school sizes from the default school-going population.

    Args:
        location (str)         : name of the location
        state_location (str)   : name of the state
        country_location (str) : name of the country
        folder_name (str)      : folder name passed to ``sp.get_uids_in_school``
    """
    Nhomes = 10000
    # sp.get_uids_in_school yields (uids_in_school, uids_in_school_by_age,
    # ages_in_school_count); binding the whole tuple to one name would hand
    # the tuple itself to generate_school_sizes instead of the uids.
    uids_in_school, _uids_in_school_by_age, _ages_in_school_count = sp.get_uids_in_school(
        datadir, Nhomes, location,
        state_location,
        country_location,
        folder_name=folder_name,
        use_default=True)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(datadir, location, state_location,
                                                                        country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket, school_size_brackets, uids_in_school)
    assert school_sizes is not None

コード例 #3

0

ファイルを表示

ファイル: test_synthpops.py プロジェクト: pristineVedansh/synthpops

def test_generate_workplace_sizes(location='seattle_metro', state_location='Washington',
                                  country_location='usa', folder_name='contact_networks'):
    """
    Generate workplace sizes for the workers left after students are placed.

    Students are sent to school first; the resulting school uids, together
    with employment rates and ages, determine how many workers per age are to
    be assigned, and workplace sizes are generated for that count.

    Args:
        location (str)         : name of the location
        state_location (str)   : name of the state
        country_location (str) : name of the country
        folder_name (str)      : folder name passed to the population readers

    Returns:
        tuple: ``(workers_by_age_to_assign_count, workplace_size_brackets,
        workplace_size_distr_by_brackets, workplace_sizes)``.
    """
    Npeople = 10000
    place = (location, state_location, country_location)

    # --- schools -----------------------------------------------------------
    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir, Npeople, *place, folder_name=folder_name, use_default=True)

    school_sizes = sp.generate_school_sizes(
        sp.get_school_size_distr_by_brackets(datadir, *place),
        sp.get_school_size_brackets(datadir, *place),
        uids_in_school)

    age_brackets = sp.get_age_brackets_from_df(
        sp.get_census_age_brackets_path(datadir, state_location, country_location))
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    # Only the uids of people placed in schools are used further down.
    _, syn_school_uids = sp.send_students_to_school(
        school_sizes, uids_in_school, uids_in_school_by_age,
        ages_in_school_count, age_brackets, age_by_brackets_dic,
        contact_matrix_dic)

    # --- workers -----------------------------------------------------------
    employment_rates = sp.get_employment_rates(
        datadir,
        location=location,
        state_location=state_location,
        country_location=country_location,
        use_default=True)

    age_by_uid_dic = sp.read_in_age_by_uid(datadir, *place, folder_name, Npeople)

    _, _, ages_left_count = sp.get_uids_potential_workers(
        syn_school_uids, employment_rates, age_by_uid_dic)

    workers_by_age_to_assign_count = sp.get_workers_by_age_to_assign(
        employment_rates, ages_left_count, age_by_uid_dic)

    # --- workplaces --------------------------------------------------------
    workplace_size_brackets = sp.get_workplace_size_brackets(
        datadir, *place, use_default=True)

    workplace_size_distr_by_brackets = sp.get_workplace_size_distr_by_brackets(
        datadir,
        state_location=state_location,
        country_location=country_location,
        use_default=True)

    workplace_sizes = sp.generate_workplace_sizes(
        workplace_size_distr_by_brackets,
        workplace_size_brackets,
        workers_by_age_to_assign_count)

    return (workers_by_age_to_assign_count,
            workplace_size_brackets,
            workplace_size_distr_by_brackets,
            workplace_sizes)

コード例 #4

0

ファイルを表示

ファイル: test_synthpops.py プロジェクト: pristineVedansh/synthpops

def test_send_students_to_school(n=10000, location='seattle_metro', state_location='Washington',
                                 country_location='usa', folder_name='contact_networks'):
    """
    Assign students to schools starting from saved household data.

    Args:
        n (int)                : population size passed to the readers
        location (str)         : name of the location
        state_location (str)   : name of the state
        country_location (str) : name of the country
        folder_name (str)      : folder name passed to ``sp.read_setting_groups``

    Returns:
        tuple: ``(syn_schools, syn_school_uids)`` as produced by
        ``sp.send_students_to_school``.
    """

    homes = sp.read_setting_groups(datadir, location, state_location, country_location, 'households', folder_name, n, with_ages=True)

    homes_by_uids, age_by_uid_dic = sp.assign_uids_by_homes(homes)

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(datadir, n, location,
                                                                                        state_location,
                                                                                        country_location,
                                                                                        age_by_uid_dic,
                                                                                        homes_by_uids,
                                                                                        use_default=False)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(datadir, location, state_location,
                                                                        country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location, state_location, country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket, school_size_brackets, uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(datadir, sheet_name='United States of America')

    syn_schools, syn_school_uids = sp.send_students_to_school(school_sizes, uids_in_school, uids_in_school_by_age,
                                                              ages_in_school_count, age_brackets, age_by_brackets_dic,
                                                              contact_matrix_dic, verbose=False)

    # ``assert a, b is not None`` only tests ``a`` and uses the second
    # expression as the failure message, so ``syn_school_uids`` was never
    # checked. Keep the existing truthiness check and add the missing one.
    assert syn_schools, "no schools were generated"
    assert syn_school_uids is not None

    return syn_schools, syn_school_uids

コード例 #5

0

ファイルを表示

def test_send_students_to_school(location='seattle_metro',
                                 state_location='Washington',
                                 country_location='usa'):
    """
    Assign the default school-going population to schools.

    Args:
        location (str)         : name of the location
        state_location (str)   : name of the state
        country_location (str) : name of the country
    """
    NPeople = 10000

    uids_in_school, uids_in_school_by_age, ages_in_school_count = sp.get_uids_in_school(
        datadir,
        NPeople,
        location,
        state_location,
        country_location,
        use_default=True)

    school_size_distr_by_bracket = sp.get_school_size_distr_by_brackets(
        datadir, location, state_location, country_location)
    school_size_brackets = sp.get_school_size_brackets(datadir, location,
                                                       state_location,
                                                       country_location)
    school_sizes = sp.generate_school_sizes(school_size_distr_by_bracket,
                                            school_size_brackets,
                                            uids_in_school)

    age_brackets_filepath = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    syn_schools, syn_school_uids = sp.send_students_to_school(
        school_sizes,
        uids_in_school,
        uids_in_school_by_age,
        ages_in_school_count,
        age_brackets,
        age_by_brackets_dic,
        contact_matrix_dic,
        verbose=False)

    # ``assert a, b is not None`` only tests ``a`` and uses the second
    # expression as the failure message, so ``syn_school_uids`` was never
    # checked. Keep the existing truthiness check and add the missing one.
    assert syn_schools, "no schools were generated"
    assert syn_school_uids is not None

コード例 #6

0

ファイルを表示

def plot_school_sizes_by_type(pop, pars, do_show=False):
    """
    Compare generated school sizes with the expected distribution, per school type.

    One subplot is drawn for every school type found among the students in
    ``pop``: bars show the expected size distribution and a dashed line shows
    the distribution computed from the population.

    Args:
        pop (dict)     : people keyed by id; each person is read through the
                         keys 'scid', 'sc_student' and 'sc_type'
        pars (dict)    : read through the keys 'with_school_types', 'location',
                         'state_location', 'country_location' and 'use_default'
        do_show (bool) : if True, call ``plt.show()`` before returning

    Returns:
        tuple: the figure, its axes (wrapped in a list when there is a single
        subplot) and the sorted list of school types.
    """
    sp.logger.info(
        "Plotting to show that school sizes are generated by school type when the parameter 'with_school_types' is set to True."
    )

    with_school_types = pars['with_school_types']
    place = dict(location=pars['location'],
                 state_location=pars['state_location'],
                 country_location=pars['country_location'])

    if with_school_types:
        expected_school_size_distr = sp.get_school_size_distr_by_type(
            sp.datadir, use_default=pars['use_default'], **place)
    else:
        # Without school types there is a single distribution, stored under None.
        expected_school_size_distr = {
            None: sp.get_school_size_distr_by_brackets(
                sp.datadir, use_default=pars['use_default'], **place)
        }

    # for right now the size distribution for all school types will use the same brackets or bins
    school_size_brackets = sp.get_school_size_brackets(sp.datadir, **place)

    # Histogram edges: lower bound of the first bracket, then one past the
    # upper bound of every bracket.
    bin_edges = [school_size_brackets[0][0]]
    bin_edges.extend(school_size_brackets[b][-1] + 1
                     for b in school_size_brackets)

    # Count enrolled students per school and remember each school's type.
    schools = {}
    for _, person in pop.items():
        scid = person['scid']
        if scid is None or not person['sc_student']:
            continue
        record = schools.setdefault(scid, {})
        record['sc_type'] = person['sc_type']
        record['enrolled'] = record.get('enrolled', 0) + 1

    # Group the enrollment numbers by school type.
    enrollment_by_school_type = {}
    for school in schools.values():
        enrollment_by_school_type.setdefault(school['sc_type'],
                                             []).append(school['enrolled'])

    # Normalised histogram of school sizes for every type.
    gen_school_size_distr = {}
    for sc_type, sizes in enrollment_by_school_type.items():
        counts, _ = np.histogram(sizes, bins=bin_edges, density=False)
        total = sum(counts)
        gen_school_size_distr[sc_type] = {
            b: counts[b] / total
            for b in school_size_brackets
        }
    gen_school_size_distr = sc.objdict(gen_school_size_distr)

    n_types = len(gen_school_size_distr)

    cmap = cmr.get_sub_cmap('cmo.curl', 0.12, 1)
    fig, ax = plt.subplots(n_types,
                           1,
                           figsize=(6, 3 * n_types),
                           tight_layout=True)
    plt.subplots_adjust(hspace=0.4)
    if n_types == 1:
        ax = [ax]

    bin_labels = []
    for b in school_size_brackets:
        bracket = school_size_brackets[b]
        bin_labels.append(f"{bracket[0]}-{bracket[-1]}")

    sorted_school_types = sorted(gen_school_size_distr.keys())
    x = np.arange(len(school_size_brackets))

    for ns, school_type in enumerate(sorted_school_types):
        axis = ax[ns]

        # Colours step through the colormap with the subplot index.
        c = ns / n_types
        c2 = min(c + 0.1, 1)
        line_color = cmap(c2)

        expected = expected_school_size_distr[school_type]
        generated = gen_school_size_distr[school_type]
        sorted_bins = sorted(expected.keys())

        axis.bar(x, [expected[b] for b in sorted_bins],
                 color=cmap(c),
                 edgecolor='white',
                 label='Expected',
                 zorder=0)
        axis.plot(x, [generated[b] for b in sorted_bins],
                  color=line_color,
                  ls='--',
                  marker='o',
                  markerfacecolor=line_color,
                  markeredgecolor='white',
                  markeredgewidth=.5,
                  markersize=5,
                  label='Simulated',
                  zorder=1)

        axis.legend(loc=1).draw_frame(False)
        axis.set_xticks(x)
        axis.set_xticklabels(bin_labels, rotation=25)
        axis.set_xlim(0, x[-1])
        axis.set_ylim(0, 1)

        # The location caption is written once, above the first subplot.
        if ns == 0:
            if pars['location'] is None:
                location_text = f"{sp.config.default_location.replace('_', ' ').title()} Default Sizes"
            else:
                location_text = f"{pars['location'].replace('_', ' ').title()}"
            axis.text(0.,
                      1.1,
                      location_text,
                      horizontalalignment='left',
                      fontsize=10)

        title = "without school types defined" if school_type is None else f"{school_type}"
        axis.set_title(title, fontsize=10)

    # Only the bottom subplot carries the x-axis label.
    ax[-1].set_xlabel('School size')

    if do_show:
        plt.show()

    return fig, ax, sorted_school_types

コード例 #7

0

ファイルを表示

ファイル: utilities_dist.py プロジェクト: TTI-modelling/synthpops

def check_school_size_distribution(pop,
                                   n,
                                   datadir,
                                   figdir,
                                   location=None,
                                   state_location=None,
                                   country_location=None,
                                   file_path=None,
                                   use_default=False,
                                   test_prefix="",
                                   skip_stat_check=True,
                                   do_close=True,
                                   school_type=None):
    """
    Check the school size distribution against the reference data.

    For the combined group "all", and for each entry of ``school_type`` when
    given, the binned school-size distribution of ``pop`` is plotted against
    the expected distribution, raw school sizes are plotted (all members and
    students only), and the average school size is compared with the expected
    average through ``utilities.check_error_percentage``.

    Args:
        pop              : population dictionary
        n                : population size
        datadir          : root data directory where the reference data resides
        figdir           : directory where the result files are saved
        location         : name of the location
        state_location   : name of the state
        country_location : name of the country the state_location is in
        file_path        : file path forwarded to sp.get_school_size_brackets when loading the brackets used for binning
        use_default      : forwarded to sp.get_school_size_brackets when loading the brackets used for binning
                           (presumably selects fallback default data - confirm against the synthpops documentation)
        test_prefix      : used for prefix of the plot title
        skip_stat_check  : skip the statistics check for distribution
        do_close         : close the image immediately if set to True
        school_type      : list of school types e.g. ['pk', 'es', 'ms', 'hs', 'uv']

    Returns:
        None.

    Plots will be saved to the "school_size" subdirectory of figdir.
    """
    figdir = os.path.join(figdir, "school_size")
    sb = sp.get_school_size_brackets(datadir=datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     file_path=file_path,
                                     use_default=use_default)
    sb_index = sp.get_index_by_brackets_dic(sb)

    expected_school_size_by_brackets = sp.get_school_size_distr_by_brackets(
        datadir=datadir,
        location=location,
        state_location=state_location,
        country_location=country_location)

    # Second element of each result (the "none" counts) is not used here.
    actual_school, _ = utilities.get_ids_count_by_param(pop, "scid")
    actual_school_student_only, _ = utilities.get_ids_count_by_param(
        pop, "sc_student", "scid")

    actual_per_school_type_dict = {"all": actual_school}
    actual_per_school_type_dict_student_only = {
        "all": actual_school_student_only
    }
    if school_type is not None:
        for sc in school_type:
            actual_per_school_type_dict[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc)[0]
            actual_per_school_type_dict_student_only[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc, filter_expression={'sc_student':'1'})[0]

    # The expected average school size does not depend on the school type, so
    # the bracket data is loaded and the expectation computed once, up front,
    # instead of once per loop iteration.
    school_size_brackets = sp.get_school_size_brackets(
        datadir=datadir,
        location=location,
        country_location=country_location,
        state_location=state_location)
    # calculate the average school size per bracket
    average_school_size_in_bracket = [
        sum(i) / len(i) for i in school_size_brackets.values()
    ]
    # calculate expected school size based on expected value sum(distribution * size)
    expected_average_school_size = sum(
        fraction * average_school_size_in_bracket[bracket]
        for bracket, fraction in expected_school_size_by_brackets.items())

    # get individual school type size distribution
    for k in actual_per_school_type_dict:
        # Maps a school size to the number of schools having that size.
        actual_scount = dict(Counter(actual_per_school_type_dict[k].values()))
        actual_school_size_by_brackets = sp.norm_dic(
            utilities.get_bucket_count(sb_index, sb, actual_scount))
        expected_values = np.array(
            list(expected_school_size_by_brackets.values()))
        actual_values = np.array(list(actual_school_size_by_brackets.values()))
        utilities.plot_array(expected_values,
                             actual_values,
                             names=sb.keys(),
                             datadir=figdir,
                             testprefix="school size " + test_prefix + " " + k,
                             do_close=do_close)
        utilities.plot_array(
            actual_per_school_type_dict[k].values(),
            datadir=figdir,
            expect_label=
            f"school count: total {len(actual_per_school_type_dict[k])}",
            testprefix="school size total\n" + test_prefix + " " + k,
            binned=False,
            do_close=do_close)
        # NOTE(review): the label below counts schools from the all-members
        # dict rather than the student-only one - confirm this is intended.
        utilities.plot_array(
            actual_per_school_type_dict_student_only[k].values(),
            datadir=figdir,
            expect_label=
            f"school count: total {len(actual_per_school_type_dict[k])}",
            testprefix="school size total (student only)\n" + test_prefix +
            " " + k,
            binned=False,
            do_close=do_close)
        # statistic_test is not working yet because school sizes are now available by school type. Also depends strongly on population size.
        if not skip_stat_check:
            utilities.statistic_test(expected_values,
                                     actual_values,
                                     test="x",
                                     comments="school size check")
        # check average school size
        actual_average_school_size = sum(
            size * count
            for size, count in actual_scount.items()) / sum(actual_scount.values())
        utilities.check_error_percentage(n,
                                         expected_average_school_size,
                                         actual_average_school_size,
                                         name=f"average school size:'{k}'")
    # check school count distribution
    utilities.plot_array([
        len(actual_per_school_type_dict[i])
        for i in actual_per_school_type_dict
    ],
                         names=list(actual_per_school_type_dict.keys()),
                         datadir=figdir,
                         expect_label="school count",
                         testprefix="school count " + test_prefix,
                         value_text=True)