Esempi in Python per get_ids_count_by_param

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: utilities

Metodo/funzione: get_ids_count_by_param

Esempi su hotexamples.com: 4

get_ids_count_by_param in Python: 4 esempi trovati. Questi sono i migliori esempi reali in Python per utilities.get_ids_count_by_param, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: utilities_dist.py Progetto: TTI-modelling/synthpops

def check_employment_age_distribution(pop,
                                      n,
                                      datadir,
                                      figdir,
                                      location=None,
                                      state_location=None,
                                      country_location=None,
                                      file_path=None,
                                      use_default=False,
                                      test_prefix="",
                                      skip_stat_check=False,
                                      do_close=True):
    """
    Compare the employment-by-age distribution of a population with the reference data.

    Args:
        pop              : population dictionary
        n                : population size, used to scale the expected employment totals
        datadir          : root data directory holding the reference data
        figdir           : directory under which the result files are saved
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : optional user-specified data file; forwarded to sp.get_employment_rates
                           and sp.read_age_bracket_distr
        use_default      : forwarded to the same two readers; presumably controls the fallback to
                           default (Seattle, Washington) data -- confirm against synthpops
        test_prefix      : text appended to the title prefix of every plot
        skip_stat_check  : skip the statistics check for the distribution
        do_close         : forwarded to utilities.plot_array for the rate and total plots

    Returns:
        None.

    Plots are produced through utilities.plot_array with the "employment"
    subdirectory of figdir as the output directory.
    """
    figdir = os.path.join(figdir, "employment")
    # the employment-rate and age-distribution readers take the same arguments
    reader_args = dict(datadir=datadir,
                       location=location,
                       state_location=state_location,
                       country_location=country_location,
                       file_path=file_path,
                       use_default=use_default)
    er = sp.get_employment_rates(**reader_args)
    brackets = sp.get_census_age_brackets(datadir=datadir,
                                          state_location=state_location,
                                          country_location=country_location)
    ageindex = sp.get_age_by_brackets_dic(brackets)
    age_dist = sp.read_age_bracket_distr(**reader_args)

    # count the actual population by age; teachers and school staff count as employed
    employed_by_age, unemployed_by_age = utilities.get_ids_count_by_param(
        pop,
        condition_name=['wpid', 'sc_teacher', 'sc_staff'],
        param='age')

    # one count plot per group, ages in ascending order
    count_plots = (
        (employed_by_age, 'employed by age count', "employeed count by age "),
        (unemployed_by_age, 'unemployed by age count', "unemployed count by age "),
    )
    for counts, label, title in count_plots:
        ages_present = sorted(counts)
        utilities.plot_array([counts[age] for age in ages_present],
                             datadir=figdir,
                             names=ages_present,
                             expect_label=label,
                             xlabel_rotation=90,
                             testprefix=title + test_prefix)

    # align the actual rates with the ages of the reference rates; missing ages count as 0
    employed_rate = utilities.calc_rate(employed_by_age, unemployed_by_age)
    rate_by_age = {
        age: (employed_rate[age] if age in employed_rate else 0)
        for age in er
    }
    actual_values = np.array(list(rate_by_age.values()))
    expected_values = np.array(list(er.values()))
    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="employment rate distribution check")

    # pad with zeros below the youngest reference age so array index equals age
    leading_zeros = np.zeros(min(er))
    expected_values = np.insert(expected_values, 0, leading_zeros)
    actual_values = np.insert(actual_values, 0, leading_zeros)
    all_ages = list(range(max(er) + 1))
    # names are deliberately not passed here: the original notes that doing so
    # somehow double stacks age 100
    utilities.plot_array(
        expected_values,
        actual_values,
        names=None,
        datadir=figdir,
        testprefix="employment rate distribution " + test_prefix,
        do_close=do_close,
    )

    # check that the total employment matches, aggregated per age bracket
    expected_by_bracket = dict.fromkeys(brackets, 0)
    actual_by_bracket = dict.fromkeys(brackets, 0)
    for age in all_ages:
        bracket = ageindex[age]
        expected_by_bracket[bracket] += expected_values[age]
        if age in employed_by_age:
            actual_by_bracket[bracket] += employed_by_age[age]
    # mean rate of the bracket, times the bracket's population share, times population size
    expected_by_bracket = {
        bracket: rate_sum / len(brackets[bracket]) * age_dist[bracket] * n
        for bracket, rate_sum in expected_by_bracket.items()
    }

    expected_total = np.array(list(expected_by_bracket.values()))
    actual_total = np.array(list(actual_by_bracket.values()))
    utilities.plot_array(expected_total,
                         actual_total,
                         names=brackets.keys(),
                         datadir=figdir,
                         testprefix="employment total " + test_prefix,
                         do_close=do_close)
    utilities.check_error_percentage(n,
                                     np.round(np.sum(expected_total)),
                                     np.round(np.sum(actual_total)),
                                     name="employee")

Esempio n. 2

Mostra file

File: utilities_dist.py Progetto: TTI-modelling/synthpops

def check_school_size_distribution(pop,
                                   n,
                                   datadir,
                                   figdir,
                                   location=None,
                                   state_location=None,
                                   country_location=None,
                                   file_path=None,
                                   use_default=False,
                                   test_prefix="",
                                   skip_stat_check=True,
                                   do_close=True,
                                   school_type=None):
    """
    Check the school size distribution against the reference data.

    The check runs once for all schools together (key "all") and, when
    school_type is given, once more for every listed school type.

    Args:
        pop              : population dictionary
        n                : population size, forwarded to utilities.check_error_percentage
        datadir          : root data directory holding the reference data
        figdir           : directory under which the result files are saved
        location         : name of the location
        state_location   : name of the state
        country_location : name of the country the state_location is in
        file_path        : optional user-specified data file; forwarded to the first
                           sp.get_school_size_brackets call only
        use_default      : forwarded together with file_path; presumably controls the fallback to
                           default (Seattle, Washington) data -- confirm against synthpops
        test_prefix      : text appended to the title prefix of every plot
        skip_stat_check  : skip the statistics check for the distribution (skipped by default)
        do_close         : forwarded to utilities.plot_array for the per-type plots
        school_type      : list of school types e.g. ['pk', 'es', 'ms', 'hs', 'uv']

    Returns:
        None.

    Plots are produced through utilities.plot_array with the "school_size"
    subdirectory of figdir as the output directory.
    """
    figdir = os.path.join(figdir, "school_size")
    sb = sp.get_school_size_brackets(datadir=datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     file_path=file_path,
                                     use_default=use_default)
    sb_index = sp.get_index_by_brackets_dic(sb)

    expected_school_size_by_brackets = sp.get_school_size_distr_by_brackets(
        datadir=datadir,
        location=location,
        state_location=state_location,
        country_location=country_location)
    expected_values = np.array(
        list(expected_school_size_by_brackets.values()))

    # The expected average school size is the same for every school type, so it
    # is computed once here instead of on every loop iteration.
    # NOTE(review): this lookup omits file_path/use_default, unlike `sb` above;
    # kept as in the original -- confirm whether that is intended.
    school_size_brackets = sp.get_school_size_brackets(
        datadir=datadir,
        location=location,
        country_location=country_location,
        state_location=state_location)
    # average school size per bracket
    average_school_size_in_bracket = [
        sum(sizes) / len(sizes) for sizes in school_size_brackets.values()
    ]
    # expected value: sum(distribution * size); the distribution keys are used
    # as positions into the list of bracket averages
    expected_average_school_size = sum(
        fraction * average_school_size_in_bracket[bracket]
        for bracket, fraction in expected_school_size_by_brackets.items())

    # only the first element of each returned pair is needed here
    actual_school, _ = utilities.get_ids_count_by_param(pop, "scid")
    actual_school_student_only, _ = utilities.get_ids_count_by_param(
        pop, "sc_student", "scid")
    actual_per_school_type_dict = {"all": actual_school}
    actual_per_school_type_dict_student_only = {
        "all": actual_school_student_only
    }
    if school_type is not None:
        for sc in school_type:
            actual_per_school_type_dict[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc)[0]
            actual_per_school_type_dict_student_only[sc] = \
                utilities.get_ids_count_by_param(pop, "sc_type", param="scid", condition_value=sc, filter_expression={'sc_student':'1'})[0]

    # get individual school type size distribution
    for k, school_sizes in actual_per_school_type_dict.items():
        student_only_sizes = actual_per_school_type_dict_student_only[k]
        # number of schools observed for each school size
        actual_scount = dict(Counter(school_sizes.values()))
        actual_school_size_by_brackets = sp.norm_dic(
            utilities.get_bucket_count(sb_index, sb, actual_scount))
        actual_values = np.array(list(actual_school_size_by_brackets.values()))
        utilities.plot_array(expected_values,
                             actual_values,
                             names=sb.keys(),
                             datadir=figdir,
                             testprefix="school size " + test_prefix + " " + k,
                             do_close=do_close)
        utilities.plot_array(
            school_sizes.values(),
            datadir=figdir,
            expect_label=f"school count: total {len(school_sizes)}",
            testprefix="school size total\n" + test_prefix + " " + k,
            binned=False,
            do_close=do_close)
        # the label counts the schools of the student-only data that is plotted
        utilities.plot_array(
            student_only_sizes.values(),
            datadir=figdir,
            expect_label=f"school count: total {len(student_only_sizes)}",
            testprefix="school size total (student only)\n" + test_prefix +
            " " + k,
            binned=False,
            do_close=do_close)
        # Per the original note, statistic_test does not work yet for this
        # check (school sizes by school type) and depends strongly on the
        # population size, so it only runs when explicitly requested.
        if not skip_stat_check:
            utilities.statistic_test(expected_values,
                                     actual_values,
                                     test="x",
                                     comments="school size check")
        # check average school size: total members divided by number of schools
        actual_average_school_size = sum(
            size * count
            for size, count in actual_scount.items()) / sum(
                actual_scount.values())
        utilities.check_error_percentage(n,
                                         expected_average_school_size,
                                         actual_average_school_size,
                                         name=f"average school size:'{k}'")
    # check school count distribution
    utilities.plot_array(
        [len(sizes) for sizes in actual_per_school_type_dict.values()],
        names=list(actual_per_school_type_dict.keys()),
        datadir=figdir,
        expect_label="school count",
        testprefix="school count " + test_prefix,
        value_text=True)

Esempio n. 3

Mostra file

File: utilities_dist.py Progetto: TTI-modelling/synthpops

def check_work_size_distribution(pop,
                                 n,
                                 datadir,
                                 figdir,
                                 location=None,
                                 state_location=None,
                                 country_location=None,
                                 file_path=None,
                                 use_default=False,
                                 test_prefix="",
                                 skip_stat_check=False,
                                 do_close=True):
    """
    Compare the workplace size distribution of a population with the reference data.

    Args:
        pop              : population dictionary
        n                : population size (not read by this function)
        datadir          : root data directory holding the reference data
        figdir           : directory under which the result files are saved
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : optional user-specified data file; forwarded to
                           sp.get_workplace_size_brackets and sp.get_workplace_size_distr_by_brackets
        use_default      : forwarded to the same two readers; presumably controls the fallback to
                           default (Seattle, Washington) data -- confirm against synthpops
        test_prefix      : text appended to the title prefix of the plot
        skip_stat_check  : skip the statistics check for the distribution
        do_close         : forwarded to utilities.plot_array

    Returns:
        None.

    The plot is produced through utilities.plot_array with the "work_size"
    subdirectory of figdir as the output directory.
    """
    figdir = os.path.join(figdir, "work_size")
    # both reference-data readers take the same arguments
    reader_args = dict(datadir=datadir,
                       location=location,
                       state_location=state_location,
                       country_location=country_location,
                       file_path=file_path,
                       use_default=use_default)
    wb = sp.get_workplace_size_brackets(**reader_args)
    ws = sp.norm_dic(sp.get_workplace_size_distr_by_brackets(**reader_args))
    ws_index = sp.get_index_by_brackets_dic(wb)
    upper_bound = max(ws_index.keys())
    actual_work_dist, _without_workplace = utilities.get_ids_count_by_param(
        pop, "wpid")

    # number of workplaces per size bracket; sizes above the largest indexed
    # size are clamped so they land in the bracket of that largest size
    workplaces_per_bracket = {}
    for size in actual_work_dist.values():
        bracket = ws_index[min(size, upper_bound)]
        workplaces_per_bracket[bracket] = workplaces_per_bracket.get(
            bracket, 0) + 1

    # one slot per reference bracket, zero where no workplace was observed
    actual_values = np.array(
        [workplaces_per_bracket.get(slot, 0) for slot in range(len(ws))],
        dtype=float)
    actual_values = actual_values / np.nansum(actual_values)
    expected_values = np.array(list(ws.values()))

    # label every bracket with its first and last size, e.g. "1-4"
    xlabels = [f"{wb[key][0]}-{wb[key][-1]}" for key in sorted(wb.keys())]
    utilities.plot_array(expected_values,
                         actual_values,
                         names=xlabels,
                         datadir=figdir,
                         testprefix="work size distribution " + test_prefix,
                         do_close=do_close,
                         xlabel_rotation=50)
    if skip_stat_check:
        return
    utilities.statistic_test(expected_values,
                             actual_values,
                             test="x",
                             comments="work size distribution check")

Esempio n. 4

Mostra file

File: utilities_dist.py Progetto: TTI-modelling/synthpops

def check_household_distribution(pop,
                                 n,
                                 datadir,
                                 figdir,
                                 location=None,
                                 state_location=None,
                                 country_location=None,
                                 file_path=None,
                                 use_default=False,
                                 test_prefix="",
                                 skip_stat_check=False,
                                 do_close=True):
    """
    Check the household size distribution against the reference data.

    Args:
        pop              : population dictionary
        n                : population size, forwarded to utilities.check_error_percentage
        datadir          : root data directory holding the reference data
        figdir           : directory under which the result files are saved
        location         : name of the location
        state_location   : name of the state the location is in
        country_location : name of the country the location is in
        file_path        : optional user-specified data file; forwarded to
                           sp.get_household_size_distr
        use_default      : forwarded to sp.get_household_size_distr; presumably controls the
                           fallback to default (Seattle, Washington) data -- confirm against synthpops
        test_prefix      : text appended to the title prefix of the plot
        skip_stat_check  : skip the statistics check for the distribution
        do_close         : forwarded to utilities.plot_array

    Returns:
        None.

    Raises:
        AssertionError: if some entries of pop have no household id.

    The plot is produced through utilities.plot_array with the "household"
    subdirectory of figdir as the output directory.
    """
    figdir = os.path.join(figdir, "household")
    hs = sp.get_household_size_distr(datadir=datadir,
                                     location=location,
                                     state_location=state_location,
                                     country_location=country_location,
                                     file_path=file_path,
                                     use_default=use_default)
    actual_households, actual_households_none = utilities.get_ids_count_by_param(
        pop, "hhid")
    assert actual_households_none == {}, "all entries must have household ids"
    # number of households observed for each household size, smallest size first
    actual_household_count = Counter(actual_households.values())
    sorted_actual_household_count = {
        size: actual_household_count[size]
        for size in sorted(actual_household_count)
    }
    actual_values = np.array(
        list(sp.norm_dic(sorted_actual_household_count).values()))
    expected_values = np.array(list(hs.values()))
    utilities.plot_array(expected_values,
                         actual_values,
                         names=list(hs.keys()),
                         datadir=figdir,
                         testprefix="household count percentage " +
                         test_prefix,
                         do_close=do_close,
                         value_text=True)

    if not skip_stat_check:
        utilities.statistic_test(expected_values,
                                 actual_values,
                                 test="x",
                                 comments="household count percentage check")
    # check average household size
    # Expected mean: sum of size * fraction, where the fraction at position i
    # belongs to household size i + 1.
    # NOTE(review): assumes hs lists the sizes 1..N in ascending order -- confirm.
    # The previous code indexed with np.where() on each fraction value, which
    # always picked element 0 (or failed on a zero fraction), so the result was
    # not an average size.
    household_sizes = np.arange(1, len(expected_values) + 1)
    expected_average_household_size = round(
        float(np.dot(household_sizes, expected_values)), 3)
    # Actual mean: people living in households divided by number of households.
    # Using the observed sizes keeps it correct even when a size is absent.
    actual_average_household_size = round(
        float(
            sum(size * count
                for size, count in sorted_actual_household_count.items()) /
            sum(sorted_actual_household_count.values())), 3)
    print(
        f"expected average household size: {expected_average_household_size}\n"
        f"actual average household size: {actual_average_household_size}")
    utilities.check_error_percentage(n,
                                     expected_average_household_size,
                                     actual_average_household_size,
                                     name="average household size")