コード例 #1
0
def get_school_type_age_ranges(location_data):
    """
    Read the school type and age range data from a csv file and attach it to
    the location_data json object.

    The file is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_school_type_age_ranges.csv``.
    Rows are consumed positionally: item 0 of each row is stored as the school
    type and items 1 and 2 are stored as its age range (presumably age_min and
    age_max, given the int_cols passed below -- confirm against the csv
    layout).

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : the same location_data object, with its
        ``school_types_by_age`` attribute replaced by a list holding one
        sp.SchoolTypeByAge entry per row of the csv file.
    """
    df = pd.read_csv(
        os.path.join(
            sp.settings.datadir, location_data.location_name,
            f"{location_data.location_name}_school_type_age_ranges.csv"))
    arr = sp.convert_df_to_json_array(df,
                                      cols=df.columns,
                                      int_cols=['age_min', 'age_max'])

    school_type_age_ranges = []
    for row in arr:
        s = sp.SchoolTypeByAge()
        s.school_type = row[0]
        s.age_range = [row[1], row[2]]
        school_type_age_ranges.append(s)

    # Any previous value of school_types_by_age is replaced, not extended.
    location_data.school_types_by_age = school_type_age_ranges

    return location_data
コード例 #2
0
def get_age_dist_arr(location_data, num_agebrackets=16):
    """
    Build a population age distribution object for the location from its csv
    file with the requested number of age brackets.

    The file is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_ages_<num_agebrackets>.csv``.

    Args:
        location_data (sp.Location) : json-based data object for the location
        num_agebrackets (int) : the number of age brackets or bins; selects
        which csv file is read

    Returns:
        sp.PopulationAgeDistribution : object whose ``distribution`` is the
        json array built from all columns of the csv file and whose
        ``num_bins`` is the length of that array.
    """
    location_dir = os.path.join(sp.settings.datadir,
                                location_data.location_name)
    csv_name = f"{location_data.location_name}_ages_{num_agebrackets}.csv"
    ages_df = pd.read_csv(os.path.join(location_dir, csv_name))

    # 'age_min' and 'age_max' are handed over as the integer columns
    bracket_rows = sp.convert_df_to_json_array(
        ages_df, cols=ages_df.columns, int_cols=['age_min', 'age_max'])

    result = sp.PopulationAgeDistribution()
    result.num_bins = len(bracket_rows)
    result.distribution = bracket_rows
    return result
コード例 #3
0
def process_enrollment_rates(location_data):
    """
    Expand binned enrollment rates into per-age rates and store them on the
    location_data json object.

    The raw data is read from ``<sp.settings.datadir>/Nepal/enrollment_by_age.csv``
    (the 'Nepal' folder is hard-coded). Each 'Age' label is expected to look
    like "<min>-<max>"; every age in that inclusive range receives the bin's
    'EnrollmentRate'. Ages 0 through 100 not covered by any bin keep a rate
    of 0.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : location_data, with ``enrollment_rates_by_age`` set to
        the json array of (age, percent) rows.
    """
    raw_data_path = os.path.join(sp.settings.datadir, 'Nepal')
    en_df = pd.read_csv(os.path.join(raw_data_path, 'enrollment_by_age.csv'))

    # start with a rate of 0 for every age from 0 to 100
    rate_by_age = dict.fromkeys(np.arange(101), 0)

    for label, rate in zip(en_df['Age'].values,
                           en_df['EnrollmentRate'].values):
        bounds = label.split('-')
        low, high = int(bounds[0]), int(bounds[1])
        # the upper bound of a bin is inclusive
        rate_by_age.update((age, rate) for age in range(low, high + 1))

    ordered_rates = [rate_by_age[age] for age in sorted(rate_by_age.keys())]
    enrollment_rates_df = pd.DataFrame.from_dict(
        dict(age=np.arange(len(rate_by_age)), percent=ordered_rates))

    location_data.enrollment_rates_by_age = sp.convert_df_to_json_array(
        enrollment_rates_df,
        cols=enrollment_rates_df.columns,
        int_cols=['age'])
    return location_data
コード例 #4
0
def get_household_size_dist_arr(location_data):
    """
    Load the household size distribution for the location from its csv file
    and convert it to a json array.

    The file is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_household_sizes.csv``.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : the result of sp.convert_df_to_json_array over all columns of
        the csv file, with 'household_size' passed as the integer column
        (presumably one row per household size -- confirm against the helper).
    """
    location_dir = os.path.join(sp.settings.datadir,
                                location_data.location_name)
    csv_name = f'{location_data.location_name}_household_sizes.csv'
    sizes_df = pd.read_csv(os.path.join(location_dir, csv_name))

    return sp.convert_df_to_json_array(
        sizes_df,
        cols=sizes_df.columns,
        int_cols=['household_size'])
コード例 #5
0
def process_enrollment_rates(location_data):
    """
    Expand the location's binned enrollment rates into per-age rates.

    The binned data is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_enrollment_rates_binned_by_age.csv``,
    using its 'age_min', 'age_max' and 'percent' columns. Every age in the
    inclusive range [age_min, age_max] of a bin receives that bin's percent;
    ages 0 through 100 not covered by any bin keep a rate of 0.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : the result of sp.convert_df_to_json_array over the (age,
        percent) table, with 'age' passed as the integer column.
    """
    raw_data_path = os.path.join(sp.settings.datadir,
                                 location_data.location_name)
    en_df = pd.read_csv(
        os.path.join(
            raw_data_path,
            f'{location_data.location_name}_enrollment_rates_binned_by_age.csv'))

    # start with a rate of 0 for every age from 0 to 100
    rate_by_age = dict.fromkeys(np.arange(101), 0)

    for idx, rate in enumerate(en_df['percent'].values):
        low = en_df['age_min'].values[idx]
        high = en_df['age_max'].values[idx]
        # the upper bound of a bin is inclusive
        rate_by_age.update((age, rate) for age in range(low, high + 1))

    ordered_rates = [rate_by_age[age] for age in sorted(rate_by_age.keys())]
    enrollment_rates_df = pd.DataFrame.from_dict(
        dict(age=np.arange(len(rate_by_age)), percent=ordered_rates))

    return sp.convert_df_to_json_array(
        enrollment_rates_df,
        cols=enrollment_rates_df.columns,
        int_cols=['age'])
コード例 #6
0
def get_workplace_size_dist_arr(location_data):
    """
    Load the workplace size distribution for the location from its csv file
    and convert it to a json array.

    The file is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_workplace_sizes.csv``.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : the result of sp.convert_df_to_json_array over all columns of
        the csv file, with 'workplace_size_min' and 'workplace_size_max'
        passed as the integer columns (presumably one row per workplace size
        bracket -- confirm against the helper).
    """
    location_dir = os.path.join(sp.settings.datadir,
                                location_data.location_name)
    csv_name = f"{location_data.location_name}_workplace_sizes.csv"
    sizes_df = pd.read_csv(os.path.join(location_dir, csv_name))

    return sp.convert_df_to_json_array(
        sizes_df,
        cols=sizes_df.columns,
        int_cols=['workplace_size_min', 'workplace_size_max'])
コード例 #7
0
def get_enrollment_rates_arr(location_data):
    """
    Load the per-age enrollment rates for the location from its csv file and
    convert them to a json array.

    The file is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_enrollment_rates_by_age.csv``.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : the result of sp.convert_df_to_json_array over all columns of
        the csv file, with 'age' passed as the integer column (presumably one
        row per age -- confirm against the csv file).
    """
    location_dir = os.path.join(sp.settings.datadir,
                                location_data.location_name)
    csv_name = f'{location_data.location_name}_enrollment_rates_by_age.csv'
    rates_df = pd.read_csv(os.path.join(location_dir, csv_name))

    return sp.convert_df_to_json_array(
        rates_df,
        cols=rates_df.columns,
        int_cols=['age'])
コード例 #8
0
    def setup_convert_df_to_json_array(self, pars):
        """
        Prepare the two objects a test compares: the array converted from a
        csv file and the matching entry stored in the location's json file.

        Args:
            pars (dict): object with attribute access providing ``filepath``
            (csv file to read), ``cols_ind`` (indices of the columns to
            include; ``[]`` means all columns), ``int_cols_ind`` (indices of
            the columns to convert to integers; ``[]`` means none),
            ``location_name`` (stem of the json file to load) and
            ``property_name`` (attribute of the loaded location to compare
            against).

        Returns:
            array, json.array : The array built from the dataframe and the
            json entry for comparison.
        """
        source_df = pd.read_csv(pars.filepath)

        # an empty index list selects every column
        selected_cols = (source_df.columns if pars.cols_ind == []
                         else source_df.columns[pars.cols_ind])

        # an empty index list is passed through unchanged; otherwise the
        # indices are translated into column names
        integer_cols = (pars.int_cols_ind if pars.int_cols_ind == []
                        else list(source_df.columns[pars.int_cols_ind].values))

        # array-ify all the data, convert some columns to integers
        converted = sp.convert_df_to_json_array(source_df, selected_cols,
                                                integer_cols)

        # corresponding json data object for the same location and data
        location = sp.load_location_from_filepath(
            f"{pars.location_name}.json")
        expected = getattr(location, pars.property_name)

        if pars.property_name == 'population_age_distributions':
            # several distributions may be stored; take the first one whose
            # bin count matches the converted array
            same_size = [entry for entry in expected
                         if entry.num_bins == len(converted)]
            expected = same_size[0].distribution

        return converted, expected
コード例 #9
0
def process_age_dists(location_data):
    """
    Read in age distribution data from csv files and format the data to add to
    the location_data json object.

    The raw counts are read from ``<sp.settings.datadir>/Nepal/Nepal-2019.csv``
    (the 'Nepal' folder and file name are hard-coded). The 'M' and 'F' count
    columns are summed and normalized into a distribution over the age
    brackets named in the 'Age' column. Two distributions are appended to
    ``location_data.population_age_distributions``:

    1. one with every age bracket found in the file;
    2. a coarser one in which the last six brackets are merged into a single
       bracket whose age_max is 100 (presumably yielding 16 brackets from a
       21-bracket file -- confirm against the data).

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : location_data
    """
    raw_data_path = os.path.join(sp.settings.datadir, 'Nepal')
    age_count_df = pd.read_csv(os.path.join(raw_data_path, 'Nepal-2019.csv'))

    age_count = np.array(age_count_df['M']) + np.array(age_count_df['F'])
    age_dist = age_count / age_count.sum()

    age_mins = []
    age_maxs = []
    for label in age_count_df['Age'].values:
        try:
            # regular bracket label of the form "<min>-<max>"
            bounds = label.split('-')
            low, high = int(bounds[0]), int(bounds[1])
        except (ValueError, IndexError):
            # open-ended bracket label such as "100+": min and max coincide
            low = high = int(label.split('+')[0])
        age_mins.append(low)
        age_maxs.append(high)

    # key order fixes the column order of the dataframes built below
    data = dict(age_min=np.array(age_mins),
                age_max=np.array(age_maxs),
                age_dist=np.array(age_dist))

    df = pd.DataFrame.from_dict(data)
    age_dist_arr = sp.convert_df_to_json_array(df,
                                               cols=df.columns,
                                               int_cols=['age_min', 'age_max'])

    # Fill each distribution object before appending it, so entries already
    # present in population_age_distributions are never overwritten.
    full_dist = sp.PopulationAgeDistribution()
    full_dist.num_bins = len(age_dist_arr)
    full_dist.distribution = age_dist_arr
    location_data.population_age_distributions.append(full_dist)

    # Coarser version: fold the last six brackets into one ending at age 100.
    data_16 = sc.dcp(data)
    data_16['age_min'] = data_16['age_min'][:-5]
    data_16['age_max'] = data_16['age_max'][:-5]
    data_16['age_max'][-1] = 100
    data_16['age_dist'][-6] = data_16['age_dist'][-6:].sum()
    data_16['age_dist'] = data_16['age_dist'][:-5]

    df_16 = pd.DataFrame.from_dict(data_16)
    age_dist_arr_16 = sp.convert_df_to_json_array(
        df_16, cols=df_16.columns, int_cols=['age_min', 'age_max'])

    coarse_dist = sp.PopulationAgeDistribution()
    coarse_dist.num_bins = len(age_dist_arr_16)
    coarse_dist.distribution = age_dist_arr_16
    location_data.population_age_distributions.append(coarse_dist)

    return location_data