def get_school_type_age_ranges(location_data):
    """
    Read in the school type and age range data from a csv file and attach
    it to the location_data json object.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_school_type_age_ranges.csv``.
    Each row of the converted table becomes one ``sp.SchoolTypeByAge`` entry,
    taking the first value as the school type and the next two values as the
    [min, max] age range.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : The same location_data object, with its
        ``school_types_by_age`` attribute replaced by the list built here.
    """
    location_name = location_data.location_name
    csv_path = os.path.join(
        sp.settings.datadir,
        location_name,
        f"{location_name}_school_type_age_ranges.csv",
    )
    df = pd.read_csv(csv_path)

    # 'age_min' and 'age_max' are passed as the integer columns.
    rows = sp.convert_df_to_json_array(
        df, cols=df.columns, int_cols=['age_min', 'age_max'])

    entries = []
    for row in rows:
        entry = sp.SchoolTypeByAge()
        entry.school_type = row[0]
        entry.age_range = [row[1], row[2]]
        entries.append(entry)

    location_data.school_types_by_age = entries
    return location_data
def get_age_dist_arr(location_data, num_agebrackets=16):
    """
    Read in age distribution data from a csv file and wrap it in a
    population age distribution object.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_ages_<num_agebrackets>.csv``.
    location_data itself is only read, not modified.

    Args:
        location_data (sp.Location) : json-based data object for the location
        num_agebrackets (int)       : the number of age brackets or bins, used to pick the csv file

    Returns:
        sp.PopulationAgeDistribution : An object whose ``distribution`` is the
        converted table and whose ``num_bins`` is the number of rows in it.
    """
    location_name = location_data.location_name
    csv_name = f"{location_name}_ages_{num_agebrackets}.csv"
    df = pd.read_csv(os.path.join(sp.settings.datadir, location_name, csv_name))

    # 'age_min' and 'age_max' are passed as the integer columns.
    distribution = sp.convert_df_to_json_array(
        df, cols=df.columns, int_cols=['age_min', 'age_max'])

    result = sp.PopulationAgeDistribution()
    result.num_bins = len(distribution)
    result.distribution = distribution
    return result
def process_enrollment_rates(location_data):
    """
    Read in binned enrollment rates from the raw Nepal csv file, expand them
    to one rate per single year of age, and store the result on the
    location_data json object.

    The csv ``<sp.settings.datadir>/Nepal/enrollment_by_age.csv`` must have an
    'Age' column with labels of the form "<min>-<max>" and an
    'EnrollmentRate' column. Ages 0 through 100 start at a rate of 0 and are
    overwritten for every age covered by a bin (bounds inclusive).

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : location_data, with ``enrollment_rates_by_age`` set.
    """
    raw_data_path = os.path.join(sp.settings.datadir, 'Nepal')
    en_df = pd.read_csv(os.path.join(raw_data_path, 'enrollment_by_age.csv'))

    bin_labels = en_df['Age'].values
    bin_rates = en_df['EnrollmentRate'].values

    # Every age from 0 to 100 defaults to a rate of 0.
    rate_by_age = dict.fromkeys(np.arange(101), 0)
    for label, rate in zip(bin_labels, bin_rates):
        bounds = label.split('-')
        first_age, last_age = int(bounds[0]), int(bounds[1])
        for age in range(first_age, last_age + 1):
            rate_by_age[age] = rate

    ages = np.arange(len(rate_by_age))
    percents = [rate_by_age[age] for age in sorted(rate_by_age)]
    rates_df = pd.DataFrame.from_dict({'age': ages, 'percent': percents})

    location_data.enrollment_rates_by_age = sp.convert_df_to_json_array(
        rates_df, cols=rates_df.columns, int_cols=['age'])
    return location_data
def get_household_size_dist_arr(location_data):
    """
    Read in the household size distribution from a csv file and convert it
    to a json-style array.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_household_sizes.csv``.
    location_data itself is only read, not modified.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : The converted table, one entry per csv row, with
        'household_size' passed as the integer column.
    """
    location_name = location_data.location_name
    csv_path = os.path.join(
        sp.settings.datadir,
        location_name,
        f'{location_name}_household_sizes.csv',
    )
    df = pd.read_csv(csv_path)
    return sp.convert_df_to_json_array(
        df, cols=df.columns, int_cols=['household_size'])
def process_enrollment_rates(location_data):
    """
    Expand binned enrollment rates to one rate per single year of age.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_enrollment_rates_binned_by_age.csv``
    and must have 'age_min', 'age_max' and 'percent' columns. Ages 0 through
    100 start at a rate of 0 and are overwritten for every age covered by a
    bin (bounds inclusive). location_data itself is only read, not modified.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : The converted (age, percent) table, with 'age' passed as the
        integer column.
    """
    location_name = location_data.location_name
    raw_data_path = os.path.join(sp.settings.datadir, location_name)
    csv_path = os.path.join(
        raw_data_path, f'{location_name}_enrollment_rates_binned_by_age.csv')
    en_df = pd.read_csv(csv_path)

    bin_rates = en_df['percent'].values
    bin_starts = en_df['age_min'].values
    bin_ends = en_df['age_max'].values

    # Every age from 0 to 100 defaults to a rate of 0.
    rate_by_age = dict.fromkeys(np.arange(101), 0)
    for first_age, last_age, rate in zip(bin_starts, bin_ends, bin_rates):
        for age in range(first_age, last_age + 1):
            rate_by_age[age] = rate

    ages = np.arange(len(rate_by_age))
    percents = [rate_by_age[age] for age in sorted(rate_by_age)]
    rates_df = pd.DataFrame.from_dict({'age': ages, 'percent': percents})

    return sp.convert_df_to_json_array(
        rates_df, cols=rates_df.columns, int_cols=['age'])
def get_workplace_size_dist_arr(location_data):
    """
    Read in the workplace size distribution from a csv file and convert it
    to a json-style array.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_workplace_sizes.csv``.
    location_data itself is only read, not modified.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : The converted table, one entry per csv row, with
        'workplace_size_min' and 'workplace_size_max' passed as the integer
        columns.
    """
    location_name = location_data.location_name
    csv_path = os.path.join(
        sp.settings.datadir,
        location_name,
        f"{location_name}_workplace_sizes.csv",
    )
    df = pd.read_csv(csv_path)
    integer_columns = ['workplace_size_min', 'workplace_size_max']
    return sp.convert_df_to_json_array(
        df, cols=df.columns, int_cols=integer_columns)
def get_enrollment_rates_arr(location_data):
    """
    Read in enrollment rates by age from a csv file and convert them to a
    json-style array.

    The csv is read from
    ``<sp.settings.datadir>/<location_name>/<location_name>_enrollment_rates_by_age.csv``.
    location_data itself is only read, not modified.

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        array : The converted table, one entry per csv row, with 'age' passed
        as the integer column. Presumably one row per age 0 through 100, but
        that depends on the csv contents.
    """
    location_name = location_data.location_name
    csv_path = os.path.join(
        sp.settings.datadir,
        location_name,
        f'{location_name}_enrollment_rates_by_age.csv',
    )
    df = pd.read_csv(csv_path)
    return sp.convert_df_to_json_array(df, cols=df.columns, int_cols=['age'])
def setup_convert_df_to_json_array(self, pars):
    """
    Build the two objects to compare: the array converted from a csv file
    and the matching entry stored in the location's json file.

    Args:
        pars (dict): parameters read as attributes: ``filepath`` (csv to
            load), ``cols_ind`` (column positions to include, ``[]`` for all),
            ``int_cols_ind`` (column positions to convert to integers, ``[]``
            for none), ``location_name`` (stem of the json file to load) and
            ``property_name`` (attribute of the location to compare against).

    Returns:
        array, json.array : An array of the desired data from a dataframe and
        the json entry for comparison.
    """
    df = pd.read_csv(pars.filepath)

    # An empty index list means "include every column"; otherwise the
    # indices select which columns to include.
    cols = df.columns if pars.cols_ind == [] else df.columns[pars.cols_ind]

    # Integer columns are passed by name; an empty index list is forwarded
    # unchanged.
    int_cols = (
        pars.int_cols_ind
        if pars.int_cols_ind == []
        else list(df.columns[pars.int_cols_ind].values)
    )

    # Array-ify all the data, converting the selected columns to integers.
    arr = sp.convert_df_to_json_array(df, cols, int_cols)

    # Corresponding json data object for the same location and data.
    location = sp.load_location_from_filepath(f"{pars.location_name}.json")
    json_array = getattr(location, pars.property_name)

    if pars.property_name == 'population_age_distributions':
        # Several age distributions may be stored; compare against the first
        # one whose bin count matches the array.
        same_size = [dist for dist in json_array if dist.num_bins == len(arr)]
        json_array = same_size[0].distribution

    return arr, json_array
def process_age_dists(location_data):
    """
    Read in age distribution data from the raw Nepal csv file and add two
    age distributions to the location_data json object.

    The csv ``<sp.settings.datadir>/Nepal/Nepal-2019.csv`` must have 'Age',
    'M' and 'F' columns. Male and female counts are summed per age bin and
    normalized to sum to 1. Two distributions are appended to
    ``location_data.population_age_distributions``, in this order:

    1. the distribution with every age bin found in the csv;
    2. a coarser distribution in which the last six bins are merged into a
       single bin whose upper age is set to 100 (16 bins, assuming the csv
       has 21).

    Args:
        location_data (sp.Location) : json-based data object for the location

    Returns:
        sp.Location : location_data
    """
    raw_data_path = os.path.join(sp.settings.datadir, 'Nepal')
    age_count_df = pd.read_csv(os.path.join(raw_data_path, 'Nepal-2019.csv'))

    age_count = np.array(age_count_df['M']) + np.array(age_count_df['F'])
    age_dist = age_count / age_count.sum()
    age_bin_labels = age_count_df['Age'].values

    data = dict()
    data['age_min'] = []
    data['age_max'] = []
    data['age_dist'] = []

    for bi, bl in enumerate(age_bin_labels):
        try:
            # Regular bins are labelled "<min>-<max>".
            b = bl.split('-')
            b0, b1 = int(b[0]), int(b[1])
        except (ValueError, IndexError):
            # Open-ended bins are labelled "<min>+"; use min as both bounds.
            # Only parse failures are caught so unrelated errors (and
            # KeyboardInterrupt) are not silently swallowed.
            b = bl.split('+')
            b0, b1 = int(b[0]), int(b[0])

        data['age_min'].append(b0)
        data['age_max'].append(b1)
        data['age_dist'].append(age_dist[bi])

    for k in data:
        data[k] = np.array(data[k])

    df = pd.DataFrame.from_dict(data)
    age_dist_arr = sp.convert_df_to_json_array(
        df, cols=df.columns, int_cols=['age_min', 'age_max'])

    # Populate the object before appending it, instead of appending and then
    # writing through a fixed index, so entries already present in the list
    # are never overwritten.
    full_dist = sp.PopulationAgeDistribution()
    full_dist.num_bins = len(age_dist_arr)
    full_dist.distribution = age_dist_arr
    location_data.population_age_distributions.append(full_dist)

    # Coarser version: drop the last five bins and fold their mass into the
    # new last bin, which is extended up to age 100.
    data_16 = sc.dcp(data)
    data_16['age_min'] = data_16['age_min'][:-5]
    data_16['age_max'] = data_16['age_max'][:-5]
    data_16['age_max'][-1] = 100
    data_16['age_dist'][-6] = data_16['age_dist'][-6:].sum()
    data_16['age_dist'] = data_16['age_dist'][:-5]

    df_16 = pd.DataFrame.from_dict(data_16)
    age_dist_arr_16 = sp.convert_df_to_json_array(
        df_16, cols=df_16.columns, int_cols=['age_min', 'age_max'])

    coarse_dist = sp.PopulationAgeDistribution()
    coarse_dist.num_bins = len(age_dist_arr_16)
    coarse_dist.distribution = age_dist_arr_16
    location_data.population_age_distributions.append(coarse_dist)

    return location_data