def get_education_profile(geo_code, geo_level, session):
    """
    Build the educational attainment statistics for a place.

    Sums the rows of the 'highest educational level 20 and older' table for
    the place and reports every (collapsed) category, as well as the
    "Grade 9 or higher" and "Matric or higher" groups, as a percentage of the
    overall total.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to `get_objects_by_geo`
    :return: dict with the keys 'educational_attainment_distribution' and
             'educational_attainment'
    """
    field = 'highest educational level 20 and older'
    universe = 'Individuals aged 20 and older'

    db_model = get_model_from_fields([field], geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0

    for i, obj in enumerate(objects):
        category_val = getattr(obj, field)

        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
        if category_val in EDUCATION_FET_OR_HIGHER:
            fet_or_higher += obj.total

        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }

    edu_dist_data = collapse_categories(edu_dist_data,
                                        COLLAPSED_EDUCATION_CATEGORIES,
                                        key_order=EDUCATION_KEY_ORDER)
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        },
    }

    # calculate percentages; a place without any rows has a total of zero,
    # in which case every percentage is reported as zero instead of crashing
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            if total == 0:
                share = 0.0
            else:
                share = round(fields["numerators"]["this"] / total * 100, 2)
            fields["values"] = {"this": share}

    # metadata is attached after the percentage pass so that the loop above
    # only ever sees statistic entries
    edu_dist_data['metadata'] = {'universe': universe}
    edu_split_data['metadata'] = {'universe': universe}

    add_metadata(edu_dist_data, db_model)

    return {
        'educational_attainment_distribution': edu_dist_data,
        'educational_attainment': edu_split_data,
    }
def store_values(self):
    """
    Store every row produced by `self.read_rows()` in the database.

    Each row is a `(geo_name, values)` pair. The geo level is derived from
    the name with `self.determine_level()`, the matching db model is looked
    up (and its table created) once per level, and one db row is added for
    every `(category, value)` pair of `self.categories` and `values`.

    The session is always closed, even when a row cannot be processed.
    """
    session = get_session()
    try:
        province_codes = dict(
            (p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)
            # single formatted argument: same output on Python 2 and 3
            print('%s %s' % (geo_level, geo_name))

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_level == 'country':
                code = 'ZA'
            else:
                # every other name carries its code in front of a colon
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None
                models[geo_level] = db_model = get_model_from_fields(
                    self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine,
                                         tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for
                # this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    # a lone dash marks an empty cell
                    value = '0'
                kwargs.update(dict(zip(self.fields, category)))
                # totals are written with thousands separators
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
    finally:
        # release the connection whether or not the import succeeded
        session.close()
def get_education_profile(geo_code, geo_level, session):
    """
    Build the educational attainment statistics for a place.

    Sums the rows of the 'highesteducationallevel_<geo_level>_25andover'
    table for the place and reports every (collapsed) category, as well as
    the "Grade 9 or higher" and "Matric or higher" groups, as a percentage
    of the overall total.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to `get_objects_by_geo`
    :return: dict with the keys 'educational_attainment_distribution' and
             'educational_attainment'
    """
    field = 'highest educational level'
    universe = 'Individuals 25 and over'

    db_model = get_model_from_fields(
        [field], geo_level,
        'highesteducationallevel_%s_25andover' % geo_level)
    objects = get_objects_by_geo(db_model, geo_code, geo_level, session)

    edu_dist_data = {}
    get_or_higher = 0.0
    fet_or_higher = 0.0
    total = 0.0

    for i, obj in enumerate(objects):
        category_val = getattr(obj, field)

        # increment counters
        total += obj.total
        if category_val in EDUCATION_GET_OR_HIGHER:
            get_or_higher += obj.total
        if category_val in EDUCATION_FET_OR_HIGHER:
            fet_or_higher += obj.total

        # add data points for category
        edu_dist_data[str(i)] = {
            "name": category_val,
            "numerators": {"this": obj.total},
        }

    edu_dist_data = collapse_categories(
        edu_dist_data,
        COLLAPSED_EDUCATION_CATEGORIES,
        key_order=('None', 'Other', 'Some primary', 'Primary', 'Grade 9',
                   'Some secondary', 'Grade 12 (Matric)', 'Undergrad',
                   'Post-grad'))
    edu_split_data = {
        'percent_get_or_higher': {
            "name": "Completed Grade 9 or higher",
            "numerators": {"this": get_or_higher},
        },
        'percent_fet_or_higher': {
            "name": "Completed Matric or higher",
            "numerators": {"this": fet_or_higher},
        },
    }

    # calculate percentages; a place without any rows has a total of zero,
    # in which case every percentage is reported as zero instead of crashing
    for data in (edu_dist_data, edu_split_data):
        for fields in data.values():
            if total == 0:
                share = 0.0
            else:
                share = round(fields["numerators"]["this"] / total * 100, 2)
            fields["values"] = {"this": share}

    # metadata is attached after the percentage pass so that the loop above
    # only ever sees statistic entries
    edu_dist_data['metadata'] = {'universe': universe}
    edu_split_data['metadata'] = {'universe': universe}

    add_metadata(edu_dist_data, db_model)

    return {
        'educational_attainment_distribution': edu_dist_data,
        'educational_attainment': edu_split_data,
    }
def store_values(self):
    """
    Store every row produced by `self.read_rows()` in the database.

    Each row is a `(geo_level, geo_name, category, total)` tuple. The
    matching db model is looked up (and its table created) once per geo
    level; the name of every table created this way is appended to
    `self.table_names`. One db row is added per input row.

    The session is always closed, even when a row cannot be processed.

    :raises ValueError: for a geo level other than 'county' or 'country',
                        or when no model can be found for `self.fields`
    """
    session = get_session()
    try:
        county_codes = dict((p.name.upper().replace('-', ' '), p.code)
                            for p in session.query(County))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_level, geo_name, category, total in self.read_rows():
            count += 1
            # single formatted argument: same output on Python 2 and 3
            print('%s %s %s %s' % (geo_level, geo_name, category, total))

            if geo_level == 'county':
                # normalise the name the same way the lookup keys were
                # built, otherwise hyphenated names can never match
                code = county_codes[geo_name.upper().replace('-', ' ')]
            elif geo_level == 'country':
                code = 'KE'
            else:
                raise ValueError(geo_level)

            base_kwargs = {'%s_code' % geo_level: code} if code else {}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None

                try:
                    models[geo_level] = db_model = get_model_from_fields(
                        self.fields, geo_level, table_name)
                except ValueError as e:
                    # formatting the exception itself avoids the
                    # Python 2 only `.message` attribute
                    raise ValueError('%s. Have you declared this field in a table in censusreporter/api/models/tables.py?' % e)
                Base.metadata.create_all(_engine,
                                         tables=[db_model.__table__])
                self.table_names.append(db_model.__table__.name)

            # prepare the dict of args to pass to the db model for this row
            kwargs = base_kwargs.copy()
            kwargs.update(dict(zip(self.fields, category)))
            kwargs['total'] = total

            # create and add the row
            session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
    finally:
        # release the connection whether or not the import succeeded
        session.close()
def store_values(self):
    """
    Store every row produced by `self.read_rows()` in the database.

    Each row is a `(geo_name, values)` pair. The geo level is derived from
    the name with `self.determine_level()`, the matching db model is looked
    up (and its table created) once per level, and one db row is added for
    every `(category, value)` pair of `self.categories` and `values`.

    The session is always closed, even when a row cannot be processed.
    """
    session = get_session()
    try:
        province_codes = dict(
            (p.name, p.code) for p in session.query(Province))
        session.commit()

        # cache of the db models for each geo level
        models = {}
        count = 0

        for geo_name, values in self.read_rows():
            count += 1
            geo_level = self.determine_level(geo_name)
            # single formatted argument: same output on Python 2 and 3
            print('%s %s' % (geo_level, geo_name))

            if geo_level == 'province':
                code = province_codes[geo_name]
            elif geo_level == 'country' or geo_name == 'country':
                # the branch used to look at the *name* only, so a country
                # row with any other name fell through to the split below
                # and was stored under a bogus code; the old name test is
                # kept so rows it matched behave as before
                code = 'ZA'
            else:
                # every other name carries its code in front of a colon
                code = geo_name.split(':')[0]
            base_kwargs = {'%s_code' % geo_level: code}

            # get db model and create table if necessary
            if geo_level in models:
                db_model = models[geo_level]
            else:
                if self.table_name:
                    table_name = self.table_name + '_' + geo_level
                else:
                    table_name = None
                models[geo_level] = db_model = get_model_from_fields(
                    self.fields, geo_level, table_name)
                Base.metadata.create_all(_engine,
                                         tables=[db_model.__table__])

            for category, value in zip(self.categories, values):
                # prepare the dict of args to pass to the db model for
                # this row
                kwargs = base_kwargs.copy()
                if value.strip() == '-':
                    # a lone dash marks an empty cell
                    value = '0'
                kwargs.update(dict(zip(self.fields, category)))
                # totals are written with thousands separators
                kwargs['total'] = int(value.replace(',', ''))

                # create and add the row
                session.add(db_model(**kwargs))

            if count % 100 == 0:
                session.flush()

        session.commit()
    finally:
        # release the connection whether or not the import succeeded
        session.close()
def get_stat_data(fields, geo_level, geo_code, session, order_by=None,
                  percent=True, total=None, table_fields=None,
                  table_name=None, only=None, exclude=None,
                  exclude_zero=False, recode=None, key_order=None):
    """
    This is our primary helper routine for building a dictionary suitable
    for a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place
    identified by +geo_level+ and +geo_code+ and calculates numerators and
    values. If multiple fields are given, it creates nested result
    dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+
    and +exclude_zero+.

    The field values can be recoded using +recode+ and re-ordered using
    +key_order+.

    :param str or list fields: the census field to build stats for. Specify
        a list of fields to build nested statistics. If multiple fields are
        specified, then the values of parameters such as +only+, +exclude+
        and +recode+ will change. These must be fields in
        `api.models.census.census_fields`, e.g. 'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'
    :param bool percent: should we calculate percentages, or just sum raw
        values?
    :param int total: the total value to use for percentages, or None to
        total columns automatically
    :param list table_fields: list of fields to use to find the table,
        defaults to `fields`
    :param str table_name: override the table name, otherwise it's
        calculated from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+
        has many items, this must be a dict mapping field names to a list of
        strings; fields missing from the dict are not filtered.
    :param dict or list exclude: ignore these field values. If +fields+ has
        many items, this must be a dict mapping field names to a list of
        strings. Field names are checked before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda recode: function or dict to recode values of the
        fields. If +fields+ is a singleton, then the keys of this dict must
        be the values to recode from, otherwise they must be the field names
        and then the values. If this is a lambda, it is called with the
        field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary.
        If +fields+ has many items, this must be a dict from field names to
        orderings. The default ordering is determined by +order_by+.

    :return: (data-dictionary, total)
    :raises ValueError: if many fields are given together with a +total+ or
        with a non-dict +only+, +exclude+ or +key_order+
    """
    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    # the (value,) wrapping below keeps the message formatting working when
    # the offending argument is itself a tuple
    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError("If many fields are given, then only must be a dict. I got %s instead" % (only,))
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError("If many fields are given, then exclude must be a dict. I got %s instead" % (exclude,))
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError("If many fields are given, then key_order must be a dict. I got %s instead" % (key_order,))
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    if recode:
        if not isinstance(recode, dict) or not many_fields:
            # one recoder for everything: apply it to each field
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level,
                                  table_name)
    objects = get_objects_by_geo(model, geo_code, geo_level, session,
                                 fields=fields, order_by=order_by)

    root_data = OrderedDict()
    our_total = {}

    def get_data_object(obj):
        """ Walk down the list of fields and return the final resting place
        for data for this stat, or None if the row is filtered out.
        """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            # a field that isn't mentioned in +only+ is not filtered at all
            if only and field in only and key not in only[field]:
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering the first time this level is reached;
            # the 'metadata' entry of an intermediate dict is not data
            if field in key_order and not any(k != 'metadata' for k in data):
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields, set on the first
            # visit so that a key seen only once still gets its name
            if i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages
    if percent:
        def calc_percent(node):
            for key, child in node.items():
                if key == 'metadata':
                    continue
                if 'numerators' in child:
                    tot = our_total[key] if many_fields else grand_total
                    perc = 0 if tot == 0 else (child['numerators']['this'] / tot * 100)
                    child['values'] = {'this': round(perc, 2)}
                else:
                    calc_percent(child)

        calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
def get_stat_data(fields, geo_level, geo_code, session, order_by=None,
                  percent=True, total=None, table_fields=None,
                  table_name=None, only=None, exclude=None,
                  exclude_zero=False, recode=None, key_order=None):
    """
    This is our primary helper routine for building a dictionary suitable
    for a place's profile page, based on a statistic.

    It sums over the data for +fields+ in the database for the place
    identified by +geo_level+ and +geo_code+ and calculates numerators and
    values. If multiple fields are given, it creates nested result
    dictionaries.

    Control the rows that are included or ignored using +only+, +exclude+
    and +exclude_zero+.

    The field values can be recoded using +recode+ and re-ordered using
    +key_order+.

    :param str or list fields: the census field to build stats for. Specify
        a list of fields to build nested statistics. If multiple fields are
        specified, then the values of parameters such as +only+, +exclude+
        and +recode+ will change. These must be fields in
        `api.models.census.census_fields`, e.g. 'highest educational level'
    :param str geo_level: the geographical level
    :param str geo_code: the geographical code
    :param dbsession session: sqlalchemy session
    :param str order_by: field to order by, or None for default, eg. '-total'
    :param bool percent: should we calculate percentages, or just sum raw
        values? When False, the raw sums are reported under 'values' and the
        numerators are set to None.
    :param int total: the total value to use for percentages, or None to
        total columns automatically
    :param list table_fields: list of fields to use to find the table,
        defaults to `fields`
    :param str table_name: override the table name, otherwise it's
        calculated from the fields and geo_level
    :param dict or list only: only include these field values. If +fields+
        has many items, this must be a dict mapping field names to a list of
        strings; fields missing from the dict are not filtered.
    :param dict or list exclude: ignore these field values. If +fields+ has
        many items, this must be a dict mapping field names to a list of
        strings. Field names are checked before any recoding.
    :param bool exclude_zero: ignore fields that have a zero total
    :param dict or lambda recode: function or dict to recode values of the
        fields. If +fields+ is a singleton, then the keys of this dict must
        be the values to recode from, otherwise they must be the field names
        and then the values. If this is a lambda, it is called with the
        field name and its value as arguments.
    :param dict or list key_order: ordering for keys in result dictionary.
        If +fields+ has many items, this must be a dict from field names to
        orderings. The default ordering is determined by +order_by+.

    :return: (data-dictionary, total)
    :raises ValueError: if many fields are given together with a +total+ or
        with a non-dict +only+, +exclude+ or +key_order+
    """
    if not isinstance(fields, list):
        fields = [fields]

    n_fields = len(fields)
    many_fields = n_fields > 1

    if order_by is None:
        order_by = fields[0]

    # the (value,) wrapping below keeps the message formatting working when
    # the offending argument is itself a tuple
    if only is not None:
        if not isinstance(only, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then only must be a dict. I got %s instead" % (only,))
            else:
                only = {fields[0]: set(only)}

    if exclude is not None:
        if not isinstance(exclude, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then exclude must be a dict. I got %s instead" % (exclude,))
            else:
                exclude = {fields[0]: set(exclude)}

    if key_order:
        if not isinstance(key_order, dict):
            if many_fields:
                raise ValueError(
                    "If many fields are given, then key_order must be a dict. I got %s instead" % (key_order,))
            else:
                key_order = {fields[0]: key_order}
    else:
        key_order = {}

    if total is not None and many_fields:
        raise ValueError("Cannot specify a total if many fields are given")

    if recode:
        if not isinstance(recode, dict) or not many_fields:
            # one recoder for everything: apply it to each field
            recode = dict((f, recode) for f in fields)

    model = get_model_from_fields(table_fields or fields, geo_level,
                                  table_name)
    objects = get_objects_by_geo(model, geo_code, geo_level, session,
                                 fields=fields, order_by=order_by)

    root_data = OrderedDict()
    our_total = {}

    def get_data_object(obj):
        """ Walk down the list of fields and return the final resting place
        for data for this stat, or None if the row is filtered out.
        """
        data = root_data

        for i, field in enumerate(fields):
            key = getattr(obj, field)

            if only and field in only and key not in only[field]:
                return key, None

            if exclude and key in exclude.get(field, {}):
                return key, None

            if recode and field in recode:
                recoder = recode[field]
                if isinstance(recoder, dict):
                    key = recoder.get(key, key)
                else:
                    key = recoder(field, key)
            else:
                key = capitalize(key)

            # enforce key ordering the first time this level is reached.
            # An intermediate dict already holds its 'metadata' entry by
            # then, so a plain emptiness test would never fire for nested
            # fields; only real data keys count here.
            if field in key_order and not any(k != 'metadata' for k in data):
                for fld in key_order[field]:
                    data[fld] = OrderedDict()

            # ensure it's there
            if key not in data:
                data[key] = OrderedDict()

            data = data[key]

            # default values for intermediate fields
            if i < n_fields - 1:
                data['metadata'] = {'name': key}

        # data is now the dict where the end value is going to go
        if not data:
            data['name'] = key
            data['numerators'] = {'this': 0.0}

        return key, data

    # run the stats for the objects
    for obj in objects:
        if obj.total == 0 and exclude_zero:
            continue

        # get the data dict where these values must go
        key, data = get_data_object(obj)
        if not data:
            continue

        our_total[key] = our_total.get(key, 0.0) + obj.total
        data['numerators']['this'] += obj.total

    if total is not None:
        grand_total = total
    else:
        grand_total = sum(our_total.values())

    # add in percentages, or promote the raw sums to values
    def calc_percent(node):
        for key, child in node.items():
            if key == 'metadata':
                continue
            if 'numerators' in child:
                if percent:
                    tot = our_total[key] if many_fields else grand_total
                    perc = 0 if tot == 0 else (child['numerators']['this'] / tot * 100)
                    child['values'] = {'this': round(perc, 2)}
                else:
                    child['values'] = dict(child['numerators'])
                    child['numerators']['this'] = None
            else:
                calc_percent(child)

    calc_percent(root_data)

    add_metadata(root_data, model)

    return root_data, grand_total
def get_demographics_profile(geo_code, geo_level, session):
    """
    Build the demographics section of a place's profile.

    All statistics are read from the table identified by the fields
    'age in completed years', 'sex' and 'rural or urban'.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to the query helpers
    :return: dict with the keys 'sex_ratio', 'urban_distribution',
             'urbanised', 'age_group_distribution',
             'age_category_distribution', 'median_age' and
             'total_population'
    """
    age_field = 'age in completed years'

    def table_fields():
        # a fresh list per call, exactly as if the literal were repeated
        return ['age in completed years', 'sex', 'rural or urban']

    def age_of(value):
        # the open-ended top age is written with a trailing '+'
        return int(value.replace('+', ''))

    # sex
    sex_dist_data, total_pop = get_stat_data(
        'sex', geo_level, geo_code, session,
        table_fields=table_fields())

    # urban/rural by sex
    urban_dist_data, _ = get_stat_data(
        ['rural or urban', 'sex'], geo_level, geo_code, session,
        table_fields=table_fields())

    # a place without any urban rows simply has nobody urbanised
    total_urbanised = 0
    for data in urban_dist_data.get('Urban', {}).values():
        # skips the 'metadata' entry, which has no numerators
        if 'numerators' in data:
            total_urbanised += data['numerators']['this']

    if total_pop:
        percent_urbanised = round(total_urbanised / total_pop * 100, 2)
    else:
        # no population at all: avoid dividing by zero
        percent_urbanised = 0.0

    # median age
    db_model_age = get_model_from_fields(table_fields(), geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session,
                                 [age_field])
    objects = sorted(
        (o for o in objects if getattr(o, age_field) != 'unspecified'),
        key=lambda x: age_of(getattr(x, age_field)))
    median = calculate_median(objects, age_field)

    # age in 10 year groups
    def age_recode(f, x):
        age = age_of(x)
        if age >= 80:
            return '80+'
        # floor division, so the bucket stays an int under true division
        bucket = 10 * (age // 10)
        return '%d-%d' % (bucket, bucket + 9)

    age_dist_data, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=table_fields(),
        recode=age_recode, exclude=['unspecified'])

    # age category
    def age_cat_recode(f, x):
        age = age_of(x)
        if age < 18:
            return 'Under 18'
        elif age >= 65:
            return '65 and over'
        else:
            return '18 to 64'

    age_cats, _ = get_stat_data(
        age_field, geo_level, geo_code, session,
        table_fields=table_fields(),
        recode=age_cat_recode, exclude=['unspecified'])

    final_data = {
        'sex_ratio': sex_dist_data,
        'urban_distribution': urban_dist_data,
        'urbanised': {
            'name': 'In urban areas',
            'numerators': {'this': total_urbanised},
            'values': {'this': percent_urbanised},
        },
        'age_group_distribution': age_dist_data,
        'age_category_distribution': age_cats,
        'median_age': {
            "name": "Median age",
            "values": {"this": median},
        },
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        },
    }

    return final_data
def get_service_delivery_profile(geo_code, geo_level, session):
    """
    Assemble the service delivery section of a place's profile: water
    source, refuse disposal, electricity access and toilet facilities.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to the query helpers
    :return: dict holding one distribution per service plus the headline
             percentages derived from them
    """
    # ---- water source ----
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')
    total_water_sp = (
        water_src_data['Service provider']['numerators']['this']
        if 'Service provider' in water_src_data else 0.0)

    # ---- refuse disposal ----
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_rows = get_objects_by_geo(db_model_ref, geo_code, geo_level,
                                     session, order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for row in refuse_rows:
        raw_name = getattr(row, 'refuse disposal')
        short_name = SHORT_REFUSE_DISPOSAL_CATEGORIES[raw_name]
        refuse_disp_data[short_name] = {
            "name": short_name,
            "numerators": {"this": row.total},
        }
        total_ref += row.total
        # the service provider test looks at the long, un-shortened name
        if raw_name.startswith('Removed by local authority'):
            total_ref_sp += row.total

    # ---- electricity ----
    elec_attrs = [
        'electricity for cooking',
        'electricity for heating',
        'electricity for lighting',
    ]
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    elec_rows = get_objects_by_geo(db_model_elec, geo_code, geo_level,
                                   session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {}
    for bucket, label in (
            ('total_all_elec', "Have electricity for everything"),
            ('total_some_not_all_elec', "Have electricity for some things"),
            ('total_no_elec', "No electricity")):
        elec_access_data[bucket] = {
            "name": label,
            "numerators": {"this": 0.0},
        }

    for row in elec_rows:
        total_elec += row.total
        # one flag per use: True unless the stored value starts with 'no '
        in_use = [not getattr(row, attr).startswith('no ')
                  for attr in elec_attrs]
        has_all = all(in_use)
        has_some = any(in_use)

        if has_some:
            total_some_elec += row.total

        if has_all:
            bucket = 'total_all_elec'
        elif has_some:
            bucket = 'total_some_not_all_elec'
        else:
            bucket = 'total_no_elec'
        elec_access_data[bucket]['numerators']['this'] += row.total

    # percentages for the two hand-built distributions
    for entry in refuse_disp_data.values():
        entry["values"] = {
            "this": percent(entry["numerators"]["this"], total_ref)
        }
    for entry in elec_access_data.values():
        entry["values"] = {
            "this": percent(entry["numerators"]["this"], total_elec)
        }

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # ---- toilets ----
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for name, stat in toilet_data.iteritems():
        if name.startswith(('Flush', 'Chemical')):
            total_flush_toilet += stat['numerators']['this']
        if name == 'None':
            total_no_toilet += stat['numerators']['this']

    profile = {}
    profile['water_source_distribution'] = water_src_data
    profile['percentage_water_from_service_provider'] = {
        "name": "Are getting water from a regional or local service provider",
        "numerators": {"this": total_water_sp},
        "values": {"this": percent(total_water_sp, total_wsrc)},
    }
    profile['refuse_disposal_distribution'] = refuse_disp_data
    profile['percentage_ref_disp_from_service_provider'] = {
        "name": "Are getting refuse disposal from a local authority or private company",
        "numerators": {"this": total_ref_sp},
        "values": {"this": percent(total_ref_sp, total_ref)},
    }
    profile['percentage_electricity_access'] = {
        "name": "Have electricity for at least one of cooking, heating or lighting",
        "numerators": {"this": total_some_elec},
        "values": {"this": percent(total_some_elec, total_elec)},
    }
    profile['electricity_access_distribution'] = elec_access_data
    profile['percentage_flush_toilet_access'] = {
        "name": "Have access to flush or chemical toilets",
        "numerators": {"this": total_flush_toilet},
        "values": {"this": percent(total_flush_toilet, total_toilet)},
    }
    profile['percentage_no_toilet_access'] = {
        "name": "Have no access to any toilets",
        "numerators": {"this": total_no_toilet},
        "values": {"this": percent(total_no_toilet, total_toilet)},
    }
    profile['toilet_facilities_distribution'] = toilet_data
    return profile
def get_households_profile(geo_code, geo_level, session):
    """
    Build the households section of a place's profile: number of
    households, heads of household, tenure, informal dwellings and
    household goods.

    Percentages are relative to the number of households, which is the sum
    over the 'gender of head of household' table. When that sum is zero all
    percentages are reported as zero.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to the query helpers
    :return: dict with the keys 'total_households', 'owned', 'informal',
             'tenure_distribution', 'household_goods' and
             'head_of_household'
    """
    # head of household
    # gender
    db_model_gender = get_model_from_fields(['gender of head of household'],
                                            geo_level)
    objects = get_objects_by_geo(db_model_gender, geo_code, geo_level,
                                 session)
    total_households = 0.0
    female_heads = 0.0
    for obj in objects:
        # every row counts towards the total, whatever the gender
        total_households += obj.total
        if getattr(obj, 'gender of head of household') == 'Female':
            female_heads += obj.total

    def share_of_households(count):
        # guards the empty-table case, where the total is zero
        if total_households == 0:
            return 0.0
        return round(count / total_households * 100, 2)

    # age
    db_model_age = get_model_from_fields(['age of household head'],
                                         geo_level)
    objects = get_objects_by_geo(db_model_age, geo_code, geo_level, session)
    total_under_20 = 0.0
    for obj in objects:
        age = getattr(obj, 'age of household head')
        if age in ['10 - 14', '15 - 19']:
            total_under_20 += obj.total

    # tenure
    db_model_tenure = get_model_from_fields(['tenure status'], geo_level)
    objects = get_objects_by_geo(db_model_tenure, geo_code, geo_level,
                                 session)
    tenure_data = {}
    owned = 0.0
    for obj in objects:
        tenure = getattr(obj, 'tenure status')
        if tenure.startswith('Owned'):
            owned += obj.total
        tenure_data[tenure] = {
            "name": tenure,
            "values": {"this": share_of_households(obj.total)},
            "numerators": {"this": obj.total},
        }
    add_metadata(tenure_data, db_model_tenure)

    # type of dwelling
    db_model_dwelling = get_model_from_fields(['type of dwelling'],
                                              geo_level)
    objects = get_objects_by_geo(db_model_dwelling, geo_code, geo_level,
                                 session)
    informal = 0.0
    for obj in objects:
        dwelling = getattr(obj, 'type of dwelling')
        if dwelling.startswith('Informal'):
            informal += obj.total

    # household goods
    household_goods, _ = get_stat_data(
        ['household goods'], geo_level, geo_code, session,
        total=total_households,
        recode=HOUSEHOLD_GOODS_RECODE,
        exclude=['total households'],
        key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    return {
        'total_households': {
            'name': 'Households',
            'values': {'this': total_households},
        },
        'owned': {
            'name': 'Households fully owned or being paid off',
            'values': {'this': share_of_households(owned)},
            'numerators': {'this': owned},
        },
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {'this': share_of_households(informal)},
            'numerators': {'this': informal},
        },
        'tenure_distribution': tenure_data,
        'household_goods': household_goods,
        'head_of_household': {
            'female': {
                'name': 'Households with women as their head',
                'values': {'this': share_of_households(female_heads)},
                'numerators': {'this': female_heads},
            },
            'under_20': {
                'name': 'Households with heads under 20 years old',
                # reported as a raw count, not as a percentage
                'values': {'this': total_under_20},
            },
        },
    }
elif len(geo_name.split(':')[0]) in (5, 6): geo_level = 'municipality' elif 'Ward' in geo_name: geo_level = 'ward' elif len(geo_name.split(':')[0]) >= 7: geo_level = 'province' session = get_session() province_codes = dict((p.name, p.code) for p in session.query(Province)) session.close() elif geo_name.startswith('DC'): geo_level = 'district' else: raise ValueError("Cannot recognize the geo level of data") # get db model and create table if necessary db_model = get_model_from_fields(fields, geo_level, table_name) Base.metadata.create_all(_engine, tables=[db_model.__table__]) # restart generator data = open_census_csv(filepath) next(data) # skip field name and categories session = get_session() for geo_name, values in data: if geo_level == 'province': code = province_codes[geo_name] elif geo_name == 'country': code = None else: code = geo_name.split(':')[0] base_kwargs = {'%s_code' % geo_level: code} if code else {}
def get_service_delivery_profile(geo_code, geo_level, session):
    """
    Collect the service delivery statistics of a place: where its water
    comes from, how refuse is disposed of, which uses it has electricity
    for and which toilet facilities are available.

    :param geo_code: the geographical code of the place
    :param geo_level: the geographical level of the place
    :param session: database session, passed on to the query helpers
    :return: dict with a distribution for each service and the summary
             percentages computed from those distributions
    """
    def summary(label, numerator, denominator):
        # headline statistic: a count and its share of the given total
        return {
            "name": label,
            "numerators": {"this": numerator},
            "values": {"this": percent(numerator, denominator)},
        }

    # water source
    water_src_data, total_wsrc = get_stat_data(
        ['source of water'], geo_level, geo_code, session,
        recode=SHORT_WATER_SOURCE_CATEGORIES,
        order_by='-total')

    total_water_sp = 0.0
    if 'Service provider' in water_src_data:
        provider = water_src_data['Service provider']
        total_water_sp = provider['numerators']['this']

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for record in get_objects_by_geo(db_model_ref, geo_code, geo_level,
                                     session, order_by='-total'):
        category = getattr(record, 'refuse disposal')
        label = SHORT_REFUSE_DISPOSAL_CATEGORIES[category]
        refuse_disp_data[label] = {
            "name": label,
            "numerators": {"this": record.total},
        }
        total_ref += record.total
        # matched against the full category name, not the short label
        if category.startswith('Removed by local authority'):
            total_ref_sp += record.total

    # electricity
    elec_attrs = ['electricity for cooking', 'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)

    total_elec = 0.0
    total_some_elec = 0.0
    all_uses = {"name": "Have electricity for everything",
                "numerators": {"this": 0.0}}
    some_uses = {"name": "Have electricity for some things",
                 "numerators": {"this": 0.0}}
    no_uses = {"name": "No electricity",
               "numerators": {"this": 0.0}}
    elec_access_data = {
        'total_all_elec': all_uses,
        'total_some_not_all_elec': some_uses,
        'total_no_elec': no_uses,
    }

    for record in get_objects_by_geo(db_model_elec, geo_code, geo_level,
                                     session):
        total_elec += record.total

        # number of uses whose stored value does not start with 'no '
        uses = 0
        for attr in elec_attrs:
            if not getattr(record, attr).startswith('no '):
                uses += 1

        if uses == 0:
            target = no_uses
        else:
            total_some_elec += record.total
            target = all_uses if uses == len(elec_attrs) else some_uses
        target['numerators']['this'] += record.total

    # percentages for the two distributions built by hand above
    for distribution, denominator in ((refuse_disp_data, total_ref),
                                      (elec_access_data, total_elec)):
        for item in distribution.values():
            share = percent(item["numerators"]["this"], denominator)
            item["values"] = {"this": share}

    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)

    # toilets
    toilet_data, total_toilet = get_stat_data(
        ['toilet facilities'], geo_level, geo_code, session,
        exclude_zero=True,
        recode=COLLAPSED_TOILET_CATEGORIES,
        order_by='-total')

    total_flush_toilet = 0.0
    total_no_toilet = 0.0
    for key, data in toilet_data.iteritems():
        is_flush = key.startswith('Flush') or key.startswith('Chemical')
        if is_flush:
            total_flush_toilet += data['numerators']['this']
        if key == 'None':
            total_no_toilet += data['numerators']['this']

    return {
        'water_source_distribution': water_src_data,
        'percentage_water_from_service_provider': summary(
            "Are getting water from a regional or local service provider",
            total_water_sp, total_wsrc),
        'refuse_disposal_distribution': refuse_disp_data,
        'percentage_ref_disp_from_service_provider': summary(
            "Are getting refuse disposal from a local authority or private company",
            total_ref_sp, total_ref),
        'percentage_electricity_access': summary(
            "Have electricity for at least one of cooking, heating or lighting",
            total_some_elec, total_elec),
        'electricity_access_distribution': elec_access_data,
        'percentage_flush_toilet_access': summary(
            "Have access to flush or chemical toilets",
            total_flush_toilet, total_toilet),
        'percentage_no_toilet_access': summary(
            "Have no access to any toilets",
            total_no_toilet, total_toilet),
        'toilet_facilities_distribution': toilet_data,
    }
def get_households_profile(geo_code, geo_level, session):
    """Build the household statistics for one geography.

    Gathers head-of-household gender, heads under 18, tenure, annual
    household income (with a median-based estimate), dwelling type and
    household goods, and returns them as a nested dict.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in the table names
    :param session: database session handed to the query helpers
    """
    # Head of household by gender; its total doubles as the household count.
    head_gender_dist, total_households = get_stat_data(
        ['gender of household head'], geo_level, geo_code, session,
        order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # Heads of household under 18 live in a dedicated table.
    u18_table = 'genderofheadofhouseholdunder18_%s' % geo_level
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'], geo_level, table_name=u18_table)
    u18_rows = get_objects_by_geo(db_model_u18, geo_code, geo_level, session)
    # Sums the first element of every returned row.
    total_under_18 = float(sum(row[0] for row in u18_rows))

    # Tenure: every category whose name starts with 'Owned' counts as owned.
    tenure_data, _ = get_stat_data(
        ['tenure status'], geo_level, geo_code, session,
        order_by='tenure status')
    owned = sum(entry['numerators']['this']
                for category, entry in tenure_data.iteritems()
                if category.startswith('Owned'))

    # Annual household income, excluding the 'Unspecified' category.
    income_table = 'annualhouseholdincome_genderofhouseholdhead_%s' % geo_level
    income_dist_data, _ = get_stat_data(
        ['annual household income'], geo_level, geo_code, session,
        exclude=['Unspecified'],
        recode=HOUSEHOLD_INCOME_RECODE,
        key_order=HOUSEHOLD_INCOME_RECODE.values(),
        table_name=income_table)

    # The median income bracket is translated into a single estimate value.
    median_bracket = calculate_median_stat(income_dist_data)
    median_income = HOUSEHOLD_INCOME_ESTIMATE[median_bracket]

    # Type of dwelling; 'Shack' is reported as informal housing.
    type_of_dwelling_dist, _ = get_stat_data(
        ['type of dwelling'], geo_level, geo_code, session,
        recode=TYPE_OF_DWELLING_RECODE,
        order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # Household goods, measured against the household total from above.
    goods_order = sorted(HOUSEHOLD_GOODS_RECODE.values())
    household_goods, _ = get_stat_data(
        ['household goods'], geo_level, geo_code, session,
        total=total_households,
        recode=HOUSEHOLD_GOODS_RECODE,
        exclude=['total households'],
        key_order=goods_order)

    head_of_household = {
        'gender_distribution': head_gender_dist,
        'female': {
            'name': 'Households with women as their head',
            'values': {'this': percent(female_heads, total_households)},
            'numerators': {'this': female_heads},
        },
        # Note: this value is the summed count itself, not a percentage.
        'under_18': {
            'name': 'Households with heads under 18 years old',
            'values': {'this': total_under_18},
        },
    }

    profile = {
        'total_households': {
            'name': 'Households',
            'values': {'this': total_households},
        },
        'owned': {
            'name': 'Households fully owned or being paid off',
            'values': {'this': percent(owned, total_households)},
            'numerators': {'this': owned},
        },
        'informal': {
            'name': 'Households that are informal dwellings (shacks)',
            'values': {'this': percent(informal, total_households)},
            'numerators': {'this': informal},
        },
        'median_annual_income': {
            'name': 'Average annual household income',
            'values': {'this': median_income},
        },
    }
    profile['type_of_dwelling_distribution'] = type_of_dwelling_dist
    profile['tenure_distribution'] = tenure_data
    profile['household_goods'] = household_goods
    profile['annual_income_distribution'] = income_dist_data
    profile['head_of_household'] = head_of_household
    return profile
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics statistics for one geography.

    Collects population group, language, age, sex, population density,
    median age, citizenship and place-of-birth data.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in table and column names
    :param session: database session used for the queries
    :returns: dict of distributions and headline figures; the
        ``population_density`` key is only present when the geography has a
        truthy ``square_kms``.
    """
    # population group (its total is used as the overall population)
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    # Requested ordered by '-total'; the first key is taken as most spoken.
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49',
                   '50-59', '60-69', '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(
        ['gender'], geo_level, table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
        .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM() over zero matching rows yields NULL/None; treat that as zero so
    # the arithmetic below cannot raise a TypeError.
    total_male = query.one()[0] or 0
    total_female = total_pop - total_male

    # Use the shared percent() helper, like the other ratios in this function.
    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {"this": percent(total_female, total_pop)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": percent(total_male, total_pop)},
            "numerators": {"this": total_male},
        }),
    ))
    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop},
        }
    }

    # Density is only reported when an area is known (and non-zero).
    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {"this": total_pop / geo.square_kms},
        }

    # median age/age category
    db_model_age = get_model_from_fields(
        ['age in completed years'], geo_level,
        table_name='ageincompletedyears_%s' % geo_level
    )
    # Sorted numerically by age before being handed to calculate_median().
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category (read from the pre-simplified table)
    age_dist, _ = get_stat_data(
        ['age in completed years'], geo_level, geo_code, session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={'< 18': 'Under 18', '>= 65': '65 and over'})
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo_level, geo_code, session,
        order_by='-total')
    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total')
    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # Shorten the one verbose category; all other keys pass through.
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total',
        recode=region_recode)
    # Zero rows are excluded above, so the category may be absent.
    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics statistics for one geography.

    Collects population group, language, age, sex, median age, a three-way
    age category split, citizenship and place-of-birth data.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in column names
    :param session: database session used for the queries
    :returns: dict of distributions and headline figures
    """
    # population group (its total is used as the overall population)
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    # Requested ordered by '-total'; the first key is taken as most spoken.
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49',
                   '50-59', '60-69', '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
        .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM() over zero matching rows yields NULL/None; treat that as zero so
    # the arithmetic below cannot raise a TypeError.
    total_male = query.one()[0] or 0
    total_female = total_pop - total_male

    # Use the shared percent() helper, like the other ratios in this function.
    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {"this": percent(total_female, total_pop)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": percent(total_male, total_pop)},
            "numerators": {"this": total_male},
        }),
    ))
    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'], geo_level)
    # Sorted numerically by age before being handed to calculate_median().
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category: bucket every single-year row into one of three bands
    under_18 = 0.0
    over_or_65 = 0.0
    between_18_64 = 0.0
    total = 0.0
    for obj in objects:
        age = int(getattr(obj, 'age in completed years'))
        total += obj.total
        if age < 18:
            under_18 += obj.total
        elif age >= 65:
            over_or_65 += obj.total
        else:
            between_18_64 += obj.total

    # percent() replaces the raw ``x / total`` division, which raised
    # ZeroDivisionError whenever no age rows were returned (total == 0.0).
    age_dist = OrderedDict((
        ("under_18", {
            "name": "Under 18",
            "values": {"this": percent(under_18, total)}
        }),
        ("18_to_64", {
            "name": "18 to 64",
            "values": {"this": percent(between_18_64, total)}
        }),
        ("65_and_over", {
            "name": "65 and over",
            "values": {"this": percent(over_or_65, total)}
        })))
    add_metadata(age_dist, db_model_age)
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(
        ['citizenship'], geo_level, geo_code, session,
        order_by='-total')
    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {'this': percent(sa_citizen, total_pop)},
        'numerators': {'this': sa_citizen},
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(
        ['province of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total')
    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # Shorten the one verbose category; all other keys pass through.
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(
        ['region of birth'], geo_level, geo_code, session,
        exclude_zero=True, order_by='-total',
        recode=region_recode)
    # Zero rows are excluded above, so the category may be absent.
    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {'this': percent(born_in_sa, total_pop)},
        'numerators': {'this': born_in_sa},
    }

    return final_data
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics statistics for one geography.

    Collects population group, language, age, sex, population density,
    median age, citizenship and place-of-birth data.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in table and column names
    :param session: database session used for the queries
    :returns: dict of distributions and headline figures; the
        ``population_density`` key is only present when the geography has a
        truthy ``square_kms``.
    """
    # population group (its total is used as the overall population)
    pop_dist_data, total_pop = get_stat_data(['population group'], geo_level,
                                             geo_code, session)

    # language
    language_data, _ = get_stat_data(['language'],
                                     geo_level,
                                     geo_code,
                                     session,
                                     order_by='-total')
    # Requested ordered by '-total'; the first key is taken as most spoken.
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'],
        geo_level,
        geo_code,
        session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49', '50-59',
                   '60-69', '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'],
                                         geo_level,
                                         table_name='gender_%s' % geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
        .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM() over zero matching rows yields NULL/None; fall back to zero so
    # the subtraction and ratios below cannot raise a TypeError.
    total_male = query.one()[0]
    if total_male is None:
        total_male = 0

    # Ratios go through the shared percent() helper, matching the
    # citizenship and birthplace figures further down.
    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {
                "this": percent(total_pop - total_male, total_pop)
            },
            "numerators": {
                "this": total_pop - total_male
            },
        }),
        ('Male', {
            "name": "Male",
            "values": {
                "this": percent(total_male, total_pop)
            },
            "numerators": {
                "this": total_male
            },
        }),
    ))
    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {
                "this": total_pop
            },
        }
    }

    # Density is only reported when an area is known (and non-zero).
    geo = get_geography(geo_code, geo_level)
    if geo.square_kms:
        final_data['population_density'] = {
            'name': "people per square kilometre",
            'values': {
                "this": total_pop / geo.square_kms
            },
        }

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'],
                                         geo_level,
                                         table_name='ageincompletedyears_%s' %
                                         geo_level)
    # Sorted numerically by age before being handed to calculate_median().
    objects = sorted(get_objects_by_geo(db_model_age, geo_code, geo_level,
                                        session),
                     key=lambda x: int(getattr(x, 'age in completed years')))

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {
            "this": median
        },
    }

    # age category (read from the pre-simplified table)
    age_dist, _ = get_stat_data(
        ['age in completed years'],
        geo_level,
        geo_code,
        session,
        table_name='ageincompletedyearssimplified_%s' % geo_level,
        key_order=['Under 18', '18 to 64', '65 and over'],
        recode={
            '< 18': 'Under 18',
            '>= 65': '65 and over'
        })
    final_data['age_category_distribution'] = age_dist

    # citizenship
    citizenship_dist, _ = get_stat_data(['citizenship'],
                                        geo_level,
                                        geo_code,
                                        session,
                                        order_by='-total')
    sa_citizen = citizenship_dist['Yes']['numerators']['this']

    final_data['citizenship_distribution'] = citizenship_dist
    final_data['citizenship_south_african'] = {
        'name': 'South African citizens',
        'values': {
            'this': percent(sa_citizen, total_pop)
        },
        'numerators': {
            'this': sa_citizen
        },
    }

    # migration
    province_of_birth_dist, _ = get_stat_data(['province of birth'],
                                              geo_level,
                                              geo_code,
                                              session,
                                              exclude_zero=True,
                                              order_by='-total')
    final_data['province_of_birth_distribution'] = province_of_birth_dist

    def region_recode(field, key):
        # Shorten the one verbose category; all other keys pass through.
        if key == 'Born in South Africa':
            return 'South Africa'
        else:
            return key

    region_of_birth_dist, _ = get_stat_data(['region of birth'],
                                            geo_level,
                                            geo_code,
                                            session,
                                            exclude_zero=True,
                                            order_by='-total',
                                            recode=region_recode)
    # Zero rows are excluded above, so the category may be absent.
    if 'South Africa' in region_of_birth_dist:
        born_in_sa = region_of_birth_dist['South Africa']['numerators']['this']
    else:
        born_in_sa = 0

    final_data['region_of_birth_distribution'] = region_of_birth_dist
    final_data['born_in_south_africa'] = {
        'name': 'Born in South Africa',
        'values': {
            'this': percent(born_in_sa, total_pop)
        },
        'numerators': {
            'this': born_in_sa
        },
    }

    return final_data
def get_demographics_profile(geo_code, geo_level, session):
    """Build the demographics statistics for one geography.

    Collects population group, language, age group, sex, median age and a
    three-way age category split.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in column names
    :param session: database session used for the queries
    :returns: dict of distributions and headline figures
    """
    def _pct(numerator, denominator):
        # Same formula as before, but an empty geography (zero denominator)
        # reports 0.0 instead of raising ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / denominator * 100, 2)

    # population group (its total is used as the overall population)
    pop_dist_data, total_pop = get_stat_data(
        ['population group'], geo_level, geo_code, session)

    # language
    language_data, _ = get_stat_data(
        ['language'], geo_level, geo_code, session, order_by='-total')
    # Requested ordered by '-total'; the first key is taken as most spoken.
    language_most_spoken = language_data[language_data.keys()[0]]

    # age groups
    age_dist_data, _ = get_stat_data(
        ['age groups in 5 years'], geo_level, geo_code, session,
        recode=COLLAPSED_AGE_CATEGORIES,
        key_order=('0-9', '10-19', '20-29', '30-39', '40-49',
                   '50-59', '60-69', '70-79', '80+'))

    # sex
    db_model_sex = get_model_from_fields(['gender'], geo_level)
    query = session.query(func.sum(db_model_sex.total)) \
        .filter(db_model_sex.gender == 'Male')
    geo_attr = '%s_code' % geo_level
    query = query.filter(getattr(db_model_sex, geo_attr) == geo_code)
    # SUM() over zero matching rows yields NULL/None; treat that as zero.
    total_male = query.one()[0] or 0
    total_female = total_pop - total_male

    # Each entry's numerator must match its own percentage: the female
    # count belongs under 'Female' and the male count under 'Male'
    # (they were previously swapped).
    sex_data = OrderedDict((  # census data refers to sex as gender
        ('Female', {
            "name": "Female",
            "values": {"this": _pct(total_female, total_pop)},
            "numerators": {"this": total_female},
        }),
        ('Male', {
            "name": "Male",
            "values": {"this": _pct(total_male, total_pop)},
            "numerators": {"this": total_male},
        }),
    ))
    add_metadata(sex_data, db_model_sex)

    final_data = {
        'language_distribution': language_data,
        'language_most_spoken': language_most_spoken,
        'population_group_distribution': pop_dist_data,
        'age_group_distribution': age_dist_data,
        'sex_ratio': sex_data,
        'total_population': {
            "name": "People",
            "values": {"this": total_pop}
        }}

    # median age/age category
    db_model_age = get_model_from_fields(['age in completed years'], geo_level)
    # Sorted numerically by age before being handed to calculate_median().
    objects = sorted(
        get_objects_by_geo(db_model_age, geo_code, geo_level, session),
        key=lambda x: int(getattr(x, 'age in completed years'))
    )

    # median age
    median = calculate_median(objects, 'age in completed years')
    final_data['median_age'] = {
        "name": "Median age",
        "values": {"this": median},
    }

    # age category: bucket every single-year row into one of three bands
    under_18 = 0.0
    over_or_65 = 0.0
    between_18_64 = 0.0
    total = 0.0
    for obj in objects:
        age = int(getattr(obj, 'age in completed years'))
        total += obj.total
        if age < 18:
            under_18 += obj.total
        elif age >= 65:
            over_or_65 += obj.total
        else:
            between_18_64 += obj.total

    age_dist = OrderedDict((
        ("under_18", {
            "name": "Under 18",
            "values": {"this": _pct(under_18, total)}
        }),
        ("18_to_64", {
            "name": "18 to 64",
            "values": {"this": _pct(between_18_64, total)}
        }),
        ("65_and_over", {
            "name": "65 and over",
            "values": {"this": _pct(over_or_65, total)}
        })
    ))
    add_metadata(age_dist, db_model_age)
    final_data['age_category_distribution'] = age_dist

    return final_data
def get_households_profile(geo_code, geo_level, session):
    """Return household statistics for the given geography as a nested dict.

    The result covers the household count, ownership, dwelling type,
    tenure, household goods, annual income (distribution plus a
    median-based estimate) and head-of-household figures.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in the table names
    :param session: database session handed to the query helpers
    """
    def share_of_households(label, count):
        # Entry with both the raw count and its percentage of all households.
        return {
            'name': label,
            'values': {
                'this': percent(count, total_households)
            },
            'numerators': {
                'this': count
            },
        }

    def plain_value(label, value):
        # Entry that carries a single value and no numerator.
        return {
            'name': label,
            'values': {
                'this': value
            },
        }

    # --- head of household: gender (also gives the household total) ---
    head_gender_dist, total_households = get_stat_data(
        ['gender of household head'],
        geo_level,
        geo_code,
        session,
        order_by='gender of household head')
    female_heads = head_gender_dist['Female']['numerators']['this']

    # --- head of household: under 18, from its own table ---
    db_model_u18 = get_model_from_fields(
        ['gender of head of household'],
        geo_level,
        table_name='genderofheadofhouseholdunder18_%s' % geo_level)
    young_head_rows = get_objects_by_geo(db_model_u18, geo_code, geo_level,
                                         session)
    # Adds up the first element of each row.
    running = 0
    for young_row in young_head_rows:
        running = running + young_row[0]
    total_under_18 = float(running)

    # --- tenure: categories starting with 'Owned' count as owned ---
    tenure_data, _ = get_stat_data(['tenure status'],
                                   geo_level,
                                   geo_code,
                                   session,
                                   order_by='tenure status')
    owned = 0
    for tenure_key, tenure_entry in tenure_data.iteritems():
        if not tenure_key.startswith('Owned'):
            continue
        owned += tenure_entry['numerators']['this']

    # --- annual household income ('Unspecified' is left out) ---
    income_dist_data, _ = get_stat_data(
        ['annual household income'],
        geo_level,
        geo_code,
        session,
        exclude=['Unspecified'],
        recode=HOUSEHOLD_INCOME_RECODE,
        key_order=HOUSEHOLD_INCOME_RECODE.values(),
        table_name='annualhouseholdincome_genderofhouseholdhead_%s' %
        geo_level)

    # The median bracket is mapped to a single estimate value.
    median_income = HOUSEHOLD_INCOME_ESTIMATE[
        calculate_median_stat(income_dist_data)]

    # --- type of dwelling ('Shack' is reported as informal) ---
    type_of_dwelling_dist, _ = get_stat_data(['type of dwelling'],
                                             geo_level,
                                             geo_code,
                                             session,
                                             recode=TYPE_OF_DWELLING_RECODE,
                                             order_by='-total')
    informal = type_of_dwelling_dist['Shack']['numerators']['this']

    # --- household goods, relative to the household total ---
    household_goods, _ = get_stat_data(
        ['household goods'],
        geo_level,
        geo_code,
        session,
        total=total_households,
        recode=HOUSEHOLD_GOODS_RECODE,
        exclude=['total households'],
        key_order=sorted(HOUSEHOLD_GOODS_RECODE.values()))

    return {
        'total_households': plain_value('Households', total_households),
        'owned': share_of_households(
            'Households fully owned or being paid off', owned),
        'type_of_dwelling_distribution': type_of_dwelling_dist,
        'informal': share_of_households(
            'Households that are informal dwellings (shacks)', informal),
        'tenure_distribution': tenure_data,
        'household_goods': household_goods,
        'annual_income_distribution': income_dist_data,
        'median_annual_income': plain_value(
            'Average annual household income', median_income),
        'head_of_household': {
            'gender_distribution': head_gender_dist,
            'female': share_of_households(
                'Households with women as their head', female_heads),
            # This value is the summed count itself, not a percentage.
            'under_18': plain_value(
                'Households with heads under 18 years old', total_under_18),
        },
    }
def get_service_delivery_profile(geo_code, geo_level, session):
    """Assemble the service-delivery statistics for one geography.

    Covers water source, refuse disposal, electricity access and toilet
    facilities.  The returned dict holds one distribution per service plus
    headline entries shaped as
    ``{"name": ..., "numerators": {"this": ...}, "values": {"this": ...}}``.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level, used to pick the db models
    :param session: database session handed to the query helpers
    """
    def _pct(numerator, denominator):
        # Same formula as before, but a geography with no recorded rows
        # (zero denominator) reports 0.0 instead of raising
        # ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / denominator * 100, 2)

    # water source
    db_model_wsrc = get_model_from_fields(['source of water'], geo_level)
    objects = get_objects_by_geo(db_model_wsrc, geo_code, geo_level, session,
                                 order_by='-total')
    water_src_data = OrderedDict()
    total_wsrc = 0.0
    total_water_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'source of water')
        src = SHORT_WATER_SOURCE_CATEGORIES[attr]
        water_src_data[src] = {
            "name": src,
            "numerators": {"this": obj.total},
        }
        total_wsrc += obj.total
        # Matched on the raw (un-shortened) category name.
        if attr.startswith('Regional/local water scheme'):
            total_water_sp += obj.total

    # refuse disposal
    db_model_ref = get_model_from_fields(['refuse disposal'], geo_level)
    objects = get_objects_by_geo(db_model_ref, geo_code, geo_level, session,
                                 order_by='-total')
    refuse_disp_data = OrderedDict()
    total_ref = 0.0
    total_ref_sp = 0.0
    for obj in objects:
        attr = getattr(obj, 'refuse disposal')
        disp = SHORT_REFUSE_DISPOSAL_CATEGORIES[attr]
        refuse_disp_data[disp] = {
            "name": disp,
            "numerators": {"this": obj.total},
        }
        total_ref += obj.total
        if attr.startswith('Removed by local authority'):
            total_ref_sp += obj.total

    # electricity
    elec_attrs = ['electricity for cooking',
                  'electricity for heating',
                  'electricity for lighting']
    db_model_elec = get_model_from_fields(elec_attrs, geo_level)
    objects = get_objects_by_geo(db_model_elec, geo_code, geo_level, session)
    total_elec = 0.0
    total_some_elec = 0.0
    elec_access_data = {
        'total_all_elec': {
            "name": "Have electricity for everything",
            "numerators": {"this": 0.0},
        },
        'total_some_not_all_elec': {
            "name": "Have electricity for some things",
            "numerators": {"this": 0.0},
        },
        'total_no_elec': {
            "name": "No electricity",
            "numerators": {"this": 0.0},
        }
    }
    for obj in objects:
        total_elec += obj.total
        # A use counts as electrified unless its value starts with 'no '.
        uses = [not getattr(obj, attr).startswith('no ')
                for attr in elec_attrs]
        has_all = all(uses)
        has_some = any(uses)
        if has_some:
            total_some_elec += obj.total
        if has_all:
            elec_access_data['total_all_elec']['numerators']['this'] += obj.total
        elif has_some:
            elec_access_data['total_some_not_all_elec']['numerators']['this'] += obj.total
        else:
            elec_access_data['total_no_elec']['numerators']['this'] += obj.total

    # toilets
    db_model_toilet = get_model_from_fields(['toilet facilities'], geo_level)
    objects = get_objects_by_geo(db_model_toilet, geo_code, geo_level, session,
                                 order_by='-total')
    toilet_data = OrderedDict()
    total_toilet = 0.0
    total_flush_toilet = 0.0
    for obj in objects:
        name = getattr(obj, 'toilet facilities')
        toilet_data[name] = {
            "name": name,
            "numerators": {"this": obj.total},
        }
        total_toilet += obj.total
        if name.startswith(('Flush', 'Chemical')):
            total_flush_toilet += obj.total

    # A geography may have no 'None' row at all; report zero rather than
    # failing with a KeyError.
    if 'None' in toilet_data:
        total_no_toilet = toilet_data['None']['numerators']['this']
    else:
        total_no_toilet = 0.0
    toilet_data = collapse_categories(toilet_data,
                                      COLLAPSED_TOILET_CATEGORIES,
                                      key_order=(
                                          'Flush toilet', 'Chemical toilet',
                                          'Pit toilet', 'Bucket toilet',
                                          'Other', 'None', 'Unspecified',
                                          'N/A'))

    # Percentages are filled in before add_metadata() so that only category
    # entries are visited.
    for data, total in zip((water_src_data, refuse_disp_data,
                            elec_access_data, toilet_data),
                           (total_wsrc, total_ref, total_elec, total_toilet)):
        for fields in data.values():
            fields["values"] = {
                "this": _pct(fields["numerators"]["this"], total)}

    add_metadata(water_src_data, db_model_wsrc)
    add_metadata(refuse_disp_data, db_model_ref)
    add_metadata(elec_access_data, db_model_elec)
    add_metadata(toilet_data, db_model_toilet)

    return {'water_source_distribution': water_src_data,
            'percentage_water_from_service_provider': {
                "name": "Are getting water from a regional or local service provider",
                "numerators": {"this": total_water_sp},
                "values": {"this": _pct(total_water_sp, total_wsrc)},
            },
            'refuse_disposal_distribution': refuse_disp_data,
            'percentage_ref_disp_from_service_provider': {
                "name": "Are getting refuse disposal from a local authority or private company",
                "numerators": {"this": total_ref_sp},
                "values": {"this": _pct(total_ref_sp, total_ref)},
            },
            'percentage_electricity_access': {
                "name": "Have electricity for at least one of cooking, heating or lighting",
                "numerators": {"this": total_some_elec},
                "values": {"this": _pct(total_some_elec, total_elec)}
            },
            'electricity_access_distribution': elec_access_data,
            'percentage_flush_toilet_access': {
                "name": "Have access to flush or chemical toilets",
                "numerators": {"this": total_flush_toilet},
                "values": {"this": _pct(total_flush_toilet, total_toilet)}
            },
            'percentage_no_toilet_access': {
                "name": "Have no access to any toilets",
                "numerators": {"this": total_no_toilet},
                "values": {"this": _pct(total_no_toilet, total_toilet)}
            },
            'toilet_facilities_distribution': toilet_data,
            }
def get_economics_profile(geo_code, geo_level, session):
    """Build the economics statistics for one geography.

    Produces three distributions: individual monthly income (collapsed into
    broader categories), official employment status and type of sector.
    Each category entry carries ``name``, ``numerators`` and ``values``.

    :param geo_code: code of the geography to report on
    :param geo_level: geography level; also used in the income table name
    :param session: database session handed to the query helpers
    :returns: dict with keys ``individual_income_distribution``,
        ``employment_status`` and ``sector_type_distribution``
    """
    def _pct(numerator, denominator):
        # Same formula as before, but when every counted row is zero the
        # denominator is 0.0; report 0.0 instead of raising
        # ZeroDivisionError.
        if not denominator:
            return 0.0
        return round(numerator / denominator * 100, 2)

    # income
    db_model_income = get_model_from_fields(
        ['individual monthly income'], geo_level,
        'individualmonthlyincome_%s_employedonly' % geo_level)
    objects = get_objects_by_geo(db_model_income, geo_code, geo_level, session)
    income_dist_data = {}
    total_income = 0.0
    for obj in objects:
        income_group = getattr(obj, 'individual monthly income')
        # 'Not applicable' rows count towards neither the data nor the total.
        if income_group == 'Not applicable':
            continue
        total_income += obj.total
        income_dist_data[income_group] = {
            "name": income_group,
            "numerators": {"this": obj.total},
        }

    # Order follows the collapsed category values, minus one 'N/A' entry.
    # (list.remove raises ValueError if 'N/A' is not among the values.)
    key_order = COLLAPSED_INCOME_CATEGORIES.values()
    key_order.remove('N/A')
    income_dist_data = collapse_categories(income_dist_data,
                                           COLLAPSED_INCOME_CATEGORIES,
                                           key_order=key_order)

    # employment status
    db_model_employ = get_model_from_fields(['official employment status'],
                                            geo_level)
    objects = get_objects_by_geo(db_model_employ, geo_code, geo_level, session)
    employ_status = {}
    total_workers = 0.0
    for obj in objects:
        employ_st = getattr(obj, 'official employment status')
        # These two categories fall outside the reported universe.
        if employ_st in ('Age less than 15 years', 'Not applicable'):
            continue
        total_workers += obj.total
        employ_status[employ_st] = {
            "name": employ_st,
            "numerators": {"this": obj.total},
        }

    # sector
    db_model_sector = get_model_from_fields(['type of sector'], geo_level)
    objects = get_objects_by_geo(db_model_sector, geo_code, geo_level, session,
                                 order_by='type of sector')
    sector_dist_data = OrderedDict()
    total_sector = 0.0
    for obj in objects:
        sector = getattr(obj, 'type of sector')
        # Empty sectors are dropped along with 'Not applicable'.
        if sector == 'Not applicable' or obj.total == 0:
            continue
        total_sector += obj.total
        sector_dist_data[sector] = {
            "name": sector,
            "numerators": {"this": obj.total},
        }

    # Percentages are filled in before any 'metadata' key is attached so
    # that only category entries are visited.
    for data, total in zip((income_dist_data, sector_dist_data, employ_status),
                           (total_income, total_sector, total_workers)):
        for fields in data.values():
            fields["values"] = {
                "this": _pct(fields["numerators"]["this"], total)}

    income_dist_data['metadata'] = {'universe': 'Officially employed individuals'}
    employ_status['metadata'] = {'universe': 'Workers 15 and over'}

    add_metadata(income_dist_data, db_model_income)
    add_metadata(employ_status, db_model_employ)
    add_metadata(sector_dist_data, db_model_sector)

    return {'individual_income_distribution': income_dist_data,
            'employment_status': employ_status,
            'sector_type_distribution': sector_dist_data}